From 8fbe439345bfc4e5d2c30217e33aaf66448772f5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 15 Mar 2019 17:00:55 +0000
Subject: [PATCH] [SelectionDAG] Add SimplifyDemandedBits handling for
 ISD::SCALAR_TO_VECTOR

Fixes a lot of constant folding mismatches between i686 and x86_64

llvm-svn: 356273
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp   | 13 ++++
 .../CodeGen/X86/broadcast-elm-cross-splat-vec.ll   | 14 +---
 llvm/test/CodeGen/X86/combine-sdiv.ll              | 28 ++-----
 llvm/test/CodeGen/X86/insertelement-ones.ll        | 79 +++++++-------------
 llvm/test/CodeGen/X86/known-signbits-vector.ll     |  4 +-
 llvm/test/CodeGen/X86/pr30562.ll                   | 12 ++-
 llvm/test/CodeGen/X86/pr34177.ll                   | 27 +++----
 llvm/test/CodeGen/X86/shrink_vmul.ll               | 41 +++--------
 llvm/test/CodeGen/X86/sse3.ll                      |  6 +-
 llvm/test/CodeGen/X86/vector-mul.ll                | 86 +++++++++-------------
 llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll    |  3 +-
 .../CodeGen/X86/vector-shuffle-combining-ssse3.ll  | 31 ++------
 llvm/test/CodeGen/X86/vector-shuffle-v1.ll         |  3 +-
 llvm/test/CodeGen/X86/vector-trunc-math-widen.ll   | 14 +---
 llvm/test/CodeGen/X86/vector-trunc-math.ll         | 14 +---
 15 files changed, 129 insertions(+), 246 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2a1b974..fcda0e5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -519,6 +519,19 @@ bool TargetLowering::SimplifyDemandedBits(
   KnownBits Known2, KnownOut;
   switch (Op.getOpcode()) {
+  case ISD::SCALAR_TO_VECTOR: {
+    if (!DemandedElts[0])
+      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+    KnownBits SrcKnown;
+    SDValue Src = Op.getOperand(0);
+    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
+    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
+      return true;
+    Known = SrcKnown.zextOrTrunc(BitWidth, false);
+    break;
+  }
   case ISD::BUILD_VECTOR:
     // Collect the known bits that are shared by every constant vector element.
Known.Zero.setAllBits(); Known.One.setAllBits(); diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll index 7b9f1ab..b2fd996 100644 --- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll +++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll @@ -1399,9 +1399,7 @@ define <4 x i64> @f4xi64_i128(<4 x i64> %a) { ; AVX-64-LABEL: f4xi64_i128: ; AVX-64: # %bb.0: ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-64-NEXT: movl $1, %eax -; AVX-64-NEXT: vmovq %rax, %xmm2 -; AVX-64-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] +; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; AVX-64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ; AVX-64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 @@ -1457,9 +1455,7 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) { ; AVX-64-LABEL: f8xi64_i128: ; AVX-64: # %bb.0: ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX-64-NEXT: movl $1, %eax -; AVX-64-NEXT: vmovq %rax, %xmm3 -; AVX-64-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7] +; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 ; AVX-64-NEXT: vpaddq %xmm3, %xmm1, %xmm1 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 @@ -1467,7 +1463,7 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) { ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 ; AVX-64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,0,1] +; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 @@ -1535,9 +1531,7 @@ define <8 x i64> @f8xi64_i256(<8 x i64> %a) { ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3] ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 -; AVX-64-NEXT: movl $1, %eax -; AVX-64-NEXT: vmovq %rax, %xmm4 -; AVX-64-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7] +; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; AVX-64-NEXT: vpaddq %xmm4, %xmm1, %xmm1 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 3a3ee90..1694c2e 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -1573,10 +1573,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: movl $2, %eax -; AVX2-NEXT: vmovq %rax, %xmm2 -; AVX2-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; AVX2-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,2305843009213693952] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsubq %xmm2, %xmm1, %xmm1 @@ -1586,9 +1583,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v2i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: movl $2, %eax 
-; AVX512F-NEXT: vmovq %rax, %xmm1 -; AVX512F-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0] ; AVX512F-NEXT: vpsraq $63, %zmm0, %zmm2 ; AVX512F-NEXT: vpsrlvq {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpaddq %xmm2, %xmm0, %xmm2 @@ -1602,10 +1597,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; AVX512BW-NEXT: vpsraq $63, %xmm0, %xmm1 ; AVX512BW-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: movl $2, %eax -; AVX512BW-NEXT: vmovq %rax, %xmm2 -; AVX512BW-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; AVX512BW-NEXT: vpsravq %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsravq {{.*}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX512BW-NEXT: retq ; @@ -1614,10 +1606,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; XOP-NEXT: vpshaq {{.*}}(%rip), %xmm0, %xmm1 ; XOP-NEXT: vpshlq {{.*}}(%rip), %xmm1, %xmm1 ; XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; XOP-NEXT: movq $-2, %rax -; XOP-NEXT: vmovq %rax, %xmm2 -; XOP-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; XOP-NEXT: vpshaq %xmm2, %xmm1, %xmm1 +; XOP-NEXT: vpshaq {{.*}}(%rip), %xmm1, %xmm1 ; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; XOP-NEXT: retq %1 = sdiv <2 x i64> %x, @@ -1748,10 +1737,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) { ; XOP-NEXT: vpshaq %xmm1, %xmm0, %xmm2 ; XOP-NEXT: vpshlq {{.*}}(%rip), %xmm2, %xmm2 ; XOP-NEXT: vpaddq %xmm2, %xmm0, %xmm2 -; XOP-NEXT: movq $-2, %rax -; XOP-NEXT: vmovq %rax, %xmm3 -; XOP-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7] -; XOP-NEXT: vpshaq %xmm3, %xmm2, %xmm2 +; XOP-NEXT: vpshaq {{.*}}(%rip), %xmm2, %xmm2 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 ; XOP-NEXT: vpshaq %xmm1, %xmm3, %xmm1 ; XOP-NEXT: vpshlq {{.*}}(%rip), %xmm1, %xmm1 @@ -1976,9 +1962,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; XOP-NEXT: vmovdqa {{.*#+}} xmm7 = [18446744073709551552,18446744073709551554] ; XOP-NEXT: vpshlq %xmm7, %xmm6, %xmm6 ; XOP-NEXT: vpaddq %xmm6, %xmm0, %xmm6 -; XOP-NEXT: movq $-2, %rax -; XOP-NEXT: vmovq %rax, %xmm5 -; XOP-NEXT: vpslldq {{.*#+}} xmm5 = zero,zero,zero,zero,zero,zero,zero,zero,xmm5[0,1,2,3,4,5,6,7] +; XOP-NEXT: vmovdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,254,255,255,255,255,255,255,255] ; XOP-NEXT: vpshaq %xmm5, %xmm6, %xmm6 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm6, %ymm2 ; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7] diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll index 81e5d8d..2f2acf7 100644 --- a/llvm/test/CodeGen/X86/insertelement-ones.ll +++ b/llvm/test/CodeGen/X86/insertelement-ones.ll @@ -312,39 +312,30 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) { define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) { ; SSE2-LABEL: insert_v16i8_x123456789ABCDEx: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: movl $255, %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm1 +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 -; 
SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: por {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_v16i8_x123456789ABCDEx: ; SSE3: # %bb.0: -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; SSE3-NEXT: pand %xmm1, %xmm0 ; SSE3-NEXT: movl $255, %eax -; SSE3-NEXT: movd %eax, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm1 +; SSE3-NEXT: movd %eax, %xmm1 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm0 -; SSE3-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; SSE3-NEXT: por %xmm2, %xmm0 +; SSE3-NEXT: por {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movl $255, %eax -; SSSE3-NEXT: movd %eax, %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm2[0] -; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero -; SSSE3-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] -; SSSE3-NEXT: por %xmm2, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero +; SSSE3-NEXT: por {{.*}}(%rip), %xmm1 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -369,61 +360,45 @@ define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) { define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: movl $255, %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: pandn %xmm3, %xmm2 +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] ; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] -; SSE2-NEXT: por %xmm4, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE2-NEXT: pand %xmm5, %xmm1 -; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE2-NEXT: pandn %xmm3, %xmm5 -; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255] +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: por {{.*}}(%rip), %xmm1 ; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: por %xmm4, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx: ; SSE3: # %bb.0: -; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; SSE3-NEXT: pand %xmm2, %xmm0 ; SSE3-NEXT: movl $255, %eax -; SSE3-NEXT: movd %eax, %xmm3 -; SSE3-NEXT: pandn %xmm3, %xmm2 +; SSE3-NEXT: movd %eax, %xmm2 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE3-NEXT: por %xmm2, %xmm0 ; 
SSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] ; SSE3-NEXT: pand %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm3, %xmm4 -; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] -; SSE3-NEXT: por %xmm4, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE3-NEXT: pand %xmm5, %xmm1 -; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE3-NEXT: pandn %xmm3, %xmm5 -; SSE3-NEXT: por %xmm5, %xmm1 +; SSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255] +; SSE3-NEXT: por %xmm3, %xmm0 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: por {{.*}}(%rip), %xmm1 ; SSE3-NEXT: pand %xmm2, %xmm1 -; SSE3-NEXT: por %xmm4, %xmm1 +; SSE3-NEXT: por %xmm3, %xmm1 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movl $255, %eax -; SSSE3-NEXT: movd %eax, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm2 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255] ; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm2[0] ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255] ; SSSE3-NEXT: por %xmm0, %xmm2 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15] -; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0],zero -; SSSE3-NEXT: por %xmm3, %xmm1 +; SSSE3-NEXT: por {{.*}}(%rip), %xmm1 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero ; SSSE3-NEXT: por %xmm0, %xmm1 ; SSSE3-NEXT: movdqa %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll index 5ab1cf2..719013f 100644 --- a/llvm/test/CodeGen/X86/known-signbits-vector.ll +++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll @@ -244,12 +244,12 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 ; ; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp: ; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: vpsrlq $61, %xmm0, %xmm0 ; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8] ; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0 -; X64-NEXT: movslq %edi, %rax -; X64-NEXT: vmovq %rax, %xmm1 +; X64-NEXT: vmovq %rdi, %xmm1 ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0 ; X64-NEXT: vmovq %xmm0, %rax ; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/pr30562.ll b/llvm/test/CodeGen/X86/pr30562.ll index 24cbf10..05d5c09 100644 --- a/llvm/test/CodeGen/X86/pr30562.ll +++ b/llvm/test/CodeGen/X86/pr30562.ll @@ -6,20 +6,18 @@ define i32 @foo(i64* nocapture %perm, i32 %n) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: movq %rcx, %xmm0 -; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: andl $1, %ecx -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2,3] +; CHECK-NEXT: movaps {{.*#+}} 
xmm1 = [2,3] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq -24(%rsp,%rcx,8), %rdx -; CHECK-NEXT: movdqu %xmm0, (%rdi,%rdx,8) +; CHECK-NEXT: movups %xmm0, (%rdi,%rdx,8) ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %exit ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll index 3d77ddb..6926e30 100644 --- a/llvm/test/CodeGen/X86/pr34177.ll +++ b/llvm/test/CodeGen/X86/pr34177.ll @@ -8,27 +8,29 @@ target triple = "x86_64-unknown-linux-gnu" define void @test(<4 x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unnamed_addr { ; CHECK-LABEL: test: ; CHECK: # %bb.0: -; CHECK-NEXT: vpextrq $1, %xmm0, %r8 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] +; CHECK-NEXT: vmovq %xmm1, %r8 +; CHECK-NEXT: vmovq %xmm0, %r9 +; CHECK-NEXT: vpextrq $1, %xmm1, %r10 +; CHECK-NEXT: vpextrq $1, %xmm0, %r11 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,3] -; CHECK-NEXT: vmovq %xmm1, %r9 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2 -; CHECK-NEXT: vmovq %xmm2, %rdx -; CHECK-NEXT: vpextrq $1, %xmm1, %rsi -; CHECK-NEXT: vpextrq $1, %xmm2, %rax +; CHECK-NEXT: vmovq %xmm1, %rax +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 ; CHECK-NEXT: vmovq %xmm0, %rcx -; CHECK-NEXT: negq %rcx +; CHECK-NEXT: vpextrq $1, %xmm1, %rdx +; CHECK-NEXT: vpextrq $1, %xmm0, %rsi +; CHECK-NEXT: cmpq %rsi, %rdx ; CHECK-NEXT: fld1 ; CHECK-NEXT: fldz ; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: fcmove %st(2), %st -; CHECK-NEXT: cmpq %rax, %rsi +; CHECK-NEXT: cmpq %rcx, %rax ; CHECK-NEXT: fld %st(1) ; CHECK-NEXT: fcmove %st(3), %st -; CHECK-NEXT: cmpq %rdx, %r9 +; CHECK-NEXT: cmpq %r11, %r10 ; CHECK-NEXT: fld %st(2) ; CHECK-NEXT: fcmove %st(4), %st -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmpq %r8, %rax +; CHECK-NEXT: cmpq %r9, %r8 ; CHECK-NEXT: fxch %st(3) ; CHECK-NEXT: fcmove %st(4), %st ; CHECK-NEXT: fstp %st(4) @@ -40,12 +42,11 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unn ; CHECK-NEXT: fstpt 30(%rdi) ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) ; CHECK-NEXT: fstpt 10(%rdi) +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fadd %st, %st(0) ; CHECK-NEXT: fstpt 60(%rdi) -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fadd %st, %st(0) ; CHECK-NEXT: fstpt 40(%rdi) -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fadd %st, %st(0) ; CHECK-NEXT: fstpt 20(%rdi) ; CHECK-NEXT: fadd %st, %st(0) diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index 85ffce8..f693a57c 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -1425,10 +1425,7 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) { ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax ; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; X64-AVX-NEXT: movl $255, %ecx -; X64-AVX-NEXT: vmovq %rcx, %xmm1 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ 
-1564,10 +1561,7 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) { ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax ; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; X64-AVX-NEXT: movl $256, %ecx # imm = 0x100 -; X64-AVX-NEXT: vmovq %rcx, %xmm1 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -1842,11 +1836,7 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax ; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X64-AVX-NEXT: movl $65535, %ecx # imm = 0xFFFF -; X64-AVX-NEXT: vmovq %rcx, %xmm1 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; X64-AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1964,11 +1954,8 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: pxor %xmm1, %xmm1 ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] -; X64-SSE-NEXT: movl $65536, %ecx # imm = 0x10000 -; X64-SSE-NEXT: movq %rcx, %xmm1 -; X64-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] -; X64-SSE-NEXT: pmuludq %xmm0, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1977,11 +1964,7 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax ; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X64-AVX-NEXT: movl $65536, %ecx # imm = 0x10000 -; X64-AVX-NEXT: vmovq %rcx, %xmm1 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; X64-AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -2035,11 +2018,8 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] ; X64-SSE-NEXT: psrad $16, %xmm0 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] -; X64-SSE-NEXT: movl $32768, %ecx # imm = 0x8000 -; X64-SSE-NEXT: movq %rcx, %xmm1 -; X64-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] -; X64-SSE-NEXT: pmuludq %xmm0, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -2047,10 +2027,7 @@ define void 
@mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) { ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax ; X64-AVX-NEXT: vpmovsxwq (%rdi,%rsi), %xmm0 -; X64-AVX-NEXT: movl $32768, %ecx # imm = 0x8000 -; X64-AVX-NEXT: vmovq %rcx, %xmm1 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll index 1761567..9ad4c65 100644 --- a/llvm/test/CodeGen/X86/sse3.ll +++ b/llvm/test/CodeGen/X86/sse3.ll @@ -12,16 +12,14 @@ define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind { ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl $1, %edx -; X86-NEXT: movd %edx, %xmm0 +; X86-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1] ; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; X86-NEXT: movdqa %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: t0: ; X64: # %bb.0: # %entry -; X64-NEXT: movl $1, %eax -; X64-NEXT: movd %eax, %xmm0 +; X64-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1] ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; X64-NEXT: movdqa %xmm0, (%rdi) ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll index d2f7b45..d16e187 100644 --- a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -932,9 +932,7 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind { ; ; X64-LABEL: mul_v2i64_0_1: ; X64: # %bb.0: -; X64-NEXT: movl $1, %eax -; X64-NEXT: movq %rax, %xmm1 -; X64-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] +; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; X64-NEXT: movdqa %xmm0, %xmm2 ; X64-NEXT: pmuludq %xmm1, %xmm2 ; X64-NEXT: psrlq $32, %xmm0 @@ -945,9 +943,7 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind { ; ; X64-AVX-LABEL: mul_v2i64_0_1: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movl $1, %eax -; X64-AVX-NEXT: vmovq %rax, %xmm1 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 ; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 @@ -976,37 +972,28 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; ; X64-LABEL: mul_v2i64_neg_0_1: ; X64: # %bb.0: -; X64-NEXT: movdqa %xmm0, %xmm1 -; X64-NEXT: psrlq $32, %xmm1 -; X64-NEXT: movq $-1, %rax -; X64-NEXT: movq %rax, %xmm2 -; X64-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; X64-NEXT: pmuludq %xmm2, %xmm1 -; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: movq %rax, %xmm3 -; X64-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7] -; X64-NEXT: pmuludq %xmm0, %xmm3 -; X64-NEXT: paddq %xmm1, %xmm3 -; X64-NEXT: psllq $32, %xmm3 -; X64-NEXT: pmuludq %xmm2, %xmm0 +; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255] +; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: pmuludq %xmm1, %xmm2 +; X64-NEXT: movdqa %xmm0, %xmm3 +; X64-NEXT: psrlq $32, 
%xmm3 +; X64-NEXT: pmuludq %xmm1, %xmm3 +; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0 ; X64-NEXT: paddq %xmm3, %xmm0 +; X64-NEXT: psllq $32, %xmm0 +; X64-NEXT: paddq %xmm2, %xmm0 ; X64-NEXT: retq ; ; X64-AVX-LABEL: mul_v2i64_neg_0_1: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm1 -; X64-AVX-NEXT: movq $-1, %rax -; X64-AVX-NEXT: vmovq %rax, %xmm2 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 -; X64-AVX-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-AVX-NEXT: vmovq %rax, %xmm3 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 -; X64-AVX-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; X64-AVX-NEXT: vpsllq $32, %xmm1, %xmm1 -; X64-AVX-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255] +; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm3 +; X64-AVX-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 +; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0 +; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; X64-AVX-NEXT: retq %1 = mul <2 x i64> %a0, ret <2 x i64> %1 @@ -1030,33 +1017,28 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; ; X64-LABEL: mul_v2i64_15_neg_63: ; X64: # %bb.0: -; X64-NEXT: movdqa %xmm0, %xmm1 -; X64-NEXT: psrlq $32, %xmm1 -; X64-NEXT: movdqa {{.*#+}} xmm2 = [15,18446744073709551553] -; X64-NEXT: pmuludq %xmm2, %xmm1 -; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: movq %rax, %xmm3 -; X64-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7] -; X64-NEXT: pmuludq %xmm0, %xmm3 -; X64-NEXT: paddq %xmm1, %xmm3 -; X64-NEXT: psllq $32, %xmm3 -; X64-NEXT: pmuludq %xmm2, %xmm0 +; X64-NEXT: movdqa {{.*#+}} xmm1 = [15,18446744073709551553] +; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: pmuludq %xmm1, %xmm2 +; X64-NEXT: movdqa %xmm0, %xmm3 +; X64-NEXT: psrlq $32, %xmm3 +; X64-NEXT: pmuludq %xmm1, %xmm3 +; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0 ; X64-NEXT: paddq %xmm3, %xmm0 +; X64-NEXT: psllq $32, %xmm0 +; X64-NEXT: paddq %xmm2, %xmm0 ; X64-NEXT: retq ; ; X64-AVX-LABEL: mul_v2i64_15_neg_63: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm1 -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,18446744073709551553] -; X64-AVX-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 -; X64-AVX-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-AVX-NEXT: vmovq %rax, %xmm3 -; X64-AVX-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7] -; X64-AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 -; X64-AVX-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; X64-AVX-NEXT: vpsllq $32, %xmm1, %xmm1 -; X64-AVX-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,18446744073709551553] +; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm3 +; X64-AVX-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 +; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0 +; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; X64-AVX-NEXT: retq %1 = mul <2 x i64> %a0, ret <2 x i64> %1 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll index f718bbe..20a8095 100644 --- 
a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1986,8 +1986,7 @@ define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b) ; SSSE3-NEXT: movzbl (%rsi), %ecx ; SSSE3-NEXT: shll $8, %ecx ; SSSE3-NEXT: orl %eax, %ecx -; SSSE3-NEXT: movzwl %cx, %eax -; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0] ; SSSE3-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index a531bf6..b42f2ac 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -745,37 +745,16 @@ define <16 x i8> @constant_fold_pshufb() { ret <16 x i8> %1 } -; FIXME - unnecessary pshufb/broadcast being used - pshufb mask only needs lowest byte. define <16 x i8> @constant_fold_pshufb_2() { ; SSE-LABEL: constant_fold_pshufb_2: ; SSE: # %bb.0: -; SSE-NEXT: movl $2, %eax -; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pshufb %xmm1, %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] ; SSE-NEXT: retq ; -; AVX1-LABEL: constant_fold_pshufb_2: -; AVX1: # %bb.0: -; AVX1-NEXT: movl $2, %eax -; AVX1-NEXT: vmovd %eax, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: constant_fold_pshufb_2: -; AVX2: # %bb.0: -; AVX2-NEXT: movl $2, %eax -; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: constant_fold_pshufb_2: -; AVX512F: # %bb.0: -; AVX512F-NEXT: movl $2, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 -; AVX512F-NEXT: vpbroadcastb %xmm0, %xmm0 -; AVX512F-NEXT: retq +; AVX-LABEL: constant_fold_pshufb_2: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; AVX-NEXT: retq %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> , <16 x i8> ) ret <16 x i8> %1 } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index 621058c..85666aa 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -46,8 +46,7 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: movq $-1, %rax -; AVX512F-NEXT: vmovq %rax, %xmm1 +; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} diff --git a/llvm/test/CodeGen/X86/vector-trunc-math-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-math-widen.ll index 7129a04..29e0065 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math-widen.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math-widen.ll @@ -2433,11 +2433,8 @@ define <8 x i16> @trunc_mul_v8i32_v8i16_zext_8i8(<16 x i8> %a0, <8 x i32> %a1) { define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; SSE-LABEL: trunc_mul_const_v4i64_v4i32: ; SSE: # %bb.0: -; SSE-NEXT: movl $1, %eax -; SSE-NEXT: movq %rax, %xmm2 -; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; SSE-NEXT: pmuludq %xmm2, %xmm0 ; SSE-NEXT: pmuludq {{.*}}(%rip), 
%xmm1 +; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] ; SSE-NEXT: retq ; @@ -2591,10 +2588,7 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; SSE-LABEL: trunc_mul_const_v16i64_v16i8: ; SSE: # %bb.0: -; SSE-NEXT: movl $1, %eax -; SSE-NEXT: movq %rax, %xmm8 -; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7] -; SSE-NEXT: pmuludq %xmm8, %xmm0 +; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0 ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm1 ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm2 ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm3 @@ -2622,9 +2616,7 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX1-LABEL: trunc_mul_const_v16i64_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: movl $1, %eax -; AVX1-NEXT: vmovq %rax, %xmm4 -; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm5 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm6 ; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index d28c940..ef34127 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -2433,11 +2433,8 @@ define <8 x i16> @trunc_mul_v8i32_v8i16_zext_8i8(<16 x i8> %a0, <8 x i32> %a1) { define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; SSE-LABEL: trunc_mul_const_v4i64_v4i32: ; SSE: # %bb.0: -; SSE-NEXT: movl $1, %eax -; SSE-NEXT: movq %rax, %xmm2 -; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; SSE-NEXT: pmuludq %xmm2, %xmm0 ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm1 +; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] ; SSE-NEXT: retq ; @@ -2591,10 +2588,7 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; SSE-LABEL: trunc_mul_const_v16i64_v16i8: ; SSE: # %bb.0: -; SSE-NEXT: movl $1, %eax -; SSE-NEXT: movq %rax, %xmm8 -; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7] -; SSE-NEXT: pmuludq %xmm8, %xmm0 +; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0 ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm1 ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm2 ; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm3 @@ -2622,9 +2616,7 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX1-LABEL: trunc_mul_const_v16i64_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: movl $1, %eax -; AVX1-NEXT: vmovq %rax, %xmm4 -; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm5 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm6 ; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 -- 2.7.4
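
Illustrative reproducer (not part of the committed diff): a minimal sketch of
the kind of fold this change enables, reconstructed from the mul_v2i64_0_1
test updated in llvm/test/CodeGen/X86/vector-mul.ll above. The RUN line is an
assumed approximation, and the <i64 0, i64 1> multiplier is inferred from the
test name and its CHECK lines. Before the patch, x86_64 materialized the
multiplier with a movl $1 / movq / pslldq sequence; with SimplifyDemandedBits
looking through ISD::SCALAR_TO_VECTOR it now constant-folds to a plain
constant-pool load (movdqa/vmovdqa), removing one of the i686/x86_64 codegen
differences the commit message mentions.

  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s
  define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
    %1 = mul <2 x i64> %a0, <i64 0, i64 1>
    ret <2 x i64> %1
  }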