The shift amount might have been peeked through an extract_subvector, changing the number of vector elements in the 'Amt' variable - so we were incorrectly calculating the ratio when peeking through bitcasts, resulting in splats being detected incorrectly.
llvm-svn: 343373
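To illustrate the arithmetic behind the fix, here is a minimal, self-contained sketch in plain C++ (no LLVM APIs). The element counts and shift amounts below are illustrative assumptions chosen to mirror the [33,0,63,0] constant in the affected test, not values taken from the compiler itself.

// Sketch: why deriving Ratio from Amt's element count goes wrong once Amt
// has been peeked through an extract_subvector, and why 64 / scalar-size is
// robust. Hypothetical numbers; compile with any C++11 compiler.
#include <cassert>
#include <vector>

int main() {
  // VT is v2i64: 2 lanes of 64 bits.
  const unsigned VTNumElts = 2;

  // Amt is a BUILD_VECTOR of i32 elements seen through a bitcast. If it was
  // also peeked through an extract_subvector, it may still carry the element
  // count of the wider source vector (assume v8i32 here) rather than v4i32.
  const unsigned AmtScalarBits = 32;
  const unsigned AmtNumElts = 8;

  // Old computation: depends on Amt's element count, so it yields 4 (8 / 2)
  // instead of the 2 i32 operands that actually make up one i64 lane.
  unsigned OldRatio = AmtNumElts / VTNumElts;

  // Fixed computation: how many Amt scalars form one 64-bit lane, independent
  // of how many elements Amt happens to have.
  unsigned NewRatio = 64 / AmtScalarBits;
  assert(OldRatio == 4 && NewRatio == 2);

  // The i32 operands encoding the i64 shift amounts 33 and 63 (low, high).
  std::vector<unsigned> Ops = {33, 0, 63, 0, 33, 0, 63, 0};

  // Splat check as in the lowering code: compare each later group of Ratio
  // operands against the first group.
  auto IsSplat = [&Ops](unsigned Ratio) {
    for (unsigned i = Ratio; i != Ops.size(); i += Ratio)
      for (unsigned j = 0; j != Ratio; ++j)
        if (Ops[j] != Ops[i + j])
          return false;
    return true;
  };

  // Grouping by 4 compares {33,0,63,0} with {33,0,63,0} and wrongly reports a
  // splat; grouping by 2 compares {33,0} with {63,0} and correctly rejects it.
  assert(IsSplat(OldRatio));
  assert(!IsSplat(NewRatio));
  return 0;
}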
}
// Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
- if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
+ if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
- unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
- VT.getVectorNumElements();
+ unsigned Ratio = 64 / Amt.getScalarValueSizeInBits();
std::vector<SDValue> Vals(Ratio);
for (unsigned i = 0; i != Ratio; ++i)
Vals[i] = Amt.getOperand(i);
- for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) {
for (unsigned j = 0; j != Ratio; ++j)
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-16, %esp
; X32-NEXT: subl $16, %esp
+; X32-NEXT: vmovdqa {{.*#+}} xmm3 = [33,0,63,0]
+; X32-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
+; X32-NEXT: vpsrlq %xmm3, %xmm4, %xmm5
+; X32-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[2,3,0,1]
+; X32-NEXT: vpsrlq %xmm6, %xmm4, %xmm4
+; X32-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; X32-NEXT: vextractf128 $1, %ymm2, %xmm5
+; X32-NEXT: vpsrlq %xmm6, %xmm5, %xmm7
+; X32-NEXT: vpsrlq %xmm3, %xmm5, %xmm5
+; X32-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
+; X32-NEXT: vpsrlq %xmm6, %xmm2, %xmm6
+; X32-NEXT: vpsrlq %xmm3, %xmm2, %xmm2
+; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; X32-NEXT: vpmovsxdq 16(%ebp), %xmm3
+; X32-NEXT: vpxor %xmm4, %xmm5, %xmm5
+; X32-NEXT: vpsubq %xmm4, %xmm5, %xmm5
+; X32-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; X32-NEXT: vpsubq %xmm4, %xmm2, %xmm2
; X32-NEXT: vpmovsxdq 8(%ebp), %xmm4
-; X32-NEXT: vmovdqa {{.*#+}} xmm5 = [33,0,63,0]
-; X32-NEXT: vmovdqa {{.*#+}} xmm6 = [0,2147483648,0,2147483648]
-; X32-NEXT: vpsrlq %xmm5, %xmm6, %xmm6
-; X32-NEXT: vextractf128 $1, %ymm2, %xmm7
-; X32-NEXT: vpsrlq %xmm5, %xmm7, %xmm7
-; X32-NEXT: vpxor %xmm6, %xmm7, %xmm7
-; X32-NEXT: vpsubq %xmm6, %xmm7, %xmm7
-; X32-NEXT: vpsrlq %xmm5, %xmm2, %xmm2
-; X32-NEXT: vpxor %xmm6, %xmm2, %xmm2
-; X32-NEXT: vpsubq %xmm6, %xmm2, %xmm2
-; X32-NEXT: vinsertf128 $1, %xmm7, %ymm2, %ymm2
+; X32-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
; X32-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X32-NEXT: vextractf128 $1, %ymm1, %xmm4
; X32-NEXT: vextractf128 $1, %ymm0, %xmm5
; X86-AVX1-NEXT: movl $63, %eax
; X86-AVX1-NEXT: vmovd %eax, %xmm1
; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; X86-AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm4
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
-; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm4
+; X86-AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
+; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm4
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm5
+; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; X86-AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm2, %xmm3
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X86-AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1