ShAmt = DAG.getVectorShuffle(AmtVT, dl, ShAmt, DAG.getUNDEF(AmtVT), Mask);
}
+ // Peek through any zext node if we can get back to a 128-bit source.
+ if (AmtVT.getScalarSizeInBits() == 64 &&
+ (ShAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ ShAmt.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ ShAmt.getOperand(0).getValueType().isSimple() &&
+ ShAmt.getOperand(0).getValueType().is128BitVector()) {
+ ShAmt = ShAmt.getOperand(0);
+ AmtVT = ShAmt.getSimpleValueType();
+ }
+
// See if we can mask off the upper elements using the existing source node.
// The shift uses the entire lower 64-bits of the amount vector, so no need to
// do this for vXi64 types.
// Zero-extend bottom element to v2i64 vector type, either by extension or
// shuffle masking.
if (!IsMasked && AmtVT.getScalarSizeInBits() < 64) {
- if (Subtarget.hasSSE41())
+ if (AmtVT == MVT::v4i32 && (ShAmt.getOpcode() == X86ISD::VBROADCAST ||
+ ShAmt.getOpcode() == X86ISD::VBROADCAST_LOAD)) {
+ ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+ } else if (Subtarget.hasSSE41()) {
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
MVT::v2i64, ShAmt);
- else {
+ } else {
SDValue ByteShift = DAG.getTargetConstant(
(128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
; AVX1-LABEL: PR52719:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT: vpsrlq %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR52719:
;
; X86-AVX1-LABEL: PR52719:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X86-AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; X86-AVX1-NEXT: # xmm3 = mem[0,0]
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm5, %xmm2
-; X86-AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: # xmm2 = mem[0,0]
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0