// 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
// so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
- Res = DAG.getBitcast(MVT::v4i64, Res);
- Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});
+ // Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
+ SmallVector<int, 64> Mask;
+ int Scale = 64 / OutVT.getScalarSizeInBits();
+ scaleShuffleMask<int>(Scale, makeArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+ Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
if (DstVT.is256BitVector())
return DAG.getBitcast(DstVT, Res);
;
; AVX2-LABEL: allones_v8i64_sign:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
;
; AVX2-LABEL: allzeros_v8i64_sign:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
;
; AVX2-LABEL: allones_v8i64_and1:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
;
; AVX2-LABEL: allzeros_v8i64_and1:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
;
; AVX2-LABEL: allones_v8i64_and4:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax
;
; AVX2-LABEL: allzeros_v8i64_and4:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovmskps %ymm0, %eax