Reduce the size of an any-extended i64 scalar_to_vector source to i32 - the any_extend nodes are often introduced by SimplifyDemandedBits.
llvm-svn: 356292
// Combine for a SCALAR_TO_VECTOR node.
//
// Tries, in order, the rewrites below and returns the replacement value for
// the first one that matches; returns an empty SDValue when none applies.
//
// N   - the node being combined; operand 0 is the scalar source and result 0
//       gives the vector type VT.
// DAG - used to create the replacement nodes.
static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
// A single debug location, taken from N, is shared by every node built below.
+ SDLoc DL(N);
// If this is a scalar to vector to v1i1 from an AND with 1, bypass the and.
// This occurs frequently in our masked scalar intrinsic code and our
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
if (C->getAPIntValue().isOneValue())
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
Src.getOperand(0));
// Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
// NOTE(review): the opening of this `if (` condition is not visible in this
// excerpt; only its last clause (the extracted-from vector has i1 elements)
// is shown on the next line.
Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
// Only a constant index of zero is handled; that same index operand is
// passed on as the EXTRACT_SUBVECTOR index.
if (C->isNullValue())
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
- Src.getOperand(0), Src.getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
+ Src.getOperand(1));
+
+ // Reduce v2i64 to v4i32 if we don't need the upper bits.
+ // TODO: Move to DAGCombine?
// Matches a single-use ANY_EXTEND to i64 whose own operand is at most 32 bits
// wide. That operand is brought to i32 (any-extended or truncated as needed),
// placed in a v4i32 SCALAR_TO_VECTOR, and the result is bitcast back to VT.
+ if (VT == MVT::v2i64 && Src.getOpcode() == ISD::ANY_EXTEND &&
+ Src.getValueType() == MVT::i64 && Src.hasOneUse() &&
+ Src.getOperand(0).getScalarValueSizeInBits() <= 32)
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+ DAG.getAnyExtOrTrunc(Src.getOperand(0), DL, MVT::i32)));
// No rewrite matched.
return SDValue();
}
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
+; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX2-NEXT: vmovq %rdi, %xmm0
+; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
+; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
+; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
+; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX2-NEXT: vmovq %rdi, %xmm0
+; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
+; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
+; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
; SSE2-SSSE3-LABEL: bitcast_i2_2i1:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
+; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
;
; AVX1-LABEL: bitcast_i2_2i1:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
;
; AVX2-LABEL: bitcast_i2_2i1:
; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX2-NEXT: vmovq %rdi, %xmm0
+; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8]
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
-; X64-NEXT: vmovq %rdi, %xmm1
+; X64-NEXT: vmovd %edi, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
;
; X64-LABEL: t1:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: shll $12, %edi
-; X64-NEXT: movq %rdi, %xmm0
+; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT: movq %xmm0, (%rsi)
define <2 x i32> @simplify_select(i32 %x, <2 x i1> %z) {
; SSE2-LABEL: simplify_select:
; SSE2: # %bb.0:
-; SSE2-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-NEXT: psllq $63, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-NEXT: movq %rdi, %xmm1
+; SSE2-NEXT: movd %edi, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
;
; SSE41-LABEL: simplify_select:
; SSE41: # %bb.0:
-; SSE41-NEXT: # kill: def $edi killed $edi def $rdi
-; SSE41-NEXT: movq %rdi, %xmm0
+; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: simplify_select:
; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
-; AVX1-NEXT: vmovq %rdi, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; X64-SSE2-LABEL: convert_v3i8_to_v3f32:
; X64-SSE2: # %bb.0: # %entry
; X64-SSE2-NEXT: movzwl (%rsi), %eax
-; X64-SSE2-NEXT: movq %rax, %xmm0
+; X64-SSE2-NEXT: movd %eax, %xmm0
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE42: # %bb.0: # %entry
; X64-SSE42-NEXT: movzbl 2(%rsi), %eax
; X64-SSE42-NEXT: movzwl (%rsi), %ecx
-; X64-SSE42-NEXT: movq %rcx, %xmm0
+; X64-SSE42-NEXT: movd %ecx, %xmm0
; X64-SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE42-NEXT: pinsrd $2, %eax, %xmm0
; X64-SSE42-NEXT: pslld $24, %xmm0
; X64-SSE2-LABEL: convert_v3i8_to_v3f32:
; X64-SSE2: # %bb.0: # %entry
; X64-SSE2-NEXT: movzwl (%rsi), %eax
-; X64-SSE2-NEXT: movq %rax, %xmm0
+; X64-SSE2-NEXT: movd %eax, %xmm0
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
; X64-SSE42: # %bb.0: # %entry
; X64-SSE42-NEXT: movzbl 2(%rsi), %eax
; X64-SSE42-NEXT: movzwl (%rsi), %ecx
-; X64-SSE42-NEXT: movq %rcx, %xmm0
+; X64-SSE42-NEXT: movd %ecx, %xmm0
; X64-SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE42-NEXT: pinsrd $2, %eax, %xmm0
; X64-SSE42-NEXT: pand {{.*}}(%rip), %xmm0