See if we can simplify the demanded vector elements of the extraction's source vector before trying to simplify the demanded bits. This helps in particular with target shuffles and horizontal ops (hops).
llvm-svn: 360535
if (DemandedVecBits == 0)
  return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
- KnownBits KnownVec;
+ APInt KnownUndef, KnownZero;
APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx);
+ if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
+                                KnownZero, TLO, Depth + 1))
+   return true;
+
+ KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
                         KnownVec, TLO, Depth + 1))
  return true;
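
To illustrate the effect, here is a minimal sketch of the kind of pattern this now simplifies, modelled on the test_demanded_phaddw_128 test updated below (the function name and exact IR are illustrative assumptions, not part of the patch): only lane 0 of the horizontal add is extracted, and that lane is computed solely from lanes 0-1 of the first operand, so propagating the demanded vector elements through the extraction lets the broadcast feeding the second operand be dropped, matching the removal of vpbroadcastw in the checks below.

; Hypothetical example (assumed IR): extract only lane 0 of a phadd whose
; second operand is a splat. Lane 0 of phadd reads only lanes 0-1 of the
; first operand, so the shufflevector feeding %b is no longer demanded.
define void @demanded_phaddw_sketch(<8 x i16> %a0, <8 x i16> %a1, i16* %p) {
  %b = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> zeroinitializer
  %h = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %b)
  %e = extractelement <8 x i16> %h, i32 0
  store i16 %e, i16* %p
  ret void
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>)
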
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: movl %eax, %ecx
-; AVX12-NEXT: andl $3, %ecx
-; AVX12-NEXT: vmovq %rcx, %xmm0
-; AVX12-NEXT: shrl $2, %eax
-; AVX12-NEXT: vmovq %rax, %xmm1
-; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX12-NEXT: vpextrb $0, %xmm1, %eax
+; AVX12-NEXT: shrl $2, %ecx
+; AVX12-NEXT: vmovd %ecx, %xmm0
+; AVX12-NEXT: andl $3, %eax
+; AVX12-NEXT: vmovd %eax, %xmm1
+; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $2, %ecx
; AVX512-NEXT: andl $3, %ecx
-; AVX512-NEXT: vmovq %rcx, %xmm0
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $3, %eax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: movzbl %al, %ecx
; AVX12-NEXT: shrl $4, %ecx
-; AVX12-NEXT: vmovq %rcx, %xmm0
+; AVX12-NEXT: vmovd %ecx, %xmm0
; AVX12-NEXT: andl $15, %eax
-; AVX12-NEXT: vmovq %rax, %xmm1
+; AVX12-NEXT: vmovd %eax, %xmm1
; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
-; AVX512-NEXT: vmovq %rcx, %xmm0
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskpd %ymm0, %eax
; AVX12-NEXT: movl %eax, %ecx
-; AVX12-NEXT: andl $3, %ecx
-; AVX12-NEXT: vmovq %rcx, %xmm0
-; AVX12-NEXT: shrl $2, %eax
-; AVX12-NEXT: vmovq %rax, %xmm1
-; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX12-NEXT: vpextrb $0, %xmm1, %eax
+; AVX12-NEXT: shrl $2, %ecx
+; AVX12-NEXT: vmovd %ecx, %xmm0
+; AVX12-NEXT: andl $3, %eax
+; AVX12-NEXT: vmovd %eax, %xmm1
+; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $2, %ecx
; AVX512-NEXT: andl $3, %ecx
-; AVX512-NEXT: vmovq %rcx, %xmm0
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $3, %eax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX12: # %bb.0:
; AVX12-NEXT: vmovmskps %ymm0, %eax
; AVX12-NEXT: movl %eax, %ecx
-; AVX12-NEXT: andl $15, %ecx
-; AVX12-NEXT: vmovq %rcx, %xmm0
-; AVX12-NEXT: shrl $4, %eax
-; AVX12-NEXT: vmovq %rax, %xmm1
-; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX12-NEXT: vpextrb $0, %xmm1, %eax
+; AVX12-NEXT: shrl $4, %ecx
+; AVX12-NEXT: vmovd %ecx, %xmm0
+; AVX12-NEXT: andl $15, %eax
+; AVX12-NEXT: vmovd %eax, %xmm1
+; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
+; AVX12-NEXT: vpextrb $0, %xmm0, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
-; AVX512-NEXT: vmovq %rcx, %xmm0
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $4, %ecx
-; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vmovq %rax, %xmm1
+; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: addb %cl, %al
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $4, %ecx
-; AVX2-NEXT: vmovq %rcx, %xmm0
+; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vmovq %rax, %xmm1
+; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: addb %cl, %al
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: shrl $4, %ecx
-; AVX512-NEXT: vmovq %rcx, %xmm0
-; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: andl $15, %eax
-; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; X86-LABEL: test_demanded_phaddw_128:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpbroadcastw %xmm1, %xmm1
-; X86-NEXT: vphaddw %xmm1, %xmm0, %xmm0
+; X86-NEXT: vphaddw %xmm0, %xmm0, %xmm0
; X86-NEXT: vpextrw $0, %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: test_demanded_phaddw_128:
; X64: ## %bb.0:
-; X64-NEXT: vpbroadcastw %xmm1, %xmm1
-; X64-NEXT: vphaddw %xmm1, %xmm0, %xmm0
+; X64-NEXT: vphaddw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpextrw $0, %xmm0, (%rdi)
; X64-NEXT: retq
%1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> zeroinitializer
; X86-LABEL: test_demanded_phsubw_128:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpbroadcastw %xmm1, %xmm1
-; X86-NEXT: vphsubw %xmm1, %xmm0, %xmm0
+; X86-NEXT: vphsubw %xmm0, %xmm0, %xmm0
; X86-NEXT: vpextrw $2, %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: test_demanded_phsubw_128:
; X64: ## %bb.0:
-; X64-NEXT: vpbroadcastw %xmm1, %xmm1
-; X64-NEXT: vphsubw %xmm1, %xmm0, %xmm0
+; X64-NEXT: vphsubw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpextrw $2, %xmm0, (%rdi)
; X64-NEXT: retq
%1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> zeroinitializer
; X86-LABEL: test_demanded_phaddw_256:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpbroadcastw %xmm1, %xmm1
-; X86-NEXT: vphaddw %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpbroadcastw %xmm1, %xmm0
+; X86-NEXT: vphaddw %xmm0, %xmm0, %xmm0
; X86-NEXT: vpextrw $4, %xmm0, (%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_demanded_phaddw_256:
; X64: ## %bb.0:
-; X64-NEXT: vpbroadcastw %xmm1, %xmm1
-; X64-NEXT: vphaddw %xmm1, %xmm0, %xmm0
+; X64-NEXT: vpbroadcastw %xmm1, %xmm0
+; X64-NEXT: vphaddw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpextrw $4, %xmm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
; X86-LABEL: test_demanded_phsubw_256:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpbroadcastw %xmm0, %xmm0
; X86-NEXT: vphsubw %xmm1, %xmm0, %xmm0
; X86-NEXT: vpextrw $6, %xmm0, (%eax)
; X86-NEXT: vzeroupper
;
; X64-LABEL: test_demanded_phsubw_256:
; X64: ## %bb.0:
-; X64-NEXT: vpbroadcastw %xmm0, %xmm0
; X64-NEXT: vphsubw %xmm1, %xmm0, %xmm0
; X64-NEXT: vpextrw $6, %xmm0, (%rdi)
; X64-NEXT: vzeroupper
define void @store_v2f64_v2i64(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %val) {
; SSE2-LABEL: store_v2f64_v2i64:
; SSE2: ## %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm3, %xmm0
-; SSE2-NEXT: movdqa %xmm3, %xmm2
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pand %xmm2, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm3, %xmm4
-; SSE2-NEXT: movd %xmm4, %eax
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB1_2
; SSE2-NEXT: ## %bb.1: ## %cond.store
; SSE2-NEXT: movlpd %xmm1, (%rdi)
; SSE2-NEXT: LBB1_2: ## %else
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: pextrw $4, %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB1_4
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm7
-; SSE2-NEXT: pand %xmm5, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm7
-; SSE2-NEXT: movd %xmm7, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB2_2
; SSE2-NEXT: ## %bb.1: ## %cond.store
; SSE2-NEXT: movlpd %xmm2, (%rdi)
; SSE2-NEXT: LBB2_2: ## %else
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,0,2,2]
-; SSE2-NEXT: pand %xmm5, %xmm0
-; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: pextrw $4, %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB2_4
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pextrw $0, %xmm0, %eax
define void @store_v2i64_v2i64(<2 x i64> %trigger, <2 x i64>* %addr, <2 x i64> %val) {
; SSE2-LABEL: store_v2i64_v2i64:
; SSE2: ## %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm3, %xmm0
-; SSE2-NEXT: movdqa %xmm3, %xmm2
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pand %xmm2, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm3, %xmm4
-; SSE2-NEXT: movd %xmm4, %eax
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB7_2
; SSE2-NEXT: ## %bb.1: ## %cond.store
; SSE2-NEXT: movq %xmm1, (%rdi)
; SSE2-NEXT: LBB7_2: ## %else
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: pextrw $4, %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB7_4
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm7
-; SSE2-NEXT: pand %xmm5, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm7
-; SSE2-NEXT: movd %xmm7, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB8_2
; SSE2-NEXT: ## %bb.1: ## %cond.store
; SSE2-NEXT: movq %xmm2, (%rdi)
; SSE2-NEXT: LBB8_2: ## %else
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,0,2,2]
-; SSE2-NEXT: pand %xmm5, %xmm0
-; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: pextrw $4, %xmm0, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: je LBB8_4
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pextrw $0, %xmm0, %eax