ret <2 x i64> %1
}
+; Make sure we fold fully undef input vectors. We previously folded only when
+; the undef had a single use, so use 2 undefs.
+define <4 x i32> @test17(<4 x i32> %a0, <4 x i32>* %dummy) {
+; X86-LABEL: test17:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pslld $6, %xmm0
+; X86-NEXT: movdqa %xmm0, (%eax)
+; X86-NEXT: pslld $7, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test17:
+; X64: # %bb.0:
+; X64-NEXT: pslld $6, %xmm0
+; X64-NEXT: movdqa %xmm0, (%rdi)
+; X64-NEXT: pslld $7, %xmm0
+; X64-NEXT: retq
+ %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
+ store <4 x i32> %a, <4 x i32>* %dummy
+ %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 7)
+ ret <4 x i32> %res
+}
+
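+; Same multi-use undef pattern as test17, but with different shift amounts.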
+define <4 x i32> @test18(<4 x i32> %a0, <4 x i32>* %dummy) {
+; X86-LABEL: test18:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pslld $3, %xmm0
+; X86-NEXT: movdqa %xmm0, (%eax)
+; X86-NEXT: pslld $1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test18:
+; X64: # %bb.0:
+; X64-NEXT: pslld $3, %xmm0
+; X64-NEXT: movdqa %xmm0, (%rdi)
+; X64-NEXT: pslld $1, %xmm0
+; X64-NEXT: retq
+ %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 3)
+ store <4 x i32> %a, <4 x i32>* %dummy
+ %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 1)
+ ret <4 x i32> %res
+}
+
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)