// For a broadcast, peek through an extract element of index 0 to find the
// horizontal op: broadcast (ext_vec_elt HOp, 0)
+ EVT VT = N->getValueType(0);
if (Opcode == X86ISD::VBROADCAST) {
SDValue SrcOp = N->getOperand(0);
if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
+ SrcOp.getValueType() == MVT::f64 &&
+ SrcOp.getOperand(0).getValueType() == VT &&
+ isNullConstant(SrcOp.getOperand(1)))
N = SrcOp.getNode();
}
// movddup (hadd X, X) --> hadd X, X
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
assert((HOp.getValueType() == MVT::v2f64 ||
- HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
+ HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
+ "Unexpected type for h-op");
return HOp;
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
define float @pr26491(<4 x float> %a0) {
; SSE2-LABEL: pr26491:
%5 = fadd float %3, %4
ret float %5
}
+
+; When simplifying away a splat (broadcast), the horizontal-op type must match the shuffle type.
+
+define <4 x double> @PR41414(i64 %x, <4 x double> %y) {
+; SSE2-LABEL: PR41414:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq %rdi, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSE2-NEXT: subpd {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm3
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
+; SSE2-NEXT: addpd %xmm2, %xmm3
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
+; SSE2-NEXT: divpd %xmm3, %xmm1
+; SSE2-NEXT: divpd %xmm3, %xmm0
+; SSE2-NEXT: xorpd %xmm2, %xmm2
+; SSE2-NEXT: addpd %xmm2, %xmm0
+; SSE2-NEXT: addpd %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR41414:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movq %rdi, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: haddpd %xmm2, %xmm2
+; SSSE3-NEXT: divpd %xmm2, %xmm1
+; SSSE3-NEXT: divpd %xmm2, %xmm0
+; SSSE3-NEXT: xorpd %xmm2, %xmm2
+; SSSE3-NEXT: addpd %xmm2, %xmm0
+; SSSE3-NEXT: addpd %xmm2, %xmm1
+; SSSE3-NEXT: retq
+;
+; AVX1-LABEL: PR41414:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovq %rdi, %xmm1
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR41414:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %rdi, %xmm1
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
+; AVX2-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %conv = uitofp i64 %x to double
+ %t0 = insertelement <4 x double> undef, double %conv, i32 0
+ %t1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
+ %t2 = fdiv <4 x double> %y, %t1
+ %t3 = fadd <4 x double> zeroinitializer, %t2
+ ret <4 x double> %t3
+}