case X86::FsFLD0SS:
case X86::FsFLD0SD:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
- case X86::AVX_SET0:
+ case X86::AVX_SET0: {
assert(HasAVX && "AVX not supported");
- return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
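+ // Zero the xmm sub-register instead of the full ymm register: a VEX-encoded
+ // 128-bit XOR implicitly clears bits 255:128 of the corresponding ymm register.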
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ MIB->getOperand(0).setReg(XReg);
+ return Expand2AddrUndef(MIB, get(X86::VXORPSrr));
+ }
case X86::AVX512_128_SET0:
case X86::AVX512_FsFLD0SS:
case X86::AVX512_FsFLD0SD: {
bool HasVLX = Subtarget.hasVLX();
unsigned SrcReg = MIB->getOperand(0).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
- if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
- return Expand2AddrUndef(MIB,
- get(HasVLX ? X86::VPXORDZ256rr : X86::VXORPSYrr));
+ if (HasVLX)
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
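+ // Without VLX, registers ymm0-ymm15 (encoding value < 16) are still
+ // VEX-encodable, so zero the xmm sub-register and rely on the implicit
+ // zeroing of the upper 128 bits.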
+ if (TRI->getEncodingValue(SrcReg) < 16) {
+ unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ MIB->getOperand(0).setReg(XReg);
+ return Expand2AddrUndef(MIB, get(X86::VXORPSrr));
+ }
// Extended register without VLX. Use a larger XOR.
SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
MIB->getOperand(0).setReg(SrcReg);
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; CHECK-NEXT: vmovaps %ymm0, (%eax)
; CHECK-LABEL: func:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups 0, %xmm0
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
; CHECK-NEXT: vbroadcastss 32, %xmm3
; CHECK-NEXT: vmovaps %ymm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
%tmp = load <4 x float>, <4 x float>* null, align 1
%tmp14 = getelementptr <4 x float>, <4 x float>* null, i32 2
%tmp15 = load <4 x float>, <4 x float>* %tmp14, align 1
define void @bad_cast() {
; CHECK-LABEL: bad_cast:
; CHECK: # BB#0:
-; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm0, (%eax)
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: vzeroupper
;
; X32-AVX1-LABEL: allones_v32i8:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v32i8:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v16i16:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v16i16:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v8i32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v8i32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v4i64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v4i64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v4f64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v4f64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v4f64_optsize:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v4f64_optsize:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v8f32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v8f32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v8f32_optsize:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
;
; X64-AVX1-LABEL: allones_v8f32_optsize:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
;
; X32-AVX1-LABEL: allones_v64i8:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: allones_v64i8:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
; X32-AVX1-LABEL: allones_v32i16:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: allones_v32i16:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
; X32-AVX1-LABEL: allones_v16i32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: allones_v16i32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
; X32-AVX1-LABEL: allones_v8i64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: allones_v8i64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
; X32-AVX1-LABEL: allones_v8f64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: allones_v8f64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
; X32-AVX1-LABEL: allones_v16f32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: allones_v16f32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
; CHECK-LABEL: zero256:
; CHECK: ## BB#0:
; CHECK-NEXT: movq _x@{{.*}}(%rip), %rax
-; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %ymm0, (%rax)
; CHECK-NEXT: movq _y@{{.*}}(%rip), %rax
; CHECK-NEXT: vmovaps %ymm0, (%rax)
define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones:
; CHECK: ## BB#0: ## %allocas
-; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones2:
; CHECK: ## BB#0: ## %allocas
-; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; AVX-LABEL: castA:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX-NEXT: retq
%shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
; AVX-LABEL: castB:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX-NEXT: retq
%shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; AVX1-LABEL: castC:
; AVX1: ## BB#0:
; AVX1-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: castC:
; AVX2: ## BB#0:
; AVX2-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
%shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
define <8 x float> @sitofp_insert_zero_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_zero_v8i32:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_insert_zero_v8i32:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
define <8 x float> @sitofp_shuffle_zero_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_shuffle_zero_v8i32:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_shuffle_zero_v8i32:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_allbits_v8i32:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
;
; X64-LABEL: sitofp_insert_allbits_v8i32:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
define <8 x float> @sitofp_shuffle_allbits_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_shuffle_allbits_v8i32:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
;
; X64-LABEL: sitofp_shuffle_allbits_v8i32:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
define <8 x float> @sitofp_insert_constants_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_constants_v8i32:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
;
; X64-LABEL: sitofp_insert_constants_v8i32:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
define <4 x double> @test_mm256_andnot_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_andnot_pd:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X32-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
; X32-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X32-NEXT: vandps %ymm1, %ymm0, %ymm0
;
; X64-LABEL: test_mm256_andnot_pd:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X64-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
; X64-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X64-NEXT: vandps %ymm1, %ymm0, %ymm0
define <4 x double> @test_mm256_setzero_pd() nounwind {
; X32-LABEL: test_mm256_setzero_pd:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setzero_pd:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
ret <4 x double> zeroinitializer
}
define <8 x float> @test_mm256_setzero_ps() nounwind {
; X32-LABEL: test_mm256_setzero_ps:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setzero_ps:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
ret <8 x float> zeroinitializer
}
define <4 x i64> @test_mm256_setzero_si256() nounwind {
; X32-LABEL: test_mm256_setzero_si256:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setzero_si256:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
ret <4 x i64> zeroinitializer
}
; X86-LABEL: test_x86_avx_storeu_pd_256:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-NEXT: vmovupd %ymm0, (%eax)
; X86-NEXT: vzeroupper
;
; X64-LABEL: test_x86_avx_storeu_pd_256:
; X64: # BB#0:
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT: vmovupd %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; AVX-LABEL: movnt_pd:
; AVX: # BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x57,0xc9]
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK_O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK_O0-NEXT: # implicit-def: %YMM1
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
-; CHECK_O0-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK_O0-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; CHECK_O0-NEXT: retq
%val = load float, float* %ptr
; CHECK_O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK_O0-NEXT: # implicit-def: %YMM1
; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1
-; CHECK_O0-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; CHECK_O0-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK_O0-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm2[1,2,3]
; CHECK_O0-NEXT: retq
%val = load double, double* %ptr
; CHECK-LABEL: andpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: andpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: xorpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: xorpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: orpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vorpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: orpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vorpd {{.*}}(%rip), %ymm0, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: andnotpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandnpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: andnotpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandnpd (%rdi), %ymm0, %ymm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
; X86-LABEL: select00:
; X86: # BB#0:
; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp)
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: je .LBB0_2
; X86-NEXT: # BB#1:
; X86-NEXT: vmovaps %ymm0, %ymm1
;
; X64-LABEL: select00:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
; X64-NEXT: je .LBB0_2
; X64-NEXT: # BB#1:
; X86-LABEL: select01:
; X86: # BB#0:
; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp)
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: je .LBB1_2
; X86-NEXT: # BB#1:
; X86-NEXT: vmovaps %ymm0, %ymm1
;
; X64-LABEL: select01:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
; X64-NEXT: je .LBB1_2
; X64-NEXT: # BB#1:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovaps (%ecx), %xmm0
-; X32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vmovaps %ymm1, (%eax)
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
; X64-LABEL: PR29088:
; X64: # BB#0:
; X64-NEXT: vmovaps (%rdi), %xmm0
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vmovaps %ymm1, (%rsi)
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
define void @t9(i64* %p) {
; CHECK-LABEL: t9:
; CHECK: ## BB#0:
-; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovups %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz23:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
define <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz23_optsize:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
define <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz67:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz67_optsize:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_01zz:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
define <4 x double> @shuffle_v4f64_01zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_01zz_optsize:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_45zz:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
define <4 x double> @shuffle_v4f64_45zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_45zz_optsize:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
define <4 x i64> @mul_const4(<4 x i64> %x) {
; X32-LABEL: mul_const4:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const4:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpsubq %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
define <8 x i32> @mul_const5(<8 x i32> %x) {
; X32-LABEL: mul_const5:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: mul_const5:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
%y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i32> %y
; X32-LABEL: test_x86_avx2_gather_d_ps_256:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X32-NEXT: vgatherdps %ymm1, (%eax,%ymm0,4), %ymm2
; X32-NEXT: vmovaps %ymm2, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X64-NEXT: vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
; X64-NEXT: vmovaps %ymm2, %ymm0
; X64-NEXT: retq
; X32-LABEL: test_x86_avx2_gather_d_pd_256:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vxorpd %ymm2, %ymm2, %ymm2
+; X32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; X32-NEXT: vgatherdpd %ymm1, (%eax,%xmm0,8), %ymm2
; X32-NEXT: vmovapd %ymm2, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
; X64: # BB#0:
-; X64-NEXT: vxorpd %ymm2, %ymm2, %ymm2
+; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; X64-NEXT: vgatherdpd %ymm1, (%rdi,%xmm0,8), %ymm2
; X64-NEXT: vmovapd %ymm2, %ymm0
; X64-NEXT: retq
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpgatherdd %ymm2, (%eax,%ymm0,2), %ymm1
; X32-NEXT: vmovdqa %ymm1, %ymm0
; X32-NEXT: retl
; X64-LABEL: test_mm256_i32gather_epi32:
; X64: # BB#0:
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovdqa %ymm1, %ymm0
; X64-NEXT: retq
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1
; X32-NEXT: vmovdqa %ymm1, %ymm0
; X32-NEXT: retl
; X64-LABEL: test_mm256_i32gather_epi64:
; X64: # BB#0:
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1
; X64-NEXT: vmovdqa %ymm1, %ymm0
; X64-NEXT: retq
; X32-LABEL: test_mm256_i32gather_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X32-NEXT: vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1
; X32-NEXT: vmovapd %ymm1, %ymm0
;
; X64-LABEL: test_mm256_i32gather_pd:
; X64: # BB#0:
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1
; X64-NEXT: vmovapd %ymm1, %ymm0
; X32-LABEL: test_mm256_i32gather_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
; X32-NEXT: vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1
; X32-NEXT: vmovaps %ymm1, %ymm0
;
; X64-LABEL: test_mm256_i32gather_ps:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovaps %ymm1, %ymm0
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1
; X32-NEXT: vmovdqa %ymm1, %ymm0
; X32-NEXT: retl
; X64-LABEL: test_mm256_i64gather_epi64:
; X64: # BB#0:
; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovdqa %ymm1, %ymm0
; X64-NEXT: retq
; X32-LABEL: test_mm256_i64gather_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X32-NEXT: vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1
; X32-NEXT: vmovapd %ymm1, %ymm0
;
; X64-LABEL: test_mm256_i64gather_pd:
; X64: # BB#0:
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovapd %ymm1, %ymm0
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8_7:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: sra_v32i8_7:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
}
define void @isel_crash_32b(i8* %cV_R.addr) {
-; X32-LABEL: isel_crash_32b:
-; X32: ## BB#0: ## %eintry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: Lcfi1:
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: Lcfi2:
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: Lcfi3:
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-32, %esp
-; X32-NEXT: subl $128, %esp
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
-; X32-NEXT: vmovaps %ymm0, (%esp)
-; X32-NEXT: vpbroadcastb (%eax), %ymm1
-; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
+; X32-AVX2-LABEL: isel_crash_32b:
+; X32-AVX2: ## BB#0: ## %eintry
+; X32-AVX2-NEXT: pushl %ebp
+; X32-AVX2-NEXT: Lcfi1:
+; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT: Lcfi2:
+; X32-AVX2-NEXT: .cfi_offset %ebp, -8
+; X32-AVX2-NEXT: movl %esp, %ebp
+; X32-AVX2-NEXT: Lcfi3:
+; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT: andl $-32, %esp
+; X32-AVX2-NEXT: subl $128, %esp
+; X32-AVX2-NEXT: movl 8(%ebp), %eax
+; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT: vpbroadcastb (%eax), %ymm1
+; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: movl %ebp, %esp
+; X32-AVX2-NEXT: popl %ebp
+; X32-AVX2-NEXT: vzeroupper
+; X32-AVX2-NEXT: retl
;
-; X64-LABEL: isel_crash_32b:
-; X64: ## BB#0: ## %eintry
-; X64-NEXT: pushq %rbp
-; X64-NEXT: Lcfi0:
-; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: Lcfi1:
-; X64-NEXT: .cfi_offset %rbp, -16
-; X64-NEXT: movq %rsp, %rbp
-; X64-NEXT: Lcfi2:
-; X64-NEXT: .cfi_def_cfa_register %rbp
-; X64-NEXT: andq $-32, %rsp
-; X64-NEXT: subq $128, %rsp
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
-; X64-NEXT: vmovaps %ymm0, (%rsp)
-; X64-NEXT: movb (%rdi), %al
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpbroadcastb %xmm1, %ymm1
-; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; X64-NEXT: movq %rbp, %rsp
-; X64-NEXT: popq %rbp
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; X64-AVX2-LABEL: isel_crash_32b:
+; X64-AVX2: ## BB#0: ## %eintry
+; X64-AVX2-NEXT: pushq %rbp
+; X64-AVX2-NEXT: Lcfi0:
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT: Lcfi1:
+; X64-AVX2-NEXT: .cfi_offset %rbp, -16
+; X64-AVX2-NEXT: movq %rsp, %rbp
+; X64-AVX2-NEXT: Lcfi2:
+; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
+; X64-AVX2-NEXT: andq $-32, %rsp
+; X64-AVX2-NEXT: subq $128, %rsp
+; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
+; X64-AVX2-NEXT: movb (%rdi), %al
+; X64-AVX2-NEXT: vmovd %eax, %xmm1
+; X64-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT: movq %rbp, %rsp
+; X64-AVX2-NEXT: popq %rbp
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X32-AVX512VL-LABEL: isel_crash_32b:
+; X32-AVX512VL: ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT: pushl %ebp
+; X32-AVX512VL-NEXT: Lcfi1:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT: Lcfi2:
+; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT: movl %esp, %ebp
+; X32-AVX512VL-NEXT: Lcfi3:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT: andl $-32, %esp
+; X32-AVX512VL-NEXT: subl $128, %esp
+; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT: vpbroadcastb (%eax), %ymm1
+; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: movl %ebp, %esp
+; X32-AVX512VL-NEXT: popl %ebp
+; X32-AVX512VL-NEXT: vzeroupper
+; X32-AVX512VL-NEXT: retl
+;
+; X64-AVX512VL-LABEL: isel_crash_32b:
+; X64-AVX512VL: ## BB#0: ## %eintry
+; X64-AVX512VL-NEXT: pushq %rbp
+; X64-AVX512VL-NEXT: Lcfi0:
+; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX512VL-NEXT: Lcfi1:
+; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
+; X64-AVX512VL-NEXT: movq %rsp, %rbp
+; X64-AVX512VL-NEXT: Lcfi2:
+; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
+; X64-AVX512VL-NEXT: andq $-32, %rsp
+; X64-AVX512VL-NEXT: subq $128, %rsp
+; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
+; X64-AVX512VL-NEXT: movb (%rdi), %al
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1
+; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT: movq %rbp, %rsp
+; X64-AVX512VL-NEXT: popq %rbp
+; X64-AVX512VL-NEXT: vzeroupper
+; X64-AVX512VL-NEXT: retq
eintry:
%__a.addr.i = alloca <4 x i64>, align 16
%__b.addr.i = alloca <4 x i64>, align 16
}
define void @isel_crash_16w(i16* %cV_R.addr) {
-; X32-LABEL: isel_crash_16w:
-; X32: ## BB#0: ## %eintry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: Lcfi5:
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: Lcfi6:
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: Lcfi7:
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-32, %esp
-; X32-NEXT: subl $128, %esp
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
-; X32-NEXT: vmovaps %ymm0, (%esp)
-; X32-NEXT: vpbroadcastw (%eax), %ymm1
-; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
+; X32-AVX2-LABEL: isel_crash_16w:
+; X32-AVX2: ## BB#0: ## %eintry
+; X32-AVX2-NEXT: pushl %ebp
+; X32-AVX2-NEXT: Lcfi5:
+; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT: Lcfi6:
+; X32-AVX2-NEXT: .cfi_offset %ebp, -8
+; X32-AVX2-NEXT: movl %esp, %ebp
+; X32-AVX2-NEXT: Lcfi7:
+; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT: andl $-32, %esp
+; X32-AVX2-NEXT: subl $128, %esp
+; X32-AVX2-NEXT: movl 8(%ebp), %eax
+; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT: vpbroadcastw (%eax), %ymm1
+; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: movl %ebp, %esp
+; X32-AVX2-NEXT: popl %ebp
+; X32-AVX2-NEXT: vzeroupper
+; X32-AVX2-NEXT: retl
;
-; X64-LABEL: isel_crash_16w:
-; X64: ## BB#0: ## %eintry
-; X64-NEXT: pushq %rbp
-; X64-NEXT: Lcfi3:
-; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: Lcfi4:
-; X64-NEXT: .cfi_offset %rbp, -16
-; X64-NEXT: movq %rsp, %rbp
-; X64-NEXT: Lcfi5:
-; X64-NEXT: .cfi_def_cfa_register %rbp
-; X64-NEXT: andq $-32, %rsp
-; X64-NEXT: subq $128, %rsp
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
-; X64-NEXT: vmovaps %ymm0, (%rsp)
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpbroadcastw %xmm1, %ymm1
-; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; X64-NEXT: movq %rbp, %rsp
-; X64-NEXT: popq %rbp
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; X64-AVX2-LABEL: isel_crash_16w:
+; X64-AVX2: ## BB#0: ## %eintry
+; X64-AVX2-NEXT: pushq %rbp
+; X64-AVX2-NEXT: Lcfi3:
+; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT: Lcfi4:
+; X64-AVX2-NEXT: .cfi_offset %rbp, -16
+; X64-AVX2-NEXT: movq %rsp, %rbp
+; X64-AVX2-NEXT: Lcfi5:
+; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
+; X64-AVX2-NEXT: andq $-32, %rsp
+; X64-AVX2-NEXT: subq $128, %rsp
+; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
+; X64-AVX2-NEXT: movw (%rdi), %ax
+; X64-AVX2-NEXT: vmovd %eax, %xmm1
+; X64-AVX2-NEXT: vpbroadcastw %xmm1, %ymm1
+; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT: movq %rbp, %rsp
+; X64-AVX2-NEXT: popq %rbp
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X32-AVX512VL-LABEL: isel_crash_16w:
+; X32-AVX512VL: ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT: pushl %ebp
+; X32-AVX512VL-NEXT: Lcfi5:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT: Lcfi6:
+; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT: movl %esp, %ebp
+; X32-AVX512VL-NEXT: Lcfi7:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT: andl $-32, %esp
+; X32-AVX512VL-NEXT: subl $128, %esp
+; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT: vpbroadcastw (%eax), %ymm1
+; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: movl %ebp, %esp
+; X32-AVX512VL-NEXT: popl %ebp
+; X32-AVX512VL-NEXT: vzeroupper
+; X32-AVX512VL-NEXT: retl
+;
+; X64-AVX512VL-LABEL: isel_crash_16w:
+; X64-AVX512VL: ## BB#0: ## %eintry
+; X64-AVX512VL-NEXT: pushq %rbp
+; X64-AVX512VL-NEXT: Lcfi3:
+; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
+; X64-AVX512VL-NEXT: Lcfi4:
+; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
+; X64-AVX512VL-NEXT: movq %rsp, %rbp
+; X64-AVX512VL-NEXT: Lcfi5:
+; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
+; X64-AVX512VL-NEXT: andq $-32, %rsp
+; X64-AVX512VL-NEXT: subq $128, %rsp
+; X64-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
+; X64-AVX512VL-NEXT: movw (%rdi), %ax
+; X64-AVX512VL-NEXT: vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm1
+; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT: movq %rbp, %rsp
+; X64-AVX512VL-NEXT: popq %rbp
+; X64-AVX512VL-NEXT: vzeroupper
+; X64-AVX512VL-NEXT: retq
eintry:
%__a.addr.i = alloca <4 x i64>, align 16
%__b.addr.i = alloca <4 x i64>, align 16
}
define void @isel_crash_8d(i32* %cV_R.addr) {
-; X32-LABEL: isel_crash_8d:
-; X32: ## BB#0: ## %eintry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: Lcfi9:
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: Lcfi10:
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: Lcfi11:
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-32, %esp
-; X32-NEXT: subl $128, %esp
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
-; X32-NEXT: vmovaps %ymm0, (%esp)
-; X32-NEXT: vbroadcastss (%eax), %ymm1
-; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
+; X32-AVX2-LABEL: isel_crash_8d:
+; X32-AVX2: ## BB#0: ## %eintry
+; X32-AVX2-NEXT: pushl %ebp
+; X32-AVX2-NEXT: Lcfi9:
+; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT: Lcfi10:
+; X32-AVX2-NEXT: .cfi_offset %ebp, -8
+; X32-AVX2-NEXT: movl %esp, %ebp
+; X32-AVX2-NEXT: Lcfi11:
+; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT: andl $-32, %esp
+; X32-AVX2-NEXT: subl $128, %esp
+; X32-AVX2-NEXT: movl 8(%ebp), %eax
+; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT: vbroadcastss (%eax), %ymm1
+; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: movl %ebp, %esp
+; X32-AVX2-NEXT: popl %ebp
+; X32-AVX2-NEXT: vzeroupper
+; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: isel_crash_8d:
; X64-AVX2: ## BB#0: ## %eintry
; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
; X64-AVX2-NEXT: andq $-32, %rsp
; X64-AVX2-NEXT: subq $128, %rsp
-; X64-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX2-NEXT: movl (%rdi), %eax
; X64-AVX2-NEXT: vmovd %eax, %xmm1
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
+; X32-AVX512VL-LABEL: isel_crash_8d:
+; X32-AVX512VL: ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT: pushl %ebp
+; X32-AVX512VL-NEXT: Lcfi9:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT: Lcfi10:
+; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT: movl %esp, %ebp
+; X32-AVX512VL-NEXT: Lcfi11:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT: andl $-32, %esp
+; X32-AVX512VL-NEXT: subl $128, %esp
+; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT: vbroadcastss (%eax), %ymm1
+; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: movl %ebp, %esp
+; X32-AVX512VL-NEXT: popl %ebp
+; X32-AVX512VL-NEXT: vzeroupper
+; X32-AVX512VL-NEXT: retl
+;
; X64-AVX512VL-LABEL: isel_crash_8d:
; X64-AVX512VL: ## BB#0: ## %eintry
; X64-AVX512VL-NEXT: pushq %rbp
}
define void @isel_crash_4q(i64* %cV_R.addr) {
-; X32-LABEL: isel_crash_4q:
-; X32: ## BB#0: ## %eintry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: Lcfi13:
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: Lcfi14:
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: Lcfi15:
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-32, %esp
-; X32-NEXT: subl $128, %esp
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
-; X32-NEXT: vmovaps %ymm0, (%esp)
-; X32-NEXT: movl (%eax), %ecx
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: vmovd %ecx, %xmm1
-; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
-; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
-; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
-; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
+; X32-AVX2-LABEL: isel_crash_4q:
+; X32-AVX2: ## BB#0: ## %eintry
+; X32-AVX2-NEXT: pushl %ebp
+; X32-AVX2-NEXT: Lcfi13:
+; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT: Lcfi14:
+; X32-AVX2-NEXT: .cfi_offset %ebp, -8
+; X32-AVX2-NEXT: movl %esp, %ebp
+; X32-AVX2-NEXT: Lcfi15:
+; X32-AVX2-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT: andl $-32, %esp
+; X32-AVX2-NEXT: subl $128, %esp
+; X32-AVX2-NEXT: movl 8(%ebp), %eax
+; X32-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX2-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT: movl (%eax), %ecx
+; X32-AVX2-NEXT: movl 4(%eax), %eax
+; X32-AVX2-NEXT: vmovd %ecx, %xmm1
+; X32-AVX2-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; X32-AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
+; X32-AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT: movl %ebp, %esp
+; X32-AVX2-NEXT: popl %ebp
+; X32-AVX2-NEXT: vzeroupper
+; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: isel_crash_4q:
; X64-AVX2: ## BB#0: ## %eintry
; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
; X64-AVX2-NEXT: andq $-32, %rsp
; X64-AVX2-NEXT: subq $128, %rsp
-; X64-AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX2-NEXT: movq (%rdi), %rax
; X64-AVX2-NEXT: vmovq %rax, %xmm1
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
+; X32-AVX512VL-LABEL: isel_crash_4q:
+; X32-AVX512VL: ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT: pushl %ebp
+; X32-AVX512VL-NEXT: Lcfi13:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT: Lcfi14:
+; X32-AVX512VL-NEXT: .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT: movl %esp, %ebp
+; X32-AVX512VL-NEXT: Lcfi15:
+; X32-AVX512VL-NEXT: .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT: andl $-32, %esp
+; X32-AVX512VL-NEXT: subl $128, %esp
+; X32-AVX512VL-NEXT: movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT: movl (%eax), %ecx
+; X32-AVX512VL-NEXT: movl 4(%eax), %eax
+; X32-AVX512VL-NEXT: vmovd %ecx, %xmm1
+; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
+; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
+; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT: movl %ebp, %esp
+; X32-AVX512VL-NEXT: popl %ebp
+; X32-AVX512VL-NEXT: vzeroupper
+; X32-AVX512VL-NEXT: retl
+;
; X64-AVX512VL-LABEL: isel_crash_4q:
; X64-AVX512VL: ## BB#0: ## %eintry
; X64-AVX512VL-NEXT: pushq %rbp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovaps (%ecx), %xmm0
-; X32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vmovaps %ymm1, (%eax)
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
; X64-LABEL: PR29088:
; X64: # BB#0:
; X64-NEXT: vmovaps (%rdi), %xmm0
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vmovaps %ymm1, (%rsi)
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-LABEL: shl_16i16:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
;
; X64-LABEL: shl_16i16:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-LABEL: ashr_16i16:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
;
; X64-LABEL: ashr_16i16:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3
define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-LABEL: lshr_16i16:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
;
; X64-LABEL: lshr_16i16:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
-; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
-; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
-; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
-; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
-; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: # kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
-; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
; NOVLDQ-LABEL: sitofp_8i1_float:
; NOVLDQ: # BB#0:
; NOVLDQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; NOVLDQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-LABEL: sitofp_8i1_float:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512DQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
; NOVL-LABEL: sitofp_4i1_double:
; NOVL: # BB#0:
-; NOVL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
; NOVL-LABEL: uitofp_8i1_float:
; NOVL: # BB#0:
; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
; NOVL-LABEL: uitofp_8i1_double:
; NOVL: # BB#0:
; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
-; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxbd (%rdi), %ymm1
-; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: vpmovsxwd (%rdi), %ymm1
-; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; X32-LABEL: test_mm512_zextpd256_pd512:
; X32: # BB#0:
; X32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextpd256_pd512:
; X64: # BB#0:
; X64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <4 x double> %a0, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; X32-LABEL: test_mm512_zextps256_ps512:
; X32: # BB#0:
; X32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextps256_ps512:
; X64: # BB#0:
; X64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <8 x float> %a0, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; X32-LABEL: test_mm512_zextsi256_si512:
; X32: # BB#0:
; X32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextsi256_si512:
; X64: # BB#0:
; X64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: movq %rbp, %rsp
; AVX512DQ-NEXT: popq %rbp
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: movq %rbp, %rsp
; AVX512DQ-NEXT: popq %rbp
; ALL-LABEL: _sd8xdouble_mask:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
-; ALL-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; ALL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; ALL-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; ALL-NEXT: vmovapd %zmm1, %zmm0
; ALL-LABEL: _sd8xdouble_maskz:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; ALL-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; ALL-NEXT: retq
; ALL-LABEL: _sd8xdouble_mask_load:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; ALL-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; ALL-NEXT: retq
; ALL-LABEL: _sd8xdouble_maskz_load:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; ALL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
; ALL-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; ALL-NEXT: retq
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
;
; AVX1-LABEL: _clearupper4xi64a:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper4xi64a:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
%x0 = extractelement <4 x i64> %0, i32 0
;
; AVX2-LABEL: _clearupper8xi32a:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
%x0 = extractelement <8 x i32> %0, i32 0
;
; AVX1-LABEL: _clearupper4xi64b:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper4xi64b:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
%x32 = bitcast <4 x i64> %0 to <8 x i32>
;
; AVX2-LABEL: _clearupper8xi32b:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
%x16 = bitcast <8 x i32> %0 to <16 x i16>
;
; AVX1-LABEL: _clearupper4xi64c:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper4xi64c:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
%r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
;
; AVX2-LABEL: _clearupper8xi32c:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
%r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
; AVX2-LABEL: combine_v4i64_abs_abs:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
;
; AVX-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
;
; KNL-LABEL: test16:
; KNL: # BB#0:
-; KNL-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; KNL-NEXT: vpcmpeqd %zmm3, %zmm4, %k1
; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k2
define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
; FMA-LABEL: test_v4f64_fneg_fmul:
; FMA: # BB#0:
-; FMA-NEXT: vxorpd %ymm2, %ymm2, %ymm2
+; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_fneg_fmul:
; FMA4: # BB#0:
-; FMA4-NEXT: vxorpd %ymm2, %ymm2, %ymm2
+; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fneg_fmul:
; FMA: # BB#0:
-; FMA-NEXT: vxorps %ymm4, %ymm4, %ymm4
+; FMA-NEXT: vxorps %xmm4, %xmm4, %xmm4
; FMA-NEXT: vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT: vfnmsub213ps %ymm4, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4: # BB#0:
-; FMA4-NEXT: vxorps %ymm4, %ymm4, %ymm4
+; FMA4-NEXT: vxorps %xmm4, %xmm4, %xmm4
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul:
; FMA: # BB#0:
-; FMA-NEXT: vxorpd %ymm4, %ymm4, %ymm4
+; FMA-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; FMA-NEXT: vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT: vfnmsub213pd %ymm4, %ymm3, %ymm1
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4: # BB#0:
-; FMA4-NEXT: vxorpd %ymm4, %ymm4, %ymm4
+; FMA4-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: retq
; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vbroadcastss 304(%ecx), %xmm0
-; AVX-32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6,7]
; AVX-32-NEXT: vmovups %ymm0, 608(%eax)
; AVX-32-NEXT: vzeroupper
; AVX-64-LABEL: PR15298:
; AVX-64: # BB#0: # %L.entry
; AVX-64-NEXT: vbroadcastss 304(%rdi), %xmm0
-; AVX-64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX-64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6,7]
; AVX-64-NEXT: vmovups %ymm0, 608(%rsi)
; AVX-64-NEXT: vzeroupper
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX: # BB#0:
-; AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; AVX-NEXT: retq
%1 = insertelement <4 x double> %a, double 0.0, i32 1
;
; AVX1-LABEL: insert_v4i64_01z3:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v4i64_01z3:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
%1 = insertelement <4 x i64> %a, i64 0, i32 2
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT: retq
%1 = insertelement <8 x float> %a, float 0.0, i32 0
;
; AVX1-LABEL: insert_v8i32_z12345z7:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v8i32_z12345z7:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT: retq
%1 = insertelement <8 x i32> %a, i32 0, i32 0
; AVX2-LABEL: _Z10test_shortPsS_i:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
-; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB0_1: # %vector.body
; AVX512-LABEL: _Z10test_shortPsS_i:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
-; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB0_1: # %vector.body
; AVX2-LABEL: test_unsigned_short:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
-; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB1_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512-LABEL: test_unsigned_short:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
-; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB1_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
; AVX2-LABEL: _Z9test_charPcS_i:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
-; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB2_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0
-; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB2_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL_64-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; KNL_32-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL_32-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_64-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero
-; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL_32-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero
-; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_64: # BB#0:
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32: # BB#0:
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
;
; AVX2-LABEL: test11a:
; AVX2: ## BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
;
; AVX2-LABEL: test12:
; AVX2: ## BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; YMM-NEXT: movl %esp, %ebp
; YMM-NEXT: andl $-32, %esp
; YMM-NEXT: subl $96, %esp
-; YMM-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; YMM-NEXT: vxorps %xmm0, %xmm0, %xmm0
; YMM-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; YMM-NEXT: leal {{[0-9]+}}(%esp), %eax
; YMM-NEXT: movl %eax, (%esp)
define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_34z6:
; AVX: # BB#0:
-; AVX-NEXT: vxorpd %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_f64_34z6:
; X32-AVX: # BB#0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vxorpd %ymm0, %ymm0, %ymm0
+; X32-AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 3
; X32-AVX-LABEL: merge_8f32_2f32_23z5:
; X32-AVX: # BB#0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vxorpd %ymm0, %ymm0, %ymm0
+; X32-AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 2
define <8 x float> @merge_8f32_f32_1u3u5zu8(float* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_8f32_f32_1u3u5zu8:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_8f32_f32_1u3u5zu8:
; X32-AVX: # BB#0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds float, float* %ptr, i64 1
define <8 x i32> @merge_8i32_i32_1u3u5zu8(i32* %ptr) nounwind uwtable noinline ssp {
; AVX1-LABEL: merge_8i32_i32_1u3u5zu8:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: merge_8i32_i32_1u3u5zu8:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: merge_8i32_i32_1u3u5zu8:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; AVX512F-NEXT: retq
;
; X32-AVX-LABEL: merge_8i32_i32_1u3u5zu8:
; X32-AVX: # BB#0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_4f64_z2:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; ALL-NEXT: vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vxorpd %ymm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; X32-AVX512F-NEXT: vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
%ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
; ALL-NEXT: vmovupd 8(%rdi), %xmm0
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-NEXT: vmovupd 8(%eax), %xmm0
; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX512F-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_4i64_z3:
; ALL: # BB#0:
-; ALL-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; ALL-NEXT: vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X32-AVX512F-NEXT: vinserti64x4 $1, 96(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
%ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
;
; AVX-LABEL: test_zero_v8f32:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovntps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX-LABEL: test_zero_v8i32:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovntps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX-LABEL: test_zero_v4f64:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovntps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX-LABEL: test_zero_v4i64:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovntps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX-LABEL: test_zero_v16i16:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovntps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX-LABEL: test_zero_v32i8:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovntps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX2-LABEL: test_masked_v16i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
; AVX2-NEXT: vpxor %ymm5, %ymm3, %ymm3
;
; X64-AVX2-LABEL: trunc_ashr_v4i64:
; X64-AVX2: # BB#0:
-; X64-AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %ymm1, (%rax)
; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-LABEL: test2:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovups %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: vpmovsxbq (%ecx), %ymm0
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovups %ymm1, (%eax)
; X32-AVX2-NEXT: vmovdqu %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %ymm1, (%rax)
; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-LABEL: test4:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovups %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: vpmovsxbd (%ecx), %ymm0
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovups %ymm1, (%eax)
; X32-AVX2-NEXT: vmovdqu %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0
; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %ymm1, (%rax)
; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-LABEL: test6:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovups %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: vpmovsxbw (%ecx), %ymm0
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovups %ymm1, (%eax)
; X32-AVX2-NEXT: vmovdqu %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %ymm1, (%rax)
; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-LABEL: test8:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovups %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: vpmovsxwq (%ecx), %ymm0
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovups %ymm1, (%eax)
; X32-AVX2-NEXT: vmovdqu %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0
; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %ymm1, (%rax)
; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-LABEL: test10:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovups %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: vpmovsxwd (%ecx), %ymm0
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovups %ymm1, (%eax)
; X32-AVX2-NEXT: vmovdqu %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
; AVX1-NEXT: vpmovsxdq (%rdi), %xmm0
; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %ymm1, (%rax)
; AVX1-NEXT: vmovups %ymm0, (%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-LABEL: test12:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovups %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
; AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: vpmovsxdq (%ecx), %ymm0
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovups %ymm1, (%eax)
; X32-AVX2-NEXT: vmovdqu %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
define <4 x double> @cmp4f64_domain(<4 x double> %a) {
; X86-LABEL: cmp4f64_domain:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp4f64_domain:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
define <4 x double> @cmp4f64_domain_optsize(<4 x double> %a) optsize {
; X86-LABEL: cmp4f64_domain_optsize:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp4f64_domain_optsize:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
define <8 x float> @cmp8f32_domain(<8 x float> %a) {
; X86-LABEL: cmp8f32_domain:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp8f32_domain:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
define <8 x float> @cmp8f32_domain_optsize(<8 x float> %a) optsize {
; X86-LABEL: cmp8f32_domain_optsize:
; X86: # BB#0:
-; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp8f32_domain_optsize:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
;
; AVX2-LABEL: sad_16i8:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB0_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
;
; AVX2-LABEL: sad_32i8:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB1_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
;
; AVX2-LABEL: sad_avx64i8:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
-; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
-; AVX2-NEXT: vpxor %ymm6, %ymm6, %ymm6
-; AVX2-NEXT: vpxor %ymm5, %ymm5, %ymm5
-; AVX2-NEXT: vpxor %ymm7, %ymm7, %ymm7
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6
+; AVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX2-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB2_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
; KNL-32: # BB#0: # %entry
; KNL-32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607]
; KNL-32-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-32-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-32-NEXT: movb $15, %al
; KNL-32-NEXT: kmovw %eax, %k1
; X32: # BB#0: # %BB
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; X32-NEXT: movb $1, %al
; X32-NEXT: .p2align 4, 0x90
; X64: # BB#0: # %BB
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; X64-NEXT: movb $1, %al
; X64-NEXT: .p2align 4, 0x90
; AVX1-LABEL: signum32b:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX2-LABEL: signum32b:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512F-LABEL: signum32b:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovaps (%rdi), %ymm0
-; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; AVX512F-NEXT: vpmovqd %zmm2, %ymm2
; AVX1-LABEL: signum64b:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vmovapd (%rdi), %ymm0
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-LABEL: signum64b:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovapd (%rdi), %ymm0
-; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX512F-LABEL: signum64b:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovapd (%rdi), %ymm0
-; AVX512F-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpmovqd %zmm2, %ymm2
; AVX512F-NEXT: vcvtdq2pd %xmm2, %ymm2
; AVX-LABEL: signum32c:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovaps (%rdi), %ymm0
-; AVX-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX-NEXT: vcvtdq2ps %ymm2, %ymm2
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-LABEL: signum64c:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vmovapd (%rdi), %ymm0
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-LABEL: signum64c:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovapd (%rdi), %ymm0
-; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsubd %ymm0, %ymm2, %ymm0
; AVX512F-LABEL: signum64c:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovapd (%rdi), %ymm0
-; AVX512F-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpsubd %ymm0, %ymm2, %ymm0
;
; AVX2-LABEL: d:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
%b = trunc <8 x i32> %a to <8 x i16>
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X64-LABEL: legal_vzmovl_2i32_8i32:
; X64: # BB#0:
; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-NEXT: vmovaps %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovupd (%ecx), %xmm0
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-NEXT: vmovapd %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X64-LABEL: legal_vzmovl_2i64_4i64:
; X64: # BB#0:
; X64-NEXT: vmovupd (%rdi), %xmm0
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-NEXT: vmovapd %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-LABEL: legal_vzmovl_2f32_8f32:
; X64: # BB#0:
; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-NEXT: vmovaps %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovupd (%ecx), %xmm0
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-NEXT: vmovapd %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X64-LABEL: legal_vzmovl_2f64_4f64:
; X64: # BB#0:
; X64-NEXT: vmovupd (%rdi), %xmm0
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-NEXT: vmovapd %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; AVX2-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR_v8]](%rip), %ymm2
; AVX2-NEXT: vmulps %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpxor %ymm6, %ymm6, %ymm6
+; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512DQ-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpxor %ymm6, %ymm6, %ymm6
+; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
; X32-AVX-NEXT: subl $384, %esp # imm = 0x180
; X32-AVX-NEXT: movl 40(%ebp), %ecx
; X32-AVX-NEXT: vbroadcastsd 32(%ebp), %ymm0
-; X32-AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp)
; X64-AVX-NEXT: subq $256, %rsp # imm = 0x100
; X64-AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm3[3,1,2,3]
-; X64-AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
-; X32-AVX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
-; X32-AVX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
-; X32-AVX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
-; X32-AVX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
-; X32-AVX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
-; X32-AVX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT: vpand %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT: vpand %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm5, %ymm5
; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512DQ-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
; AVX512DQ-NEXT: vpsadbw %ymm3, %ymm5, %ymm5
; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm5
; AVX512DQ-NEXT: vpand %ymm2, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpxor %ymm6, %ymm6, %ymm6
+; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm5, %ymm7
; AVX512DQ-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm5
; AVX512DQ-NEXT: vpand %ymm2, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpxor %ymm6, %ymm6, %ymm6
+; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512DQ-NEXT: vpcmpeqb %ymm6, %ymm5, %ymm7
; AVX512DQ-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %ymm5, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm6
; AVX512DQ-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512DQ-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %ymm5, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm6
; AVX512DQ-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5
; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15]
; AVX2-NEXT: vpsllvd %ymm4, %ymm5, %ymm4
; AVX2-LABEL: constant_rotate_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT: vpsravd %ymm3, %ymm4, %ymm3
;
; X32-AVX2-LABEL: var_shift_v16i16:
; X32-AVX2: # BB#0:
-; X32-AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-AVX2-NEXT: vpsravd %ymm3, %ymm4, %ymm3
;
; AVX2-LABEL: constant_shift_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
;
; X32-AVX2-LABEL: constant_shift_v16i16:
; X32-AVX2: # BB#0:
-; X32-AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
; AVX512DQ-LABEL: ashr_const7_v64i8:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
;
; X32-AVX2-LABEL: var_shift_v16i16:
; X32-AVX2: # BB#0:
-; X32-AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
;
; AVX2-LABEL: constant_shift_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
;
; X32-AVX2-LABEL: constant_shift_v16i16:
; X32-AVX2: # BB#0:
-; X32-AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
;
; X32-AVX2-LABEL: var_shift_v16i16:
; X32-AVX2: # BB#0:
-; X32-AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-AVX2-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; AVX2-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2OR512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <32 x i8> %shuffle
}
;
; AVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2OR512VL-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2OR512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
-; AVX2OR512VL-NEXT: retq
+; AVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <32 x i8> %shuffle
}
; AVX1-LABEL: shuffle_v4f64_0z3z:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0z3z:
; AVX2: # BB#0:
; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT: retq
;
define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1z2z:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
;
; AVX2-LABEL: shuffle_v4f64_1z2z:
; AVX2: # BB#0:
-; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX2-NEXT: retq
define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT: retq
;
define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
define <4 x i64> @shuffle_v4i64_1z3z(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1z3z:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX2-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i64_z0z3:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_z0z3:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
;
define <4 x i64> @shuffle_v4i64_1z2z(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1z2z:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
;
; AVX2-LABEL: shuffle_v4i64_1z2z:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX2-NEXT: retq
define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
; AVX1-NEXT: retq
;
define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT: retq
define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT: retq
define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX1: # BB#0:
-; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq
; KNL-NEXT: movl $65535, %eax ## imm = 0xFFFF
; KNL-NEXT: vmovd %eax, %xmm1
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512F-NEXT: movl $255, %eax
; AVX512F-NEXT: vmovd %eax, %xmm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512DQ-NEXT: movl $255, %eax
; AVX512DQ-NEXT: vmovd %eax, %xmm1
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; KNL64-LABEL: expand:
; KNL64: # BB#0:
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; KNL64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
; KNL64-NEXT: retq
;
; KNL32-LABEL: expand:
; KNL32: # BB#0:
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; KNL32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
; KNL32-NEXT: retl
%res = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 5, i32 1, i32 5, i32 5, i32 5, i32 5, i32 5>
; KNL64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL64-NEXT: vmovaps {{.*#+}} ymm1 = <u,0,u,1,u,2,u,3>
; KNL64-NEXT: vpermps %ymm0, %ymm1, %ymm0
-; KNL64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL64-NEXT: retq
;
; KNL32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL32-NEXT: vmovaps {{.*#+}} ymm1 = <u,0,u,1,u,2,u,3>
; KNL32-NEXT: vpermps %ymm0, %ymm1, %ymm0
-; KNL32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL32-NEXT: retl
%res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; KNL64: # BB#0:
; KNL64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; KNL64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; KNL64-NEXT: retq
;
; KNL32: # BB#0:
; KNL32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; KNL32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; KNL32-NEXT: retl
%res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 2, i32 1>
; KNL64-LABEL: expand3:
; KNL64: # BB#0:
; KNL64-NEXT: vpbroadcastq %xmm0, %ymm0
-; KNL64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
; KNL64-NEXT: retq
;
; KNL32-LABEL: expand3:
; KNL32: # BB#0:
; KNL32-NEXT: vpbroadcastq %xmm0, %ymm0
-; KNL32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
; KNL32-NEXT: retl
%res = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <8 x i32> <i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,i32 5>
; KNL64: # BB#0:
; KNL64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL64-NEXT: retq
;
; KNL32: # BB#0:
; KNL32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL32-NEXT: retl
%res = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <4 x i32> <i32 2, i32 0, i32 0, i32 3>
; KNL64-LABEL: expand5:
; KNL64: # BB#0:
; KNL64-NEXT: vbroadcastss %xmm0, %ymm0
-; KNL64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL64-NEXT: retq
;
; KNL32-LABEL: expand5:
; KNL32: # BB#0:
; KNL32-NEXT: vbroadcastss %xmm0, %ymm0
-; KNL32-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL32-NEXT: retl
%res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
;
; KNL64-LABEL: expand13:
; KNL64: # BB#0:
-; KNL64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; KNL64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL64-NEXT: retq
;
;
; KNL32-LABEL: expand13:
; KNL32: # BB#0:
-; KNL32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; KNL32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL32-NEXT: retl
%res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64-AVX512
;
; Combine tests involving AVX target shuffles
; X32: # BB#0:
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_identity:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_identity:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_identity:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_identity:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_identity:
; X64: # BB#0:
; X64-NEXT: retq
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
ret <4 x float> %2
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_movddup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_movddup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_movddup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_movddup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_movddup:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
ret <4 x float> %1
}
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_movddup_load:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_movddup_load:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_movddup_load:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_movddup_load:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_movddup_load:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
%1 = load <4 x float>, <4 x float> *%a0
%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
ret <4 x float> %2
; X32-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_movshdup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_movshdup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_movshdup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_movshdup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_movshdup:
; X64: # BB#0:
; X64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT: retq
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 undef, i32 1, i32 3, i32 3>)
ret <4 x float> %1
}
; X32-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_movsldup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_movsldup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_movsldup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_movsldup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_movsldup:
; X64: # BB#0:
; X64-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: retq
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)
ret <4 x float> %1
}
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_unpckh:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_unpckh:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_unpckh:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_unpckh:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_unpckh:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: retq
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
ret <4 x float> %1
}
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_unpckl:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_unpckl:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_unpckl:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_unpckl:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_unpckl:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X64-NEXT: retq
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
ret <4 x float> %1
}
; X32: # BB#0:
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_8f32_identity:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_8f32_identity:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_8f32_identity:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_8f32_identity:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_8f32_identity:
; X64: # BB#0:
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 undef>)
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 1>)
ret <8 x float> %2
; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_8f32_10326u4u:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_8f32_10326u4u:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_8f32_10326u4u:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_8f32_10326u4u:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_8f32_10326u4u:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 0, i32 1, i32 2, i32 undef>)
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 undef>)
ret <8 x float> %2
; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_vperm2f128_8f32:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_vperm2f128_8f32:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_vperm2f128_8f32:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_vperm2f128_8f32:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_vperm2f128_8f32:
; X64: # BB#0:
; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
%2 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
; X64: # BB#0:
; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
%2 = shufflevector <8 x float> %1, <8 x float> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) {
; X32-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X32: # BB#0:
-; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X64: # BB#0:
-; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; X64-NEXT: retq
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
%2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%3 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %2, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_8f32_movddup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_8f32_movddup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_8f32_movddup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_8f32_movddup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_8f32_movddup:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
ret <8 x float> %1
}
; X32-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_8f32_movddup_load:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_8f32_movddup_load:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_8f32_movddup_load:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_8f32_movddup_load:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_8f32_movddup_load:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; X64-NEXT: retq
%1 = load <8 x float>, <8 x float> *%a0
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
ret <8 x float> %2
; X32-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_8f32_movshdup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_8f32_movshdup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_8f32_movshdup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_8f32_movshdup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_8f32_movshdup:
; X64: # BB#0:
; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 5, i32 7, i32 7>)
ret <8 x float> %1
}
; X32-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_8f32_movsldup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_8f32_movsldup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_8f32_movsldup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_8f32_movsldup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_8f32_movsldup:
; X64: # BB#0:
; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>)
ret <8 x float> %1
}
; X32: # BB#0:
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_2f64_identity:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_2f64_identity:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_2f64_identity:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_2f64_identity:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_2f64_identity:
; X64: # BB#0:
; X64-NEXT: retq
%1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 2, i64 0>)
%2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> <i64 2, i64 0>)
ret <2 x double> %2
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_2f64_movddup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_2f64_movddup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_2f64_movddup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_2f64_movddup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_2f64_movddup:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
%1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
ret <2 x double> %1
}
; X32: # BB#0:
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f64_identity:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f64_identity:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f64_identity:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f64_identity:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f64_identity:
; X64: # BB#0:
; X64-NEXT: retq
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
%2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %1, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
ret <4 x double> %2
; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f64_movddup:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f64_movddup:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f64_movddup:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f64_movddup:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f64_movddup:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT: retq
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
ret <4 x double> %1
}
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_4stage:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_4stage:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_4stage:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_4stage:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_4stage:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; X64-NEXT: retq
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)
%3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)
; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_8f32_4stage:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_8f32_4stage:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_8f32_4stage:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_8f32_4stage:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_8f32_4stage:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; X64-NEXT: retq
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)
%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
; X32-NEXT: retl
;
+; X32-AVX-LABEL: combine_vpermilvar_4f32_as_insertps:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: combine_vpermilvar_4f32_as_insertps:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: combine_vpermilvar_4f32_as_insertps:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: combine_vpermilvar_4f32_as_insertps:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
+; X64-AVX512-NEXT: retq
; X64-LABEL: combine_vpermilvar_4f32_as_insertps:
; X64: # BB#0:
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
; X64-NEXT: retq
%1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
%2 = shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 1, i32 4>
ret <4 x float> %2
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: constant_fold_vpermilvar_pd:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: constant_fold_vpermilvar_pd:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: constant_fold_vpermilvar_pd:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: constant_fold_vpermilvar_pd:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
+; X64-AVX512-NEXT: retq
; X64-LABEL: constant_fold_vpermilvar_pd:
; X64: # BB#0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
; X64-NEXT: retq
%1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> <double 1.0, double 2.0>, <2 x i64> <i64 2, i64 0>)
ret <2 x double> %1
}
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: constant_fold_vpermilvar_pd_256:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: constant_fold_vpermilvar_pd_256:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: constant_fold_vpermilvar_pd_256:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: constant_fold_vpermilvar_pd_256:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
+; X64-AVX512-NEXT: retq
+;
; X64-LABEL: constant_fold_vpermilvar_pd_256:
; X64: # BB#0:
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
; X64-NEXT: retq
%1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
ret <4 x double> %1
}
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: constant_fold_vpermilvar_ps:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: constant_fold_vpermilvar_ps:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: constant_fold_vpermilvar_ps:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: constant_fold_vpermilvar_ps:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
+; X64-AVX512-NEXT: retq
+;
; X64-LABEL: constant_fold_vpermilvar_ps:
; X64: # BB#0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
; X64-NEXT: retq
%1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x i32> <i32 3, i32 0, i32 2, i32 1>)
ret <4 x float> %1
}
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
; X32-NEXT: retl
;
+; X32-AVX-LABEL: constant_fold_vpermilvar_ps_256:
+; X32-AVX: # BB#0:
+; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
+; X32-AVX-NEXT: retl
+;
+; X32-AVX512-LABEL: constant_fold_vpermilvar_ps_256:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
+; X32-AVX512-NEXT: retl
+;
+; X64-AVX-LABEL: constant_fold_vpermilvar_ps_256:
+; X64-AVX: # BB#0:
+; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
+; X64-AVX-NEXT: retq
+;
+; X64-AVX512-LABEL: constant_fold_vpermilvar_ps_256:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
+; X64-AVX512-NEXT: retq
+;
; X64-LABEL: constant_fold_vpermilvar_ps_256:
; X64: # BB#0:
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
; X64-NEXT: retq
%1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 1>)
ret <8 x float> %1
}
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_pslldq:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_pslldq:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
%2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_psrldq:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_psrldq:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
%2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
define <32 x i8> @combine_and_pshufb(<32 x i8> %a0) {
; X32-LABEL: combine_and_pshufb:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; X32-NEXT: retl
;
; X64-LABEL: combine_and_pshufb:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; X64-NEXT: retq
%1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
define <32 x i8> @combine_pshufb_and(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_and:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_and:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; X64-NEXT: retq
%1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 8, i8 9, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 8, i8 9, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; X32-LABEL: combine_permq_pshufb_as_vpblendd:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: combine_permq_pshufb_as_vpblendd:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT: retq
%1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
; X32-AVX2-LABEL: combine_pshufb_as_vzmovl_64:
; X32-AVX2: # BB#0:
-; X32-AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-AVX2-NEXT: retl
;
;
; X64-AVX2-LABEL: combine_pshufb_as_vzmovl_64:
; X64-AVX2: # BB#0:
-; X64-AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; X64-AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-AVX2-NEXT: retq
;
define <8 x float> @combine_pshufb_as_vzmovl_32(<8 x float> %a0) {
; X32-AVX2-LABEL: combine_pshufb_as_vzmovl_32:
; X32-AVX2: # BB#0:
-; X32-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X32-AVX2-NEXT: retl
;
;
; X64-AVX2-LABEL: combine_pshufb_as_vzmovl_32:
; X64-AVX2: # BB#0:
-; X64-AVX2-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-AVX2-NEXT: retq
;
define <32 x i8> @combine_pshufb_as_unpacklo_zero(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_unpacklo_zero:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_as_unpacklo_zero:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; X64-NEXT: retq
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 -1, i8 -1, i8 2, i8 3, i8 -1, i8 -1, i8 4, i8 5, i8 -1, i8 -1, i8 6, i8 7, i8 -1, i8 -1, i8 16, i8 17, i8 -1, i8 -1, i8 18, i8 19, i8 -1, i8 -1, i8 20, i8 21, i8 -1, i8 -1, i8 22, i8 23, i8 -1, i8 -1>)
define <32 x i8> @combine_pshufb_as_unpackhi_zero(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_unpackhi_zero:
; X32: # BB#0:
-; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_as_unpackhi_zero:
; X64: # BB#0:
-; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; X64-NEXT: retq
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 -1, i8 8, i8 -1, i8 9, i8 -1, i8 10, i8 -1, i8 11, i8 -1, i8 12, i8 -1, i8 13, i8 -1, i8 14, i8 -1, i8 15, i8 -1, i8 24, i8 -1, i8 25, i8 -1, i8 26, i8 -1, i8 27, i8 -1, i8 28, i8 -1, i8 29, i8 -1, i8 30, i8 -1, i8 31>)
define <8 x float> @combine_vpermil2ps256_zero(<8 x float> %a0, <8 x float> %a1) {
; X32-LABEL: combine_vpermil2ps256_zero:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps256_zero:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>, i8 2)
ret <8 x float> %res0
;
; AVX2-LABEL: testv4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX512CD-LABEL: testv4i64:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX512VPOPCNTDQ-LABEL: testv4i64:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubq %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv4i64:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: vpsubq {{\.LCPI.*}}, %ymm0, %ymm0
;
; AVX2-LABEL: testv4i64u:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX512CD-LABEL: testv4i64u:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubq %ymm0, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
;
; AVX512VPOPCNTDQ-LABEL: testv4i64u:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubq %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv4i64u:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: vpsubq {{\.LCPI.*}}, %ymm0, %ymm0
;
; AVX2-LABEL: testv8i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX512CD-LABEL: testv8i32:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX512VPOPCNTDQ-LABEL: testv8i32:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubd %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv8i32:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX2-LABEL: testv8i32u:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX512CD-LABEL: testv8i32u:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubd %ymm0, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
;
; AVX512VPOPCNTDQ-LABEL: testv8i32u:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubd %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv8i32u:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
; X32-AVX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
;
; AVX2-LABEL: testv16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512CD-LABEL: testv16i16:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512VPOPCNTDQ-LABEL: testv16i16:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv16i16:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX2-LABEL: testv16i16u:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512CD-LABEL: testv16i16u:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512VPOPCNTDQ-LABEL: testv16i16u:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv16i16u:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX2-LABEL: testv32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512CD-LABEL: testv32i8:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512VPOPCNTDQ-LABEL: testv32i8:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv32i8:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX2-LABEL: testv32i8u:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512CD-LABEL: testv32i8u:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; AVX512VPOPCNTDQ-LABEL: testv32i8u:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
;
; X32-AVX-LABEL: testv32i8u:
; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512CD-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512CD-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512CD-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512CD-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512CD-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512CD-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512CD-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512CD-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm5, %ymm5
; AVX512CD-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; AVX512CD-LABEL: testv32i16:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512CD-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512CD-NEXT: vpsubw %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
;
; AVX512VPOPCNTDQ-LABEL: testv32i16:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VPOPCNTDQ-NEXT: vpsubw %ymm0, %ymm2, %ymm3
; AVX512VPOPCNTDQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; AVX512CD-LABEL: testv32i16u:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512CD-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512CD-NEXT: vpsubw %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
;
; AVX512VPOPCNTDQ-LABEL: testv32i16u:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VPOPCNTDQ-NEXT: vpsubw %ymm0, %ymm2, %ymm3
; AVX512VPOPCNTDQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512CD-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512CD-NEXT: vpsubb %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
;
; AVX512VPOPCNTDQ-LABEL: testv64i8:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VPOPCNTDQ-NEXT: vpsubb %ymm0, %ymm2, %ymm3
; AVX512VPOPCNTDQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8u:
; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512CD-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512CD-NEXT: vpsubb %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
;
; AVX512VPOPCNTDQ-LABEL: testv64i8u:
; AVX512VPOPCNTDQ: # BB#0:
-; AVX512VPOPCNTDQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VPOPCNTDQ-NEXT: vpsubb %ymm0, %ymm2, %ymm3
; AVX512VPOPCNTDQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
;
; AVX2-LABEL: test_abs_gt_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
;
; AVX2-LABEL: test_abs_le_v8i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %ymm0
; AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: signbit_sel_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpxor %ymm3, %ymm3, %ymm3
-; AVX512-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
-; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: signbit_sel_v32i8:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: signbit_sel_v32i8:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512VL-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
%tr = icmp slt <32 x i8> %mask, zeroinitializer
%z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
ret <32 x i8> %z
;
; AVX2-LABEL: signbit_sel_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: signbit_sel_v16i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpxor %ymm3, %ymm3, %ymm3
-; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: signbit_sel_v16i16:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: signbit_sel_v16i16:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512VL-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
%tr = icmp slt <16 x i16> %mask, zeroinitializer
%z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
ret <16 x i16> %z
; AVX512F-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
; AVX512F-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_cmov_si256:
; X32: # BB#0:
-; X32-NEXT: vxorps %ymm3, %ymm3, %ymm3
+; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; X32-NEXT: vcmptrueps %ymm3, %ymm3, %ymm3
; X32-NEXT: vxorps %ymm3, %ymm2, %ymm3
; X32-NEXT: vandps %ymm2, %ymm0, %ymm0
;
; X64-LABEL: test_mm256_cmov_si256:
; X64: # BB#0:
-; X64-NEXT: vxorps %ymm3, %ymm3, %ymm3
+; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3
; X64-NEXT: vcmptrueps %ymm3, %ymm3, %ymm3
; X64-NEXT: vxorps %ymm3, %ymm2, %ymm3
; X64-NEXT: vandps %ymm2, %ymm0, %ymm0