AVX512F only has integer domain logic instructions. AVX512DQ added FP domain logic instructions.
Execution domain fixing runs before the EVEX->VEX conversion, so if we have AVX512F but not AVX512DQ we fail to do execution domain switching of the logic operations. This leads to mismatches in execution domain and more test differences.
This patch adds custom domain fixing that switches EVEX integer logic operations to VEX FP logic operations if none of XMM16-31 is used.
llvm-svn: 337137
{ X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
};
+// Special table for changing EVEX integer logic instructions (AND, ANDN, OR, XOR; 128-bit and 256-bit; rr and rm forms) to VEX FP logic instructions. Each row lists the equivalent opcodes of one operation, one column per domain.
+// TODO: Should we run EVEX->VEX earlier?
+static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = {
+ // Two integer columns: the first holds the 64-bit element (Q) opcodes, the second holds the 32-bit element (D) opcodes.
+ // PackedSingle PackedDouble PackedInt(Q) PackedInt(D)
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORQZ128rm, X86::VPORDZ128rm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORQZ128rr, X86::VPORDZ128rr },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
+ { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
+ { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
+ { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
+ { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
+ { X86::VORPSYrm, X86::VORPDYrm, X86::VPORQZ256rm, X86::VPORDZ256rm },
+ { X86::VORPSYrr, X86::VORPDYrr, X86::VPORQZ256rr, X86::VPORDZ256rr },
+ { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
+};
+
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
case X86::VPBLENDWYrmi:
case X86::VPBLENDWYrri:
return GetBlendDomains(8, false);
+ case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
+ case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
+ case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
+ case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
+ case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
+ case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
+ case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
+ case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
+ case X86::VPORDZ128rr: case X86::VPORDZ128rm:
+ case X86::VPORDZ256rr: case X86::VPORDZ256rm:
+ case X86::VPORQZ128rr: case X86::VPORQZ128rm:
+ case X86::VPORQZ256rr: case X86::VPORQZ256rm:
+ case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
+ case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
+ case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
+ case X86::VPXORQZ256rr: case X86::VPXORQZ256rm:
+ // If we don't have DQI see if we can still switch from an EVEX integer
+ // instruction to a VEX floating point instruction.
+ if (Subtarget.hasDQI())
+ return 0;
+
+ if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
+ return 0;
+ if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
+ return 0;
+ // Register forms will have 3 operands. Memory form will have more.
+ if (NumOperands == 3 &&
+ RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
+ return 0;
+
+ // All domains are valid.
+ return 0xe;
}
return 0;
}
case X86::VPBLENDWYrmi:
case X86::VPBLENDWYrri:
return SetBlendDomain(16, true);
+ case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
+ case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
+ case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
+ case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
+ case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
+ case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
+ case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
+ case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
+ case X86::VPORDZ128rr: case X86::VPORDZ128rm:
+ case X86::VPORDZ256rr: case X86::VPORDZ256rm:
+ case X86::VPORQZ128rr: case X86::VPORQZ128rm:
+ case X86::VPORQZ256rr: case X86::VPORQZ256rm:
+ case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
+ case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
+ case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
+ case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: {
+ // Without DQI, convert EVEX instructions to VEX instructions.
+ if (Subtarget.hasDQI())
+ return false;
+
+ const uint16_t *table = lookupAVX512(MI.getOpcode(), dom,
+ ReplaceableCustomAVX512LogicInstrs);
+ assert(table && "Instruction not found in table?");
+ // Don't change integer Q instructions to D instructions and
+ // use D instructions if we started with a PS instruction.
+ if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
+ Domain = 4;
+ MI.setDesc(get(table[Domain - 1]));
+ return true;
+ }
}
return false;
}
}
define <16 x double> @sbto16f64(<16 x double> %a) {
-; NOVLDQ-LABEL: sbto16f64:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
-; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
-; NOVLDQ-NEXT: kunpckbw %k0, %k1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm0
-; NOVLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
-; NOVLDQ-NEXT: retq
+; NODQ-LABEL: sbto16f64:
+; NODQ: # %bb.0:
+; NODQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
+; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
+; NODQ-NEXT: kunpckbw %k0, %k1, %k1
+; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
+; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
+; NODQ-NEXT: retq
;
; VLDQ-LABEL: sbto16f64:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; VLDQ-NEXT: retq
;
-; VLNODQ-LABEL: sbto16f64:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
-; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
-; VLNODQ-NEXT: kunpckbw %k0, %k1, %k1
-; VLNODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
-; VLNODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
-; VLNODQ-NEXT: retq
-;
; DQNOVL-LABEL: sbto16f64:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
;
; VLNODQ-LABEL: sbto8f64:
; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
}
define <8 x float> @sbto8f32(<8 x float> %a) {
-; NOVL-LABEL: sbto8f32:
-; NOVL: # %bb.0:
-; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; NOVL-NEXT: vcvtdq2ps %ymm0, %ymm0
-; NOVL-NEXT: retq
-;
-; VLDQ-LABEL: sbto8f32:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: sbto8f32:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
-; VLNODQ-NEXT: retq
+; ALL-LABEL: sbto8f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; ALL-NEXT: vcvtdq2ps %ymm0, %ymm0
+; ALL-NEXT: retq
%cmpres = fcmp ogt <8 x float> %a, zeroinitializer
%1 = sitofp <8 x i1> %cmpres to <8 x float>
ret <8 x float> %1
}
define <4 x float> @sbto4f32(<4 x float> %a) {
-; NOVL-LABEL: sbto4f32:
-; NOVL: # %bb.0:
-; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVL-NEXT: retq
-;
-; VLDQ-LABEL: sbto4f32:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: sbto4f32:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLNODQ-NEXT: retq
+; ALL-LABEL: sbto4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0
+; ALL-NEXT: retq
%cmpres = fcmp ogt <4 x float> %a, zeroinitializer
%1 = sitofp <4 x i1> %cmpres to <4 x float>
ret <4 x float> %1
;
; VLNODQ-LABEL: sbto4f64:
; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
}
define <2 x float> @sbto2f32(<2 x float> %a) {
-; NOVL-LABEL: sbto2f32:
-; NOVL: # %bb.0:
-; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVL-NEXT: retq
-;
-; VLDQ-LABEL: sbto2f32:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: sbto2f32:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLNODQ-NEXT: retq
+; ALL-LABEL: sbto2f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0
+; ALL-NEXT: retq
%cmpres = fcmp ogt <2 x float> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x float>
ret <2 x float> %1
}
define <2 x double> @sbto2f64(<2 x double> %a) {
-; NOVL-LABEL: sbto2f64:
-; NOVL: # %bb.0:
-; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0
-; NOVL-NEXT: retq
-;
-; VLDQ-LABEL: sbto2f64:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: sbto2f64:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; VLNODQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0
-; VLNODQ-NEXT: retq
+; ALL-LABEL: sbto2f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; ALL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; ALL-NEXT: retq
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x double>
ret <2 x double> %1
define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_double_to_4_mask3(double %s, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_double_to_8_mask3(double %s, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_float_to_4_mask3(float %s, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_float_to_8_mask3(float %s, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_float_to_16_mask3(float %s, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> %default, <4 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_4_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> %default, <8 x double> %mask) {
; CHECK-LABEL: test_masked_double_to_8_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %default, <4 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_4_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %default, <8 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_8_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> %default, <16 x float> %mask) {
; CHECK-LABEL: test_masked_float_to_16_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_masked_z_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_masked_z_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[2,1,3,1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,3,1]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,3,2,1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,2,1]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,3,1,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,3,1,3]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[1,3,2,0]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[1,3,2,0]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[2,1,3,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2]
; CHECK-NEXT: retq
define <4 x float> @test_masked_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[0,1,3,0]
; CHECK-NEXT: retq
define <4 x float> @test_masked_z_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,0]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,6,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,6,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,7,6,7,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,7,6,7,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,2,1,6,5,4,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,2,1,6,5,4,4]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,2,1,0,6,6,5,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,1,0,6,6,5,4]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,3,7,7,6,5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,3,7,7,6,5]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3,6,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3,6,5,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_mask6:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,5,6,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask6:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,5,6,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,0,2,1,7,4,6,5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_masked_z_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,2,1,7,4,6,5]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[3,0,0,2,4,6,7,6]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[3,0,0,2,4,6,7,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,2,2,6,4,6,6]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,2,2,6,4,6,6]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[2,1,1,3,4,4,7,4]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[2,1,1,3,4,4,7,4]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,0,3,3,4,4,7,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,0,3,3,4,4,7,7]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,0,1,4,6,5,4]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,0,1,4,6,5,4]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,0,3,6,4,4,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,0,3,6,4,4,7]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,2,3,7,4,6,7]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,2,3,7,4,6,7]
; CHECK-NEXT: retq
define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,2,3,1,4,6,7,5]
; CHECK-NEXT: retq
define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,1,4,6,7,5]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask6:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask6:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_masked_z_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
; CHECK-NEXT: retq
define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
; CHECK-NEXT: retq
define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_masked_z_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_perm_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_masked_z_2xdouble_perm_mask1(<2 x double> %vec, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_z_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0]
; CHECK-NEXT: retq
define <2 x double> @test_masked_z_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,0,2,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,2,3]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[0,1,3,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,3,3]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,3,3]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,3,3]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,3]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,3]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,2]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,3,2,4,5,7,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,3,2,4,5,7,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,7,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_mask1(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,7,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,3,5,5,6,7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,3,5,5,6,7]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,2,4,4,6,7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_mask3(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,7]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,5,4,7,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,5,4,7,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,3,3,4,5,7,7]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,3,3,4,5,7,7]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,4,7,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,4,7,6]
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,0,3,2,4,5,6,7]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,2,4,5,6,7]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[0,3],xmm3[0,1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3],xmm2[0,1]
; CHECK-NEXT: vzeroupper
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[0,0]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],xmm3[0,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm2[0,2]
; CHECK-NEXT: vzeroupper
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,0],xmm0[0,0]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,2],xmm3[0,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[0,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,2],xmm2[0,2]
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3]
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[3,3,1,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,1,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vmovaps (%rdi), %ymm2
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm3[2,0]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm2[2,0],xmm3[0,1]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm0, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,0],xmm2[0,1]
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm2
-; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vmovaps (%rdi), %ymm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3]
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = xmm2[2,3,3,2]
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
-; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vmovaps (%rdi), %ymm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,3,3,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vmovaps (%rdi), %ymm2
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,0],xmm2[3,0]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm2[3,1],xmm3[2,0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[3,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm0, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm1[3,1],xmm2[2,0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vmovaps (%rdi), %ymm2
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,0]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm2[1,3],xmm3[0,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[3,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm0, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3],xmm2[0,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,4,12,10,8,2,11,7]
; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,12,10,8,2,11,7]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [2,4,11,4,12,7,9,6]
; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,4,11,4,12,7,9,6]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,4,10,11,6,1,4,4]
; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm2[0,0]
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,10,11,6,1,4,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [4,6,1,8,4,12,13,0]
; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,6,1,8,4,12,13,0]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = <12,0,1,2,u,u,u,u>
; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %xmm0, %xmm2, %k1
; CHECK-NEXT: vblendmps %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <12,0,1,2,u,u,u,u>
; CHECK-NEXT: vpermi2ps %ymm0, %ymm2, %ymm3
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqps %xmm0, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3]
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm3[0,0],ymm0[0,1],ymm3[4,4],ymm0[4,5]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm2[0,0],ymm0[0,1],ymm2[4,4],ymm0[4,5]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7]
-; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm3
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7]
+; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3]
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
-; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm2
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [7,6,7,11,5,10,0,4]
; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovaps (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,7,11,5,10,0,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [11,0,9,0,7,14,0,8]
; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovaps (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [11,0,9,0,7,14,0,8]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [8,5,2,3,2,9,10,1]
; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[1,0,0,3]
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [8,5,2,3,2,9,10,1]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [7,5,3,3,11,4,12,9]
; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovaps (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [7,5,3,3,11,4,12,9]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
-; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,2,3,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm2
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vmovaps (%rdi), %zmm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[0,2,3,3]
+; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2
+; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[3,1,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
-; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,3,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,3]
+; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
+; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,10,6,15,4,14,6,15]
; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,10,6,15,4,14,6,15]
; CHECK-NEXT: vpermi2ps %ymm1, %ymm2, %ymm3
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [4,14,4,14,4,14,6,7]
; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [4,14,4,14,4,14,6,7]
; CHECK-NEXT: vpermi2ps %ymm1, %ymm2, %ymm3
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = <3,3,15,9,u,u,u,u>
; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <3,3,15,9,u,u,u,u>
; CHECK-NEXT: vpermi2ps %ymm2, %ymm1, %ymm3
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm3[0],xmm0[0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],xmm3[1]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm2[1]
; CHECK-NEXT: vzeroupper
define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mem_mask0(<4 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm2
-; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vmovapd (%rdi), %ymm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vblendpd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1
; CHECK-NEXT: vmovapd %xmm2, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
define <2 x double> @test_masked_z_4xdouble_to_2xdouble_perm_mem_mask0(<4 x double>* %vp, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
-; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vmovapd (%rdi), %ymm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vblendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm0, %k1
; CHECK-NEXT: vmovapd %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd (%rdi), %ymm2
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} = xmm3[0],xmm2[0]
; CHECK-NEXT: vzeroupper
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd (%rdi), %ymm1
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm0, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [3,7,3,7]
; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [3,7,3,7]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [2,0,7,6]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,0,7,6]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask2(<8 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,3,2,0]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask2(<8 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,2,0]
; CHECK-NEXT: retq
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask4(<8 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm3[1]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm3[1]
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,0,1,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask4(<8 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm2[1]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,1,1]
; CHECK-NEXT: retq
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [2,6,2,2]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,6,2,2]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,4,3,4]
; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [1,4,3,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [3,5,0,6]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [3,5,0,6]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,6,2,6]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %xmm0, %xmm2, %k1
; CHECK-NEXT: vblendmpd %xmm4, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [0,6,2,6]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3
-; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %xmm0, %xmm1, %k1
; CHECK-NEXT: vmovapd %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_to_2xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask1(<8 x double> %vec, <2 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_2xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,6,7,2]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [1,6,7,2]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [3,4,2,4]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,2,3,4]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [1,2,3,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [4,2,1,0]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [4,2,1,0]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [2,4,1,5]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [2,4,1,5]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [6,1,1,1]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [6,1,1,1]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,6,1]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,6,1]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,5,2,5]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,5,2,5]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,6,3,6]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vmovapd %xmm4, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [1,6,3,6]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm1, %ymm3
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovapd %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd (%rdi), %zmm2
; CHECK-NEXT: vextractf32x4 $2, %zmm2, %xmm3
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} = xmm2[1],xmm3[0]
; CHECK-NEXT: vzeroupper
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm2
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm0, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm1[1],xmm2[0]
; CHECK-NEXT: vzeroupper
; CHECK-LABEL: test_masked_8xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1]
; CHECK-NEXT: retq
define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2]
; CHECK-NEXT: retq
define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2]
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7]
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask6:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4]
; CHECK-NEXT: retq
define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[2,1],xmm1[3,1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],xmm1[3,1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,2],xmm1[3,2]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2],xmm1[3,2]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,3],xmm1[2,1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm1[2,1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,0],mem[1,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0],mem[1,2]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,3],mem[1,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],mem[1,3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],mem[2,0]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],mem[2,0]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[2,1],mem[3,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],mem[3,2]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_2xdouble_zero_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_2xdouble_zero_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[3],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[3],mem[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: retq
define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: retq
define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
; CHECK-NEXT: retq
define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
}
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
-; X64-AVX512VL-LABEL: PR29088:
-; X64-AVX512VL: ## %bb.0:
-; X64-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512VL-NEXT: retq
-;
-; X64-AVX512BWVL-LABEL: PR29088:
-; X64-AVX512BWVL: ## %bb.0:
-; X64-AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512BWVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512BWVL-NEXT: retq
-;
-; X64-AVX512DQVL-LABEL: PR29088:
-; X64-AVX512DQVL: ## %bb.0:
-; X64-AVX512DQVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi)
-; X64-AVX512DQVL-NEXT: retq
+; X64-AVX512-LABEL: PR29088:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
+; X64-AVX512-NEXT: vmovaps %ymm1, (%rsi)
+; X64-AVX512-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
%shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
+; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
; X86-LABEL: test_mask_and_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpand (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x00]
+; X86-NEXT: vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_128:
; X64: # %bb.0:
-; X64-NEXT: vpand (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x07]
+; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdb,0xc1]
+; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
; X86-LABEL: test_mask_and_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpand (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdb,0x00]
+; X86-NEXT: vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_256:
; X64: # %bb.0:
-; X64-NEXT: vpand (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdb,0x07]
+; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0xc1]
+; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
; X86-LABEL: test_mask_or_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpor (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0x00]
+; X86-NEXT: vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_128:
; X64: # %bb.0:
-; X64-NEXT: vpor (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0x07]
+; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0xc1]
+; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
; X86-LABEL: test_mask_or_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpor (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0x00]
+; X86-NEXT: vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_256:
; X64: # %bb.0:
-; X64-NEXT: vpor (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0x07]
+; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc1]
+; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
; X86-LABEL: test_mask_xor_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpxor (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0x00]
+; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_128:
; X64: # %bb.0:
-; X64-NEXT: vpxor (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0x07]
+; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0xc1]
+; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
; X86-LABEL: test_mask_xor_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpxor (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0x00]
+; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_256:
; X64: # %bb.0:
-; X64-NEXT: vpxor (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0x07]
+; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1]
+; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
ret <4 x i32> %res
; X86-LABEL: test_mask_andnot_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpandn (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x00]
+; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rm_128:
; X64: # %bb.0:
-; X64-NEXT: vpandn (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x07]
+; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1]
+; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
; X86-LABEL: test_mask_andnot_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpandn (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x00]
+; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rm_256:
; X64: # %bb.0:
-; X64-NEXT: vpandn (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x07]
+; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1]
+; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
; X86-LABEL: test_mask_andnot_epi64_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpandn (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x00]
+; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rm_128:
; X64: # %bb.0:
-; X64-NEXT: vpandn (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x07]
+; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <2 x i64>, <2 x i64>* %ptr_b
%res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1]
+; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
ret <4 x i64> %res
; X86-LABEL: test_mask_andnot_epi64_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpandn (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x00]
+; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rm_256:
; X64: # %bb.0:
-; X64-NEXT: vpandn (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x07]
+; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%b = load <4 x i64>, <4 x i64>* %ptr_b
%res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
; X86-LABEL: test_mask_andnot_epi64_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
+; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
; X86-NEXT: # xmm1 = mem[0],zero
-; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
-; X86-NEXT: vpandn %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1]
+; X86-NEXT: vbroadcastsd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc9]
+; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmb_256:
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
-; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
+; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
; X86-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
-; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
+; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
; X64-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
-; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
; X86-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
-; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
; X64-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
; X86-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
-; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
+; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
; X86-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
; X86-NEXT: vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
; X86-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
; X64-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
-; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
+; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
; X64-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
; X64-NEXT: vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
; X64-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
; X86-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
-; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
+; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
; X86-NEXT: vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
; X86-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
; X86-NEXT: vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
; X64-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
-; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
+; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
; X64-NEXT: vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
; X64-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
; X64-NEXT: vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
-; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT: vaddps %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4]
; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05]
; X64-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xe2,0x05]
-; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
; X64-NEXT: vaddps %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0]
; X64-NEXT: vaddps %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
-; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT: vaddps %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4]
; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05]
; X64-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xe2,0x05]
-; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
; X64-NEXT: vaddps %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0]
; X64-NEXT: vaddps %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
-; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
; X86-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
; X86-NEXT: vaddps %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4]
; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
; X64-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xe2,0x05]
-; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
; X64-NEXT: vaddps %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
-; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
; X86-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
; X86-NEXT: vaddps %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4]
; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
; X64-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xe2,0x05]
-; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2]
; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
; X64-NEXT: vaddps %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT: vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT: vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
; fold (abs x) -> x iff not-negative
define <16 x i8> @combine_v16i8_abs_constant(<16 x i8> %a) {
-; AVX2-LABEL: combine_v16i8_abs_constant:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: combine_v16i8_abs_constant:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: combine_v16i8_abs_constant:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; CHECK-LABEL: combine_v16i8_abs_constant:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%1 = insertelement <16 x i8> undef, i8 15, i32 0
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
%3 = and <16 x i8> %a, %2
;
; VLX-LABEL: test_zero_v4f32:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
ret void
;
; VLX-LABEL: test_zero_v4i32:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
;
; VLX-LABEL: test_zero_v2f64:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
ret void
;
; VLX-LABEL: test_zero_v2i64:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
ret void
;
; VLX-LABEL: test_zero_v8i16:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
ret void
;
; VLX-LABEL: test_zero_v16i8:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %xmm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %xmm0, (%rdi)
; VLX-NEXT: retq
store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
ret void
;
; VLX-LABEL: test_zero_v8f32:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
;
; VLX-LABEL: test_zero_v8i32:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
;
; VLX-LABEL: test_zero_v4f64:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
;
; VLX-LABEL: test_zero_v4i64:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
;
; VLX-LABEL: test_zero_v16i16:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
;
; VLX-LABEL: test_zero_v32i8:
; VLX: # %bb.0:
-; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; VLX-NEXT: vmovntdq %ymm0, (%rdi)
+; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vmovntps %ymm0, (%rdi)
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
; X32-AVX-NEXT: vmovaps %xmm1, (%eax)
; X32-AVX-NEXT: retl
;
-; X32-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
-; X32-AVX512F: # %bb.0:
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
-; X32-AVX512BW: # %bb.0:
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
-; X32-AVX512DQ: # %bb.0:
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X32-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: test_broadcast_4i32_8i32_chain:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
+; X32-AVX512-NEXT: vmovaps %xmm1, (%eax)
+; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %xmm1, (%rsi)
; X64-AVX-NEXT: retq
;
-; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
-; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: test_broadcast_4i32_8i32_chain:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
+; X64-AVX512-NEXT: vmovaps %xmm1, (%rsi)
+; X64-AVX512-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %p0
store <4 x float> zeroinitializer, <4 x float>* %p1
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
-; X32-AVX512F-LABEL: test_broadcast_4i32_16i32_chain:
-; X32-AVX512F: # %bb.0:
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain:
-; X32-AVX512BW: # %bb.0:
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain:
-; X32-AVX512DQ: # %bb.0:
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: test_broadcast_4i32_16i32_chain:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT: vmovaps %xmm1, (%eax)
+; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
-; X64-AVX512F-LABEL: test_broadcast_4i32_16i32_chain:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain:
-; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: test_broadcast_4i32_16i32_chain:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vmovaps %xmm1, (%rsi)
+; X64-AVX512-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %p0
store <4 x float> zeroinitializer, <4 x float>* %p1
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
}
define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
-; AVX512VL-LABEL: v2f64:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512VLDQ-LABEL: v2f64:
-; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512VLDQ-NEXT: retq
+; CHECK-LABEL: v2f64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b )
ret <2 x double> %tmp
}
; 2013.
define <2 x double> @fabs_v2f64(<2 x double> %p) {
-; X32_AVX-LABEL: fabs_v2f64:
-; X32_AVX: # %bb.0:
-; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32_AVX-NEXT: retl
-;
-; X32_AVX512VL-LABEL: fabs_v2f64:
-; X32_AVX512VL: # %bb.0:
-; X32_AVX512VL-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32_AVX512VL-NEXT: retl
-;
-; X32_AVX512VLDQ-LABEL: fabs_v2f64:
-; X32_AVX512VLDQ: # %bb.0:
-; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32_AVX512VLDQ-NEXT: retl
-;
-; X64_AVX-LABEL: fabs_v2f64:
-; X64_AVX: # %bb.0:
-; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; X64_AVX-NEXT: retq
-;
-; X64_AVX512VL-LABEL: fabs_v2f64:
-; X64_AVX512VL: # %bb.0:
-; X64_AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64_AVX512VL-NEXT: retq
+; X32-LABEL: fabs_v2f64:
+; X32: # %bb.0:
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: retl
;
-; X64_AVX512VLDQ-LABEL: fabs_v2f64:
-; X64_AVX512VLDQ: # %bb.0:
-; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; X64_AVX512VLDQ-NEXT: retq
+; X64-LABEL: fabs_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: retq
%t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
ret <2 x double> %t
}
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vandpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vorpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vorpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v2f32_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v2f32_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v2f32_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float 0.0, <2 x float> %a0)
ret float %1
}
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v4f32_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v4f32_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v4f32_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float 0.0, <4 x float> %a0)
ret float %1
}
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v8f32_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v8f32_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v8f32_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0)
ret float %1
}
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v16f32_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
-; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
-; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v16f32_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
-; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
-; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v16f32_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
+; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
+; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
+; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
+; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0)
ret float %1
}
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v2f64_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v2f64_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v2f64_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double 0.0, <2 x double> %a0)
ret double %1
}
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v4f64_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v4f64_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v4f64_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0)
ret double %1
}
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v8f64_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v8f64_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v8f64_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
+; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0)
ret double %1
}
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-; AVX512BW-LABEL: test_v16f64_zero:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm2
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm3
-; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512BW-NEXT: vaddsd %xmm0, %xmm2, %xmm2
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512BW-NEXT: vaddsd %xmm0, %xmm2, %xmm0
-; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm1, %xmm2
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm1, %xmm1
-; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
-; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_v16f64_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm2
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm3
-; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512VL-NEXT: vaddsd %xmm0, %xmm2, %xmm2
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vaddsd %xmm0, %xmm2, %xmm0
-; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vextractf32x4 $2, %zmm1, %xmm2
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vextractf32x4 $3, %zmm1, %xmm1
-; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
-; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_v16f64_zero:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm2
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
+; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
+; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
+; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
+; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm2
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
+; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
+; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
ret double %1
}
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
+; AVX: # %bb.0:
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
ret <16 x i8> %shuffle
}
; SSE-NEXT: movaps %xmm0, (%rsi)
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: constant_gets_selected:
-; AVX1OR2: # %bb.0: # %entry
-; AVX1OR2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovaps %xmm0, (%rdi)
-; AVX1OR2-NEXT: vmovaps %xmm0, (%rsi)
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: constant_gets_selected:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX512VL-NEXT: retq
+; AVX-LABEL: constant_gets_selected:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovaps %xmm0, (%rdi)
+; AVX-NEXT: vmovaps %xmm0, (%rsi)
+; AVX-NEXT: retq
entry:
%weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
%shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_z1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_z1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_z1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_z1:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2f64_1z:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_1z:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_1z:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_1z:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle
}
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2f64_z0:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_z0:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_z0:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_z0:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x double> %shuffle
}
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2f64_z1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_z1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_z1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_z1:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
ret <2 x double> %shuffle
}
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_bitcast_1z:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
%shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
%bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
%shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_bitcast_z123:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: retq
%bitcast32 = bitcast <2 x i64> %x to <4 x float>
%shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
%bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
; AVX512VL-SLOW-LABEL: shuffle_v4f64_0z3z:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX512VL-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX512VL-SLOW-NEXT: retq
;
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1z2z:
; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
-; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
+; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_1z2z:
;
; AVX512VL-SLOW-LABEL: shuffle_v4i64_z0z3:
; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_z0z3:
;
; AVX512VL-SLOW-LABEL: shuffle_v4i64_1z2z:
; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
-; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
+; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_1z2z:
}
define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v4f64_0zzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_0zzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT: retq
%b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x double> %b
}
define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v4i64_0zzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4i64_0zzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT: retq
%b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x i64> %b
}
define <8 x float> @shuffle_v8f32_0zzzzzzz_optsize(<8 x float> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; ALL-NEXT: retq
%b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x float> %b
}
define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; ALL-NEXT: retq
%b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i32> %b
}
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: trunc16i64_16i8_const:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc16i64_16i8_const:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc16i64_16i8_const:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc16i64_16i8_const:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc16i64_16i8_const:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
entry:
%0 = trunc <16 x i64> zeroinitializer to <16 x i8>
;
; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq