From cdb0ed291017160ffec4dc84f88aad075945a118 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 15 Jul 2018 23:32:36 +0000
Subject: [PATCH] [X86] Add custom execution domain fixing for 128/256-bit
 integer logic operations with AVX512F, but not AVX512DQ.

AVX512F only has integer domain logic instructions. AVX512DQ added FP domain
logic instructions.

Execution domain fixing runs before EVEX->VEX. So if we have AVX512F and not
AVX512DQ we fail to do execution domain switching of the logic operations.
This leads to mismatches in execution domain and more test differences.

This patch adds custom domain fixing that switches EVEX integer logic
operations to VEX fp logic operations if XMM16-31 are not used.

llvm-svn: 337137
---
 llvm/lib/Target/X86/X86InstrInfo.cpp                 |  85 ++++
 llvm/test/CodeGen/X86/avx512-cvt.ll                  | 146 ++-----
 .../X86/avx512-shuffles/broadcast-scalar-fp.ll       | 160 ++++----
 .../CodeGen/X86/avx512-shuffles/duplicate-high.ll    | 120 +++---
 .../CodeGen/X86/avx512-shuffles/duplicate-low.ll     | 216 +++++-----
 .../CodeGen/X86/avx512-shuffles/in_lane_permute.ll   | 240 +++++------
 .../CodeGen/X86/avx512-shuffles/partial_permute.ll   | 304 +++++-------
 llvm/test/CodeGen/X86/avx512-shuffles/permute.ll     | 160 ++++----
 .../X86/avx512-shuffles/shuffle-interleave.ll        | 176 ++++----
 .../CodeGen/X86/avx512-shuffles/shuffle-vec.ll       | 128 +++---
 llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll      | 352 ++++++++--------
 llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll       |  26 +-
 .../CodeGen/X86/avx512vl-intrinsics-upgrade.ll       |  66 +--
 llvm/test/CodeGen/X86/avx512vl-intrinsics.ll         |  32 +-
 llvm/test/CodeGen/X86/avx512vl-mov.ll                |   8 +-
 llvm/test/CodeGen/X86/combine-abs.ll                 |  18 +-
 llvm/test/CodeGen/X86/nontemporal-2.ll               |  48 +--
 llvm/test/CodeGen/X86/subvector-broadcast.ll         | 120 ++----
 llvm/test/CodeGen/X86/vec-copysign-avx512.ll         |  19 +-
 llvm/test/CodeGen/X86/vec_fabs.ll                    |  36 +-
 llvm/test/CodeGen/X86/vector-compare-all_of.ll       |  24 +-
 llvm/test/CodeGen/X86/vector-compare-any_of.ll       |  24 +-
 llvm/test/CodeGen/X86/vector-reduce-fadd.ll          | 452 +++++++--------
 llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll      |  32 +-
 llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll       | 132 ++----
 llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll       |  84 ++--
 llvm/test/CodeGen/X86/vector-trunc.ll                |  23 +-
 llvm/test/CodeGen/X86/vselect-pcmp.ll                |   2 +-
 28 files changed, 1427 insertions(+), 1806 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1576d90..dd1c658 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6311,6 +6311,29 @@ static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
   { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
 };
 
+// Special table for changing EVEX logic instructions to VEX.
+// TODO: Should we run EVEX->VEX earlier?
+static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = {
+  // Two integer columns for 64-bit and 32-bit elements.
+  //PackedSingle     PackedDouble       PackedInt           PackedInt
+  { X86::VANDNPSrm,  X86::VANDNPDrm,  X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
+  { X86::VANDNPSrr,  X86::VANDNPDrr,  X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
+  { X86::VANDPSrm,   X86::VANDPDrm,   X86::VPANDQZ128rm,  X86::VPANDDZ128rm  },
+  { X86::VANDPSrr,   X86::VANDPDrr,   X86::VPANDQZ128rr,  X86::VPANDDZ128rr  },
+  { X86::VORPSrm,    X86::VORPDrm,    X86::VPORQZ128rm,   X86::VPORDZ128rm   },
+  { X86::VORPSrr,    X86::VORPDrr,    X86::VPORQZ128rr,   X86::VPORDZ128rr   },
+  { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORQZ128rm,  X86::VPXORDZ128rm  },
+  { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORQZ128rr,  X86::VPXORDZ128rr  },
+  { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
+  { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
+  { X86::VANDPSYrm,  X86::VANDPDYrm,  X86::VPANDQZ256rm,  X86::VPANDDZ256rm  },
+  { X86::VANDPSYrr,  X86::VANDPDYrr,  X86::VPANDQZ256rr,  X86::VPANDDZ256rr  },
+  { X86::VORPSYrm,   X86::VORPDYrm,   X86::VPORQZ256rm,   X86::VPORDZ256rm   },
+  { X86::VORPSYrr,   X86::VORPDYrr,   X86::VPORQZ256rr,   X86::VPORDZ256rr   },
+  { X86::VXORPSYrm,  X86::VXORPDYrm,  X86::VPXORQZ256rm,  X86::VPXORDZ256rm  },
+  { X86::VXORPSYrr,  X86::VXORPDYrr,  X86::VPXORQZ256rr,  X86::VPXORDZ256rr  },
+};
+
 // FIXME: Some shuffle and unpack instructions have equivalents in different
 // domains, but they require a bit more work than just switching opcodes.
 
@@ -6410,6 +6433,38 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
   case X86::VPBLENDWYrmi:
   case X86::VPBLENDWYrri:
     return GetBlendDomains(8, false);
+  case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
+  case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
+  case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
+  case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
+  case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
+  case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
+  case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
+  case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
+  case X86::VPORDZ128rr: case X86::VPORDZ128rm:
+  case X86::VPORDZ256rr: case X86::VPORDZ256rm:
+  case X86::VPORQZ128rr: case X86::VPORQZ128rm:
+  case X86::VPORQZ256rr: case X86::VPORQZ256rm:
+  case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
+  case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
+  case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
+  case X86::VPXORQZ256rr: case X86::VPXORQZ256rm:
+    // If we don't have DQI see if we can still switch from an EVEX integer
+    // instruction to a VEX floating point instruction.
+    if (Subtarget.hasDQI())
+      return 0;
+
+    if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
+      return 0;
+    if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
+      return 0;
+    // Register forms will have 3 operands. Memory form will have more.
+    if (NumOperands == 3 &&
+        RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
+      return 0;
+
+    // All domains are valid.
+    return 0xe;
   }
   return 0;
 }
@@ -6486,6 +6541,36 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
   case X86::VPBLENDWYrmi:
   case X86::VPBLENDWYrri:
     return SetBlendDomain(16, true);
+  case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
+  case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
+  case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
+  case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
+  case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
+  case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
+  case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
+  case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
+  case X86::VPORDZ128rr: case X86::VPORDZ128rm:
+  case X86::VPORDZ256rr: case X86::VPORDZ256rm:
+  case X86::VPORQZ128rr: case X86::VPORQZ128rm:
+  case X86::VPORQZ256rr: case X86::VPORQZ256rm:
+  case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
+  case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
+  case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
+  case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: {
+    // Without DQI, convert EVEX instructions to VEX instructions.
+    if (Subtarget.hasDQI())
+      return false;
+
+    const uint16_t *table = lookupAVX512(MI.getOpcode(), dom,
+                                         ReplaceableCustomAVX512LogicInstrs);
+    assert(table && "Instruction not found in table?");
+    // Don't change integer Q instructions to D instructions and
+    // use D instructions if we started with a PS instruction.
+    if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
+      Domain = 4;
+    MI.setDesc(get(table[Domain - 1]));
+    return true;
+  }
   }
   return false;
 }
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 1a277d8..f6aead9 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1542,17 +1542,17 @@ define <16 x double> @scto16f64(<16 x i8> %a) {
 }
 
 define <16 x double> @sbto16f64(<16 x double> %a) {
-; NOVLDQ-LABEL: sbto16f64:
-; NOVLDQ:       # %bb.0:
-; NOVLDQ-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
-; NOVLDQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
-; NOVLDQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
-; NOVLDQ-NEXT:    kunpckbw %k0, %k1, %k1
-; NOVLDQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NOVLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
-; NOVLDQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
-; NOVLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
-; NOVLDQ-NEXT:    retq
+; NODQ-LABEL: sbto16f64:
+; NODQ:       # %bb.0:
+; NODQ-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
+; NODQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
+; NODQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
+; NODQ-NEXT:    kunpckbw %k0, %k1, %k1
+; NODQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
+; NODQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; NODQ-NEXT:    retq
 ;
 ; VLDQ-LABEL: sbto16f64:
 ; VLDQ:       # %bb.0:
@@ -1566,18 +1566,6 @@ define <16 x double> @sbto16f64(<16 x double> %a) {
 ; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
 ; VLDQ-NEXT:    retq
 ;
-; VLNODQ-LABEL: sbto16f64:
-; VLNODQ:       # %bb.0:
-; VLNODQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; VLNODQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
-; VLNODQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
-; VLNODQ-NEXT:    kunpckbw %k0, %k1, %k1
-; VLNODQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; VLNODQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
-; VLNODQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
-; VLNODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
-; VLNODQ-NEXT:    retq
-;
 ; DQNOVL-LABEL: sbto16f64:
 ; DQNOVL:       # %bb.0:
 ; DQNOVL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
@@ -1613,7 +1601,7 @@ define <8 x double> @sbto8f64(<8 x double> %a) {
 ;
 ; VLNODQ-LABEL: sbto8f64:
 ; VLNODQ:       # %bb.0:
-; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT:
vxorpd %xmm1, %xmm1, %xmm1 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} @@ -1633,52 +1621,24 @@ define <8 x double> @sbto8f64(<8 x double> %a) { } define <8 x float> @sbto8f32(<8 x float> %a) { -; NOVL-LABEL: sbto8f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; NOVL-NEXT: vcvtdq2ps %ymm0, %ymm0 -; NOVL-NEXT: retq -; -; VLDQ-LABEL: sbto8f32: -; VLDQ: # %bb.0: -; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0 -; VLDQ-NEXT: retq -; -; VLNODQ-LABEL: sbto8f32: -; VLNODQ: # %bb.0: -; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0 -; VLNODQ-NEXT: retq +; ALL-LABEL: sbto8f32: +; ALL: # %bb.0: +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; ALL-NEXT: vcvtdq2ps %ymm0, %ymm0 +; ALL-NEXT: retq %cmpres = fcmp ogt <8 x float> %a, zeroinitializer %1 = sitofp <8 x i1> %cmpres to <8 x float> ret <8 x float> %1 } define <4 x float> @sbto4f32(<4 x float> %a) { -; NOVL-LABEL: sbto4f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq -; -; VLDQ-LABEL: sbto4f32: -; VLDQ: # %bb.0: -; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 -; VLDQ-NEXT: retq -; -; VLNODQ-LABEL: sbto4f32: -; VLNODQ: # %bb.0: -; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 -; VLNODQ-NEXT: retq +; ALL-LABEL: sbto4f32: +; ALL: # %bb.0: +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; ALL-NEXT: retq %cmpres = fcmp ogt <4 x float> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x float> ret <4 x float> %1 @@ -1703,7 +1663,7 @@ define <4 x double> @sbto4f64(<4 x double> %a) { ; ; VLNODQ-LABEL: sbto4f64: ; VLNODQ: # %bb.0: -; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} @@ -1715,55 +1675,25 @@ define <4 x double> @sbto4f64(<4 x double> %a) { } define <2 x float> @sbto2f32(<2 x float> %a) { -; NOVL-LABEL: sbto2f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq -; -; VLDQ-LABEL: sbto2f32: -; VLDQ: # %bb.0: -; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 -; VLDQ-NEXT: retq -; -; VLNODQ-LABEL: sbto2f32: -; VLNODQ: # %bb.0: -; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 -; VLNODQ-NEXT: retq +; ALL-LABEL: sbto2f32: +; ALL: # %bb.0: +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; ALL-NEXT: retq %cmpres = fcmp ogt <2 x float> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x float> ret <2 x float> %1 } define <2 x double> @sbto2f64(<2 x double> %a) { -; NOVL-LABEL: sbto2f64: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: 
vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0 -; NOVL-NEXT: retq -; -; VLDQ-LABEL: sbto2f64: -; VLDQ: # %bb.0: -; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 -; VLDQ-NEXT: retq -; -; VLNODQ-LABEL: sbto2f64: -; VLNODQ: # %bb.0: -; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; VLNODQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 -; VLNODQ-NEXT: retq +; ALL-LABEL: sbto2f64: +; ALL: # %bb.0: +; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; ALL-NEXT: vcvtdq2pd %xmm0, %xmm0 +; ALL-NEXT: retq %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> ret <2 x double> %1 diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll index 1d47794..fac4b2f 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll @@ -13,7 +13,7 @@ define <4 x double> @test_double_to_4(double %s) { define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -28,7 +28,7 @@ define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %defa define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -41,7 +41,7 @@ define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %ma define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -56,7 +56,7 @@ define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %defa define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -69,7 +69,7 @@ define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %ma define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -84,7 +84,7 @@ define <4 x 
double> @test_masked_double_to_4_mask2(double %s, <4 x double> %defa define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -97,7 +97,7 @@ define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %ma define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -112,7 +112,7 @@ define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %defa define <4 x double> @test_masked_z_double_to_4_mask3(double %s, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -134,7 +134,7 @@ define <8 x double> @test_double_to_8(double %s) { define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -149,7 +149,7 @@ define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %defa define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -162,7 +162,7 @@ define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %ma define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -177,7 +177,7 @@ define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %defa define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -190,7 +190,7 @@ define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %ma define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; 
CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -205,7 +205,7 @@ define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %defa define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -218,7 +218,7 @@ define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %ma define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -233,7 +233,7 @@ define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %defa define <8 x double> @test_masked_z_double_to_8_mask3(double %s, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -255,7 +255,7 @@ define <4 x float> @test_float_to_4(float %s) { define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -270,7 +270,7 @@ define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -283,7 +283,7 @@ define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -298,7 +298,7 @@ define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -311,7 +311,7 @@ define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; 
CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -326,7 +326,7 @@ define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -339,7 +339,7 @@ define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -354,7 +354,7 @@ define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, define <4 x float> @test_masked_z_float_to_4_mask3(float %s, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -376,7 +376,7 @@ define <8 x float> @test_float_to_8(float %s) { define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -391,7 +391,7 @@ define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -404,7 +404,7 @@ define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -419,7 +419,7 @@ define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -432,7 +432,7 @@ define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mask2: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -447,7 +447,7 @@ define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -460,7 +460,7 @@ define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -475,7 +475,7 @@ define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, define <8 x float> @test_masked_z_float_to_8_mask3(float %s, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -497,7 +497,7 @@ define <16 x float> @test_float_to_16(float %s) { define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -512,7 +512,7 @@ define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %defau define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -525,7 +525,7 @@ define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mas define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -540,7 +540,7 @@ define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %defau define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -553,7 +553,7 @@ define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mas define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x 
float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -568,7 +568,7 @@ define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %defau define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -581,7 +581,7 @@ define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mas define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -596,7 +596,7 @@ define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %defau define <16 x float> @test_masked_z_float_to_16_mask3(float %s, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -619,7 +619,7 @@ define <4 x double> @test_double_to_4_mem(double* %p) { define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -634,7 +634,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -648,7 +648,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p, <4 x double define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -663,7 +663,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -677,7 +677,7 @@ define <4 x double> 
@test_masked_z_double_to_4_mem_mask1(double* %p, <4 x double define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -692,7 +692,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -706,7 +706,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p, <4 x double define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_4_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -721,7 +721,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -745,7 +745,7 @@ define <8 x double> @test_double_to_8_mem(double* %p) { define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -760,7 +760,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -774,7 +774,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p, <8 x double define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -789,7 +789,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd 
%zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -803,7 +803,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p, <8 x double define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -818,7 +818,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -832,7 +832,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p, <8 x double define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_double_to_8_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -847,7 +847,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -871,7 +871,7 @@ define <4 x float> @test_float_to_4_mem(float* %p) { define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq @@ -886,7 +886,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %def define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -900,7 +900,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p, <4 x float> %m define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq @@ -915,7 +915,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %def define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -929,7 +929,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p, <4 x float> %m define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq @@ -944,7 +944,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %def define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -958,7 +958,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p, <4 x float> %m define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %default, <4 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_4_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq @@ -973,7 +973,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %def define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -997,7 +997,7 @@ define <8 x float> @test_float_to_8_mem(float* %p) { define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1012,7 +1012,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %def define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1026,7 +1026,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p, <8 x float> %m define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1041,7 +1041,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %def define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p, <8 x 
float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1055,7 +1055,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p, <8 x float> %m define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1070,7 +1070,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %def define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1084,7 +1084,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p, <8 x float> %m define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_8_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1099,7 +1099,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %def define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1123,7 +1123,7 @@ define <16 x float> @test_float_to_16_mem(float* %p) { define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -1138,7 +1138,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> % define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1152,7 +1152,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p, <16 x float> define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -1167,7 +1167,7 @@ define <16 x float> 
@test_masked_float_to_16_mem_mask1(float* %p, <16 x float> % define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1181,7 +1181,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p, <16 x float> define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -1196,7 +1196,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> % define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1210,7 +1210,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p, <16 x float> define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_float_to_16_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq @@ -1225,7 +1225,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> % define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll index 195c856..3555049 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll @@ -12,7 +12,7 @@ define <4 x float> @test_4xfloat_dup_high(<4 x float> %vec) { define <4 x float> @test_masked_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -26,7 +26,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x fl define <4 x float> @test_masked_z_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq @@ -38,7 +38,7 @@ define <4 x float> 
@test_masked_z_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x define <4 x float> @test_masked_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -52,7 +52,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x fl define <4 x float> @test_masked_z_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq @@ -64,7 +64,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x define <4 x float> @test_masked_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -78,7 +78,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x fl define <4 x float> @test_masked_z_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq @@ -90,7 +90,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x define <4 x float> @test_masked_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -104,7 +104,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x fl define <4 x float> @test_masked_z_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq @@ -116,7 +116,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x define <4 x float> @test_masked_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -130,7 +130,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x fl define <4 x float> @test_masked_z_4xfloat_dup_high_mask4(<4 x float> 
%vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq @@ -151,7 +151,7 @@ define <4 x float> @test_4xfloat_dup_high_mem(<4 x float>* %vp) { define <4 x float> @test_masked_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -165,7 +165,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -178,7 +178,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, < define <4 x float> @test_masked_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -192,7 +192,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -205,7 +205,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, < define <4 x float> @test_masked_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -219,7 +219,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -232,7 +232,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, < define <4 x float> @test_masked_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; 
CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -246,7 +246,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -259,7 +259,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, < define <4 x float> @test_masked_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -273,7 +273,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq @@ -294,7 +294,7 @@ define <8 x float> @test_8xfloat_dup_high(<8 x float> %vec) { define <8 x float> @test_masked_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -308,7 +308,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -320,7 +320,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x define <8 x float> @test_masked_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -334,7 +334,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = 
ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -346,7 +346,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x define <8 x float> @test_masked_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -360,7 +360,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -372,7 +372,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x define <8 x float> @test_masked_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -386,7 +386,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -398,7 +398,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x define <8 x float> @test_masked_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -412,7 +412,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -433,7 +433,7 @@ define <8 x float> @test_8xfloat_dup_high_mem(<8 x float>* %vp) { define <8 x float> @test_masked_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -447,7 +447,7 @@ define <8 x float> 
@test_masked_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -460,7 +460,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, < define <8 x float> @test_masked_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -474,7 +474,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -487,7 +487,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, < define <8 x float> @test_masked_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -501,7 +501,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -514,7 +514,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, < define <8 x float> @test_masked_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -528,7 +528,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -541,7 +541,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, < define <8 x float> 
@test_masked_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -555,7 +555,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq @@ -576,7 +576,7 @@ define <16 x float> @test_16xfloat_dup_high(<16 x float> %vec) { define <16 x float> @test_masked_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -590,7 +590,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask0(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -602,7 +602,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask0(<16 x float> %vec, <1 define <16 x float> @test_masked_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -616,7 +616,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask1(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -628,7 +628,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask1(<16 x float> %vec, <1 define <16 x float> @test_masked_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -642,7 +642,7 @@ define <16 x float> 
@test_masked_16xfloat_dup_high_mask2(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -654,7 +654,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask2(<16 x float> %vec, <1 define <16 x float> @test_masked_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -668,7 +668,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask3(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -680,7 +680,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask3(<16 x float> %vec, <1 define <16 x float> @test_masked_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -694,7 +694,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask4(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -715,7 +715,7 @@ define <16 x float> @test_16xfloat_dup_high_mem(<16 x float>* %vp) { define <16 x float> @test_masked_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -729,7 +729,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = 
mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -742,7 +742,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask0(<16 x float>* %vp define <16 x float> @test_masked_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -756,7 +756,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -769,7 +769,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask1(<16 x float>* %vp define <16 x float> @test_masked_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -783,7 +783,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -796,7 +796,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask2(<16 x float>* %vp define <16 x float> @test_masked_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -810,7 +810,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -823,7 +823,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask3(<16 x float>* %vp define <16 x float> @test_masked_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: 
vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq @@ -837,7 +837,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll index b32cb60..3a80f64 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll @@ -12,7 +12,7 @@ define <2 x double> @test_2xdouble_dup_low(<2 x double> %vec) { define <2 x double> @test_masked_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -26,7 +26,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x define <2 x double> @test_masked_z_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; CHECK-NEXT: retq @@ -38,7 +38,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mask0(<2 x double> %vec, <2 define <2 x double> @test_masked_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -52,7 +52,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x define <2 x double> @test_masked_z_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; CHECK-NEXT: retq @@ -73,7 +73,7 @@ define <2 x double> @test_2xdouble_dup_low_mem(<2 x double>* %vp) { define <2 x double> @test_masked_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] ; CHECK-NEXT: retq @@ -87,7 +87,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, < define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, <2 x 
double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] ; CHECK-NEXT: retq @@ -100,7 +100,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, define <2 x double> @test_masked_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] ; CHECK-NEXT: retq @@ -114,7 +114,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, < define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] ; CHECK-NEXT: retq @@ -135,7 +135,7 @@ define <4 x double> @test_4xdouble_dup_low(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -149,7 +149,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x define <4 x double> @test_masked_z_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq @@ -161,7 +161,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask0(<4 x double> %vec, <4 define <4 x double> @test_masked_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -175,7 +175,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x define <4 x double> @test_masked_z_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq @@ -187,7 +187,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask1(<4 x double> %vec, <4 define <4 x double> @test_masked_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: 
vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -201,7 +201,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x define <4 x double> @test_masked_z_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq @@ -213,7 +213,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask2(<4 x double> %vec, <4 define <4 x double> @test_masked_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -227,7 +227,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x define <4 x double> @test_masked_z_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq @@ -239,7 +239,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask3(<4 x double> %vec, <4 define <4 x double> @test_masked_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -253,7 +253,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x define <4 x double> @test_masked_z_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq @@ -274,7 +274,7 @@ define <4 x double> @test_4xdouble_dup_low_mem(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -288,7 +288,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, < define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; 
CHECK-NEXT: retq @@ -301,7 +301,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, define <4 x double> @test_masked_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -315,7 +315,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, < define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -328,7 +328,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, define <4 x double> @test_masked_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -342,7 +342,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, < define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -355,7 +355,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, define <4 x double> @test_masked_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -369,7 +369,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, < define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -382,7 +382,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, define <4 x double> @test_masked_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -396,7 +396,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, < 
define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -417,7 +417,7 @@ define <8 x double> @test_8xdouble_dup_low(<8 x double> %vec) { define <8 x double> @test_masked_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -431,7 +431,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -443,7 +443,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask0(<8 x double> %vec, <8 define <8 x double> @test_masked_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -457,7 +457,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -469,7 +469,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask1(<8 x double> %vec, <8 define <8 x double> @test_masked_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -483,7 +483,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -495,7 +495,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask2(<8 x double> %vec, <8 define <8 x double> @test_masked_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x 
double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -509,7 +509,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -521,7 +521,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask3(<8 x double> %vec, <8 define <8 x double> @test_masked_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -535,7 +535,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -556,7 +556,7 @@ define <8 x double> @test_8xdouble_dup_low_mem(<8 x double>* %vp) { define <8 x double> @test_masked_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -570,7 +570,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, < define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -583,7 +583,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, define <8 x double> @test_masked_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -597,7 +597,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, < define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask1: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -610,7 +610,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, define <8 x double> @test_masked_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -624,7 +624,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, < define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -637,7 +637,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, define <8 x double> @test_masked_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -651,7 +651,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, < define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -664,7 +664,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, define <8 x double> @test_masked_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -678,7 +678,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, < define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -699,7 +699,7 @@ define <4 x float> @test_4xfloat_dup_low(<4 x float> %vec) { define <4 x float> @test_masked_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; 
CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -713,7 +713,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x flo define <4 x float> @test_masked_z_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq @@ -725,7 +725,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x f define <4 x float> @test_masked_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -739,7 +739,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x flo define <4 x float> @test_masked_z_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq @@ -751,7 +751,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x f define <4 x float> @test_masked_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -765,7 +765,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x flo define <4 x float> @test_masked_z_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq @@ -777,7 +777,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x f define <4 x float> @test_masked_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -791,7 +791,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x flo define <4 x float> @test_masked_z_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq @@ -803,7 +803,7 @@ define 
<4 x float> @test_masked_z_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x f define <4 x float> @test_masked_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -817,7 +817,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x flo define <4 x float> @test_masked_z_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq @@ -838,7 +838,7 @@ define <4 x float> @test_4xfloat_dup_low_mem(<4 x float>* %vp) { define <4 x float> @test_masked_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -852,7 +852,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -865,7 +865,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -879,7 +879,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -892,7 +892,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -906,7 +906,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x float> 
%mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -919,7 +919,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -933,7 +933,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -946,7 +946,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -960,7 +960,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq @@ -981,7 +981,7 @@ define <8 x float> @test_8xfloat_dup_low(<8 x float> %vec) { define <8 x float> @test_masked_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -995,7 +995,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x flo define <8 x float> @test_masked_z_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1007,7 +1007,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x f define <8 x float> @test_masked_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, 
%xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1021,7 +1021,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x flo define <8 x float> @test_masked_z_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1033,7 +1033,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x f define <8 x float> @test_masked_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1047,7 +1047,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x flo define <8 x float> @test_masked_z_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1059,7 +1059,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x f define <8 x float> @test_masked_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1073,7 +1073,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x flo define <8 x float> @test_masked_z_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1085,7 +1085,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x f define <8 x float> @test_masked_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1099,7 +1099,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x flo define <8 x float> @test_masked_z_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup 
{{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1120,7 +1120,7 @@ define <8 x float> @test_8xfloat_dup_low_mem(<8 x float>* %vp) { define <8 x float> @test_masked_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1134,7 +1134,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1147,7 +1147,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1161,7 +1161,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1174,7 +1174,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1188,7 +1188,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1201,7 +1201,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1215,7 +1215,7 @@ define 
<8 x float> @test_masked_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1228,7 +1228,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1242,7 +1242,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq @@ -1263,7 +1263,7 @@ define <16 x float> @test_16xfloat_dup_low(<16 x float> %vec) { define <16 x float> @test_masked_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -1277,7 +1277,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x define <16 x float> @test_masked_z_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1289,7 +1289,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask0(<16 x float> %vec, <16 define <16 x float> @test_masked_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -1303,7 +1303,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x define <16 x float> @test_masked_z_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1315,7 +1315,7 @@ 
define <16 x float> @test_masked_z_16xfloat_dup_low_mask1(<16 x float> %vec, <16 define <16 x float> @test_masked_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -1329,7 +1329,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x define <16 x float> @test_masked_z_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1341,7 +1341,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask2(<16 x float> %vec, <16 define <16 x float> @test_masked_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -1355,7 +1355,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x define <16 x float> @test_masked_z_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1367,7 +1367,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask3(<16 x float> %vec, <16 define <16 x float> @test_masked_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -1381,7 +1381,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x define <16 x float> @test_masked_z_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1402,7 +1402,7 @@ define <16 x float> @test_16xfloat_dup_low_mem(<16 x float>* %vp) { define <16 x float> @test_masked_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup 
{{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1416,7 +1416,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, < define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1429,7 +1429,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, define <16 x float> @test_masked_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1443,7 +1443,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, < define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1456,7 +1456,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, define <16 x float> @test_masked_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1470,7 +1470,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, < define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1483,7 +1483,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, define <16 x float> @test_masked_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1497,7 +1497,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, < define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; 
CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1510,7 +1510,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, define <16 x float> @test_masked_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq @@ -1524,7 +1524,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, < define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll index 24b387d9..4697e47 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll @@ -14,7 +14,7 @@ define <4 x float> @test_4xfloat_perm_mask0(<4 x float> %vec) { define <4 x float> @test_masked_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[2,1,3,1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -28,7 +28,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> define <4 x float> @test_masked_z_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,3,1] ; CHECK-NEXT: retq @@ -40,7 +40,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask0(<4 x float> %vec, <4 x floa define <4 x float> @test_masked_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -54,7 +54,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> define <4 x float> @test_masked_z_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2] ; CHECK-NEXT: retq @@ -66,7 +66,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask1(<4 x float> %vec, <4 x floa define <4 x float> 
@test_masked_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,3,2,1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -80,7 +80,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> define <4 x float> @test_masked_z_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,2,1] ; CHECK-NEXT: retq @@ -100,7 +100,7 @@ define <4 x float> @test_4xfloat_perm_mask3(<4 x float> %vec) { define <4 x float> @test_masked_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -114,7 +114,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> define <4 x float> @test_masked_z_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2] ; CHECK-NEXT: retq @@ -135,7 +135,7 @@ define <4 x float> @test_4xfloat_perm_mem_mask0(<4 x float>* %vp) { define <4 x float> @test_masked_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,3,1,3] ; CHECK-NEXT: retq @@ -149,7 +149,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x fl define <4 x float> @test_masked_z_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,3,1,3] ; CHECK-NEXT: retq @@ -163,7 +163,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x define <4 x float> @test_masked_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[1,3,2,0] ; CHECK-NEXT: retq @@ -177,7 +177,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x fl define <4 x float> @test_masked_z_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, 
%xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[1,3,2,0] ; CHECK-NEXT: retq @@ -191,7 +191,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x define <4 x float> @test_masked_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[2,1,3,2] ; CHECK-NEXT: retq @@ -205,7 +205,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x fl define <4 x float> @test_masked_z_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2] ; CHECK-NEXT: retq @@ -228,7 +228,7 @@ define <4 x float> @test_4xfloat_perm_mem_mask3(<4 x float>* %vp) { define <4 x float> @test_masked_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[0,1,3,0] ; CHECK-NEXT: retq @@ -242,7 +242,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x fl define <4 x float> @test_masked_z_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,0] ; CHECK-NEXT: retq @@ -264,7 +264,7 @@ define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) { define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,6,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -278,7 +278,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,6,6,6] ; CHECK-NEXT: retq @@ -290,7 +290,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x floa define <8 x float> @test_masked_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,7,6,7,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -304,7 +304,7 @@ define <8 x float> 
@test_masked_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,7,6,7,6] ; CHECK-NEXT: retq @@ -316,7 +316,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,2,1,6,5,4,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -330,7 +330,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,2,1,6,5,4,4] ; CHECK-NEXT: retq @@ -350,7 +350,7 @@ define <8 x float> @test_8xfloat_perm_imm_mask3(<8 x float> %vec) { define <8 x float> @test_masked_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,2,1,0,6,6,5,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -364,7 +364,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,1,0,6,6,5,4] ; CHECK-NEXT: retq @@ -376,7 +376,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x define <8 x float> @test_masked_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,3,7,7,6,5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -390,7 +390,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> define <8 x float> @test_masked_z_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,3,7,7,6,5] ; CHECK-NEXT: retq @@ -402,7 +402,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask4(<8 x float> %vec, <8 x floa define <8 x float> @test_masked_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x 
float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3,6,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -416,7 +416,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3,6,5,7,7] ; CHECK-NEXT: retq @@ -436,7 +436,7 @@ define <8 x float> @test_8xfloat_perm_mask6(<8 x float> %vec) { define <8 x float> @test_masked_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask6: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,5,6,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -450,7 +450,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> define <8 x float> @test_masked_z_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask6: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,5,6,7,7] ; CHECK-NEXT: retq @@ -462,7 +462,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask6(<8 x float> %vec, <8 x floa define <8 x float> @test_masked_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,0,2,1,7,4,6,5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -476,7 +476,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x fl define <8 x float> @test_masked_z_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,2,1,7,4,6,5] ; CHECK-NEXT: retq @@ -499,7 +499,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[3,0,0,2,4,6,7,6] ; CHECK-NEXT: retq @@ -514,7 +514,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 
{%k1} {z} = ymm1[3,0,0,2,4,6,7,6] ; CHECK-NEXT: retq @@ -528,7 +528,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,2,2,6,4,6,6] ; CHECK-NEXT: retq @@ -542,7 +542,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,2,2,6,4,6,6] ; CHECK-NEXT: retq @@ -557,7 +557,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[2,1,1,3,4,4,7,4] ; CHECK-NEXT: retq @@ -572,7 +572,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[2,1,1,3,4,4,7,4] ; CHECK-NEXT: retq @@ -595,7 +595,7 @@ define <8 x float> @test_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp) { define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,0,3,3,4,4,7,7] ; CHECK-NEXT: retq @@ -609,7 +609,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,0,3,3,4,4,7,7] ; CHECK-NEXT: retq @@ -624,7 +624,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask4(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,0,1,4,6,5,4] ; CHECK-NEXT: retq @@ -639,7 +639,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask4(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps 
%xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,0,1,4,6,5,4] ; CHECK-NEXT: retq @@ -653,7 +653,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask4(<8 x float>* %vp, <8 x define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,0,3,6,4,4,7] ; CHECK-NEXT: retq @@ -667,7 +667,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,0,3,6,4,4,7] ; CHECK-NEXT: retq @@ -692,7 +692,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask6(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,2,3,7,4,6,7] ; CHECK-NEXT: retq @@ -707,7 +707,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask6(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,2,3,7,4,6,7] ; CHECK-NEXT: retq @@ -721,7 +721,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask6(<8 x float>* %vp, <8 x define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,2,3,1,4,6,7,5] ; CHECK-NEXT: retq @@ -735,7 +735,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,1,4,6,7,5] ; CHECK-NEXT: retq @@ -757,7 +757,7 @@ define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) { define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -771,7 +771,7 @@ define <16 x float> 
@test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x fl define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15] ; CHECK-NEXT: retq @@ -783,7 +783,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x define <16 x float> @test_masked_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -797,7 +797,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13] ; CHECK-NEXT: retq @@ -809,7 +809,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec, <1 define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -823,7 +823,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x fl define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12] ; CHECK-NEXT: retq @@ -843,7 +843,7 @@ define <16 x float> @test_16xfloat_perm_imm_mask3(<16 x float> %vec) { define <16 x float> @test_masked_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -857,7 +857,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14] ; CHECK-NEXT: retq @@ -869,7 +869,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec, <1 define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -883,7 +883,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x fl define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15] ; CHECK-NEXT: retq @@ -895,7 +895,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec, <16 x define <16 x float> @test_masked_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -909,7 +909,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12] ; CHECK-NEXT: retq @@ -929,7 +929,7 @@ define <16 x float> @test_16xfloat_perm_mask6(<16 x float> %vec) { define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask6: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -943,7 +943,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x fl define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask6: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13] ; CHECK-NEXT: retq @@ -955,7 +955,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec, <16 x define <16 x float> @test_masked_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps 
%zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -969,7 +969,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14] ; CHECK-NEXT: retq @@ -992,7 +992,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %zmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12] ; CHECK-NEXT: retq @@ -1007,7 +1007,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %zmm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12] ; CHECK-NEXT: retq @@ -1021,7 +1021,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <1 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13] ; CHECK-NEXT: retq @@ -1035,7 +1035,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13] ; CHECK-NEXT: retq @@ -1050,7 +1050,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %zmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13] ; CHECK-NEXT: retq @@ -1065,7 +1065,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %zmm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13] ; CHECK-NEXT: retq @@ -1088,7 +1088,7 @@ define <16 x float> 
@test_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp) { define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13] ; CHECK-NEXT: retq @@ -1102,7 +1102,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13] ; CHECK-NEXT: retq @@ -1117,7 +1117,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask4(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %zmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12] ; CHECK-NEXT: retq @@ -1132,7 +1132,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask4(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %zmm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12] ; CHECK-NEXT: retq @@ -1146,7 +1146,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask4(<16 x float>* %vp, <1 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13] ; CHECK-NEXT: retq @@ -1160,7 +1160,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13] ; CHECK-NEXT: retq @@ -1185,7 +1185,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask6(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps (%rdi), %zmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15] ; CHECK-NEXT: retq @@ -1200,7 +1200,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask6(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask6: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vmovaps (%rdi), %zmm1 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15] ; CHECK-NEXT: retq @@ -1214,7 +1214,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask6(<16 x float>* %vp, <1 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13] ; CHECK-NEXT: retq @@ -1228,7 +1228,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13] ; CHECK-NEXT: retq @@ -1250,7 +1250,7 @@ define <2 x double> @test_2xdouble_perm_mask0(<2 x double> %vec) { define <2 x double> @test_masked_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -1264,7 +1264,7 @@ define <2 x double> @test_masked_2xdouble_perm_mask0(<2 x double> %vec, <2 x dou define <2 x double> @test_masked_z_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0] ; CHECK-NEXT: retq @@ -1276,7 +1276,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mask0(<2 x double> %vec, <2 x d define <2 x double> @test_masked_2xdouble_perm_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -1290,7 +1290,7 @@ define <2 x double> @test_masked_2xdouble_perm_mask1(<2 x double> %vec, <2 x dou define <2 x double> @test_masked_z_2xdouble_perm_mask1(<2 x double> %vec, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0] ; CHECK-NEXT: retq @@ -1311,7 +1311,7 @@ define <2 x double> @test_2xdouble_perm_mem_mask0(<2 x double>* %vp) { define <2 x double> @test_masked_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, 
%xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0] ; CHECK-NEXT: retq @@ -1325,7 +1325,7 @@ define <2 x double> @test_masked_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x define <2 x double> @test_masked_z_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0] ; CHECK-NEXT: retq @@ -1339,7 +1339,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 define <2 x double> @test_masked_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0] ; CHECK-NEXT: retq @@ -1353,7 +1353,7 @@ define <2 x double> @test_masked_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x define <2 x double> @test_masked_z_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0] ; CHECK-NEXT: retq @@ -1375,7 +1375,7 @@ define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,0,2,3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1389,7 +1389,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,2,3] ; CHECK-NEXT: retq @@ -1401,7 +1401,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x d define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1415,7 +1415,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2] ; CHECK-NEXT: retq @@ -1427,7 
+1427,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x d define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[0,1,3,3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1441,7 +1441,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,3,3] ; CHECK-NEXT: retq @@ -1461,7 +1461,7 @@ define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1475,7 +1475,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2] ; CHECK-NEXT: retq @@ -1496,7 +1496,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,2,2] ; CHECK-NEXT: retq @@ -1510,7 +1510,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,2] ; CHECK-NEXT: retq @@ -1524,7 +1524,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,3,3] ; CHECK-NEXT: retq @@ -1538,7 +1538,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x 
double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,3,3] ; CHECK-NEXT: retq @@ -1552,7 +1552,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,3] ; CHECK-NEXT: retq @@ -1566,7 +1566,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,3] ; CHECK-NEXT: retq @@ -1589,7 +1589,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,2] ; CHECK-NEXT: retq @@ -1603,7 +1603,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,2] ; CHECK-NEXT: retq @@ -1625,7 +1625,7 @@ define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) { define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,3,2,4,5,7,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1639,7 +1639,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x dou define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,3,2,4,5,7,6] ; CHECK-NEXT: retq @@ -1651,7 +1651,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x d define <8 x double> @test_masked_8xdouble_perm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,7,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1665,7 +1665,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask1(<8 x double> %vec, <8 x dou define <8 x double> @test_masked_z_8xdouble_perm_mask1(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,7,6] ; CHECK-NEXT: retq @@ -1677,7 +1677,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask1(<8 x double> %vec, <8 x d define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,3,5,5,6,7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1691,7 +1691,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x dou define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,3,5,5,6,7] ; CHECK-NEXT: retq @@ -1711,7 +1711,7 @@ define <8 x double> @test_8xdouble_perm_mask3(<8 x double> %vec) { define <8 x double> @test_masked_8xdouble_perm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,2,4,4,6,7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1725,7 +1725,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask3(<8 x double> %vec, <8 x dou define <8 x double> @test_masked_z_8xdouble_perm_mask3(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,7] ; CHECK-NEXT: retq @@ -1746,7 +1746,7 @@ define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,5,4,7,6] ; CHECK-NEXT: retq @@ -1760,7 +1760,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilpd 
{{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,5,4,7,6] ; CHECK-NEXT: retq @@ -1774,7 +1774,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,3,3,4,5,7,7] ; CHECK-NEXT: retq @@ -1788,7 +1788,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x define <8 x double> @test_masked_z_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,3,3,4,5,7,7] ; CHECK-NEXT: retq @@ -1802,7 +1802,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,4,7,6] ; CHECK-NEXT: retq @@ -1816,7 +1816,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,4,7,6] ; CHECK-NEXT: retq @@ -1839,7 +1839,7 @@ define <8 x double> @test_8xdouble_perm_mem_mask3(<8 x double>* %vp) { define <8 x double> @test_masked_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,0,3,2,4,5,6,7] ; CHECK-NEXT: retq @@ -1853,7 +1853,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x define <8 x double> @test_masked_z_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,2,4,5,6,7] ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index e4704ff..56319dc 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -2758,7 +2758,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask0(<8 x float> %vec, ; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpxor 
%xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[0,3],xmm3[0,1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -2774,7 +2774,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask0(<8 x float> %vec ; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3],xmm2[0,1] ; CHECK-NEXT: vzeroupper @@ -2789,7 +2789,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec, ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 ; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[0,0] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],xmm3[0,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -2806,7 +2806,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask1(<8 x float> %vec ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm2[0,2] ; CHECK-NEXT: vzeroupper @@ -2821,7 +2821,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec, ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 ; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,0],xmm0[0,0] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,2],xmm3[0,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -2838,7 +2838,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask2(<8 x float> %vec ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[0,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,2],xmm2[0,2] ; CHECK-NEXT: vzeroupper @@ -2862,9 +2862,9 @@ define <4 x float> @test_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec) { define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3] +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[3,3,1,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -2879,9 +2879,9 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec, define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mask3(<8 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2 -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] -; CHECK-NEXT: vpxor %xmm2, 
%xmm2, %xmm2 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,1,2] ; CHECK-NEXT: vzeroupper @@ -2910,7 +2910,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask0(<8 x float>* % ; CHECK-NEXT: vmovaps (%rdi), %ymm2 ; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm3[2,0] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm2[2,0],xmm3[0,1] ; CHECK-NEXT: vzeroupper @@ -2928,7 +2928,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask0(<8 x float>* ; CHECK-NEXT: vmovaps (%rdi), %ymm1 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm0, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,0],xmm2[0,1] ; CHECK-NEXT: vzeroupper @@ -2943,10 +2943,10 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask0(<8 x float>* define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm2 -; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3 -; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vmovaps (%rdi), %ymm2 +; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 +; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3] +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = xmm2[2,3,3,2] ; CHECK-NEXT: vzeroupper @@ -2961,10 +2961,10 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* % define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vmovaps (%rdi), %ymm1 +; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 +; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,3,3,2] ; CHECK-NEXT: vzeroupper @@ -2982,7 +2982,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask2(<8 x float>* % ; CHECK-NEXT: vmovaps (%rdi), %ymm2 ; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 ; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,0],xmm2[3,0] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm2[3,1],xmm3[2,0] ; CHECK-NEXT: vzeroupper @@ -3000,7 +3000,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask2(<8 x float>* ; CHECK-NEXT: vmovaps (%rdi), %ymm1 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[3,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps 
%xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm0, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm1[3,1],xmm2[2,0] ; CHECK-NEXT: vzeroupper @@ -3031,7 +3031,7 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask3(<8 x float>* % ; CHECK-NEXT: vmovaps (%rdi), %ymm2 ; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 ; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,0] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm2[1,3],xmm3[0,2] ; CHECK-NEXT: vzeroupper @@ -3049,7 +3049,7 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask3(<8 x float>* ; CHECK-NEXT: vmovaps (%rdi), %ymm1 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[3,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm0, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3],xmm2[0,2] ; CHECK-NEXT: vzeroupper @@ -3078,7 +3078,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mask0(<16 x float> %vec ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,4,12,10,8,2,11,7] ; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3093,7 +3093,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask0(<16 x float> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,12,10,8,2,11,7] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -3109,7 +3109,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mask1(<16 x float> %vec ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [2,4,11,4,12,7,9,6] ; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3124,7 +3124,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask1(<16 x float> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,4,11,4,12,7,9,6] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -3141,7 +3141,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mask2(<16 x float> %vec ; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0] ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,4,10,11,6,1,4,4] ; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3157,7 +3157,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask2(<16 x float> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 ; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm2[0,0] ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,10,11,6,1,4,4] -; CHECK-NEXT: vpxor 
%xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -3184,7 +3184,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mask3(<16 x float> %vec ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [4,6,1,8,4,12,13,0] ; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3199,7 +3199,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask3(<16 x float> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,6,1,8,4,12,13,0] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -3227,7 +3227,7 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask0(<16 x float> %vec ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = <12,0,1,2,u,u,u,u> ; CHECK-NEXT: vpermi2ps %ymm0, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqps %xmm0, %xmm2, %k1 ; CHECK-NEXT: vblendmps %xmm4, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -3244,7 +3244,7 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask0(<16 x float> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <12,0,1,2,u,u,u,u> ; CHECK-NEXT: vpermi2ps %ymm0, %ymm2, %ymm3 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqps %xmm0, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3257,11 +3257,11 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask0(<16 x float> %v define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3] +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -3275,11 +3275,11 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: 
vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3294,7 +3294,7 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask2(<16 x float> %vec ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm3[0,0],ymm0[0,1],ymm3[4,4],ymm0[4,5] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -3311,7 +3311,7 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask2(<16 x float> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm2[0,0],ymm0[0,1],ymm2[4,4],ymm0[4,5] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3337,12 +3337,12 @@ define <4 x float> @test_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec) { define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7] -; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm3 -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7] +; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3] +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -3356,12 +3356,12 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] -; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm2 -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3390,7 +3390,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mem_mask0(<16 x float>* ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [7,6,7,11,5,10,0,4] ; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, 
%xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3407,7 +3407,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask0(<16 x float ; CHECK-NEXT: vmovaps (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,7,11,5,10,0,4] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -3426,7 +3426,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mem_mask1(<16 x float>* ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [11,0,9,0,7,14,0,8] ; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3443,7 +3443,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask1(<16 x float ; CHECK-NEXT: vmovaps (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [11,0,9,0,7,14,0,8] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -3463,7 +3463,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mem_mask2(<16 x float>* ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [8,5,2,3,2,9,10,1] ; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3481,7 +3481,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2(<16 x float ; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[1,0,0,3] ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [8,5,2,3,2,9,10,1] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -3512,7 +3512,7 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mem_mask3(<16 x float>* ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [7,5,3,3,11,4,12,9] ; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovaps %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3529,7 +3529,7 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask3(<16 x float ; CHECK-NEXT: vmovaps (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [7,5,3,3,11,4,12,9] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -3559,13 +3559,13 @@ define <4 x float> @test_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp) { define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0: ; CHECK: 
# %bb.0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 -; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3 -; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,2,3,3] -; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm2 -; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vmovaps (%rdi), %zmm2 +; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 +; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[0,2,3,3] +; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2 +; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[3,1,2,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -3580,13 +3580,13 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,3,3] -; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vmovaps (%rdi), %zmm1 +; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 +; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,3] +; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1 +; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3605,7 +3605,7 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask1(<16 x float>* ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,10,6,15,4,14,6,15] ; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -3624,7 +3624,7 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask1(<16 x float ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,10,6,15,4,14,6,15] ; CHECK-NEXT: vpermi2ps %ymm1, %ymm2, %ymm3 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3643,7 +3643,7 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask2(<16 x float>* ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [4,14,4,14,4,14,6,7] ; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -3662,7 +3662,7 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask2(<16 x float ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [4,14,4,14,4,14,6,7] ; CHECK-NEXT: vpermi2ps %ymm1, %ymm2, %ymm3 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, 
%xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3695,7 +3695,7 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask3(<16 x float>* ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = <3,3,15,9,u,u,u,u> ; CHECK-NEXT: vpermi2ps %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -3714,7 +3714,7 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask3(<16 x float ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = <3,3,15,9,u,u,u,u> ; CHECK-NEXT: vpermi2ps %ymm2, %ymm1, %ymm3 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3740,7 +3740,7 @@ define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mask0(<4 x double> %v ; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm3[0],xmm0[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -3756,7 +3756,7 @@ define <2 x double> @test_masked_z_4xdouble_to_2xdouble_perm_mask0(<4 x double> ; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm0[0] ; CHECK-NEXT: vzeroupper @@ -3770,7 +3770,7 @@ define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mask1(<4 x double> %v ; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],xmm3[1] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -3786,7 +3786,7 @@ define <2 x double> @test_masked_z_4xdouble_to_2xdouble_perm_mask1(<4 x double> ; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm2[1] ; CHECK-NEXT: vzeroupper @@ -3811,10 +3811,10 @@ define <2 x double> @test_4xdouble_to_2xdouble_perm_mem_mask0(<4 x double>* %vp) define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mem_mask0(<4 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_to_2xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm2 -; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3 -; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vmovapd (%rdi), %ymm2 +; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 +; CHECK-NEXT: vblendpd {{.*#+}} xmm2 = xmm3[0],xmm2[1] +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm1, %k1 ; CHECK-NEXT: vmovapd %xmm2, %xmm0 {%k1} ; CHECK-NEXT: 
vzeroupper @@ -3829,10 +3829,10 @@ define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mem_mask0(<4 x double define <2 x double> @test_masked_z_4xdouble_to_2xdouble_perm_mem_mask0(<4 x double>* %vp, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_to_2xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vmovapd (%rdi), %ymm1 +; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 +; CHECK-NEXT: vblendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm0, %k1 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -3849,7 +3849,7 @@ define <2 x double> @test_masked_4xdouble_to_2xdouble_perm_mem_mask1(<4 x double ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd (%rdi), %ymm2 ; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} = xmm3[0],xmm2[0] ; CHECK-NEXT: vzeroupper @@ -3866,7 +3866,7 @@ define <2 x double> @test_masked_z_4xdouble_to_2xdouble_perm_mem_mask1(<4 x doub ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd (%rdi), %ymm1 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm0, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm1[0] ; CHECK-NEXT: vzeroupper @@ -3895,7 +3895,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask0(<8 x double> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [3,7,3,7] ; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3910,7 +3910,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask0(<8 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [3,7,3,7] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -3926,7 +3926,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask1(<8 x double> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [2,0,7,6] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -3941,7 +3941,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask1(<8 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,0,7,6] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -3954,7 +3954,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask1(<8 x double> define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask2(<8 x double> %vec, <4 x double> %vec2, <4 x double> 
%mask) { ; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,3,2,0] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -3968,7 +3968,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask2(<8 x double> %v define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask2(<8 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,2,0] ; CHECK-NEXT: retq @@ -3994,7 +3994,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4009,7 +4009,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -4022,9 +4022,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask4(<8 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm3[1] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm3[1] +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,0,1,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4038,9 +4038,9 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask4(<8 x double> %v define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask4(<8 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm2[1] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1] +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,1,1] ; CHECK-NEXT: retq @@ -4055,7 +4055,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask5(<8 x double> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [2,6,2,2] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4070,7 +4070,7 @@ 
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask5(<8 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,6,2,2] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -4097,7 +4097,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask6(<8 x double> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,4,3,4] ; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4112,7 +4112,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask6(<8 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [1,4,3,4] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2pd %ymm0, %ymm3, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -4128,7 +4128,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask7(<8 x double> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [3,5,0,6] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1 ; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4143,7 +4143,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask7(<8 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [3,5,0,6] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -4171,7 +4171,7 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask0(<8 x double> %v ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,6,2,6] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %xmm0, %xmm2, %k1 ; CHECK-NEXT: vblendmpd %xmm4, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -4188,7 +4188,7 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask0(<8 x double> ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 ; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [0,6,2,6] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3 -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vcmpeqpd %xmm0, %xmm1, %k1 ; CHECK-NEXT: vmovapd %xmm3, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -4201,10 +4201,10 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask0(<8 x double> define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %vec, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_to_2xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; CHECK-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3] -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 
+; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3] +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -4218,10 +4218,10 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %v define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask1(<8 x double> %vec, <2 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_to_2xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; CHECK-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -4250,7 +4250,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask0(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,6,7,2] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4267,7 +4267,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask0(<8 x doub ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [1,6,7,2] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4286,7 +4286,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [3,4,2,4] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4303,7 +4303,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1(<8 x doub ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,4] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4322,7 +4322,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask2(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,2,3,4] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4339,7 +4339,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask2(<8 x doub ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [1,2,3,4] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd 
%xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4370,7 +4370,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask3(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [4,2,1,0] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4387,7 +4387,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask3(<8 x doub ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [4,2,1,0] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4406,7 +4406,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask4(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [2,4,1,5] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4423,7 +4423,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask4(<8 x doub ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [2,4,1,5] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4442,7 +4442,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask5(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [6,1,1,1] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4459,7 +4459,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask5(<8 x doub ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [6,1,1,1] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4492,7 +4492,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask6(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,6,1] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4510,7 +4510,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6(<8 x doub ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,6,1] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; 
CHECK-NEXT: vpermi2pd %ymm2, %ymm3, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4529,7 +4529,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask7(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,5,2,5] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vmovapd %ymm4, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -4546,7 +4546,7 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask7(<8 x doub ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,5,2,5] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm0, %k1 ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -4579,7 +4579,7 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mem_mask0(<8 x double ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3 ; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,6,3,6] ; CHECK-NEXT: vpermi2pd %ymm3, %ymm2, %ymm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovapd %xmm4, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper @@ -4598,7 +4598,7 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask0(<8 x doub ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2 ; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [1,6,3,6] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm1, %ymm3 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vmovapd %xmm3, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper @@ -4615,7 +4615,7 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mem_mask1(<8 x double ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd (%rdi), %zmm2 ; CHECK-NEXT: vextractf32x4 $2, %zmm2, %xmm3 -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} = xmm2[1],xmm3[0] ; CHECK-NEXT: vzeroupper @@ -4632,7 +4632,7 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd (%rdi), %zmm1 ; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm2 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm0, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm1[1],xmm2[0] ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll index 5be6ab8..a360eaa 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll @@ -1735,7 +1735,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1750,7 +1750,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x floa ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = 
[3,4,2,4,1,2,3,4] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1763,7 +1763,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1778,7 +1778,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x floa ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1791,7 +1791,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1806,7 +1806,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x floa ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1828,7 +1828,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1843,7 +1843,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x floa ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1866,7 +1866,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1881,7 +1881,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; 
CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1896,7 +1896,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1911,7 +1911,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1926,7 +1926,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1941,7 +1941,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -1966,7 +1966,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -1981,7 +1981,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 ; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2005,7 +2005,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x fl ; CHECK-LABEL: test_masked_16xfloat_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2020,7 +2020,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2033,7 +2033,7 @@ define <16 x float> 
@test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x fl ; CHECK-LABEL: test_masked_16xfloat_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2048,7 +2048,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2061,7 +2061,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x fl ; CHECK-LABEL: test_masked_16xfloat_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2076,7 +2076,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2098,7 +2098,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x fl ; CHECK-LABEL: test_masked_16xfloat_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2113,7 +2113,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2136,7 +2136,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2151,7 +2151,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2166,7 +2166,7 
@@ define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2181,7 +2181,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2196,7 +2196,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2211,7 +2211,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2236,7 +2236,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2251,7 +2251,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <1 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2273,7 +2273,7 @@ define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2287,7 +2287,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd 
{{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] ; CHECK-NEXT: retq @@ -2299,7 +2299,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x d define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2313,7 +2313,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] ; CHECK-NEXT: retq @@ -2325,7 +2325,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x d define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2339,7 +2339,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] ; CHECK-NEXT: retq @@ -2359,7 +2359,7 @@ define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2373,7 +2373,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] ; CHECK-NEXT: retq @@ -2394,7 +2394,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] ; CHECK-NEXT: retq @@ -2408,7 +2408,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x define <4 
x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] ; CHECK-NEXT: retq @@ -2422,7 +2422,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] ; CHECK-NEXT: retq @@ -2436,7 +2436,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] ; CHECK-NEXT: retq @@ -2450,7 +2450,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] ; CHECK-NEXT: retq @@ -2464,7 +2464,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] ; CHECK-NEXT: retq @@ -2487,7 +2487,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] ; CHECK-NEXT: retq @@ -2501,7 +2501,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] ; CHECK-NEXT: retq @@ -2525,7 +2525,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x dou ; CHECK-LABEL: test_masked_8xdouble_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd 
%xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2540,7 +2540,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x d ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2552,7 +2552,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x d define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2566,7 +2566,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] ; CHECK-NEXT: retq @@ -2579,7 +2579,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x dou ; CHECK-LABEL: test_masked_8xdouble_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2594,7 +2594,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x d ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2614,7 +2614,7 @@ define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2628,7 +2628,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] ; CHECK-NEXT: retq @@ -2641,7 +2641,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x dou ; CHECK-LABEL: 
test_masked_8xdouble_perm_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2656,7 +2656,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x d ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2668,7 +2668,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x d define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2682,7 +2682,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] ; CHECK-NEXT: retq @@ -2704,7 +2704,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x dou ; CHECK-LABEL: test_masked_8xdouble_perm_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2719,7 +2719,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x d ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2731,7 +2731,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x d define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2745,7 +2745,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} 
= zmm0[3,1,3,2,7,5,7,6] ; CHECK-NEXT: retq @@ -2768,7 +2768,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2783,7 +2783,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2797,7 +2797,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] ; CHECK-NEXT: retq @@ -2811,7 +2811,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] ; CHECK-NEXT: retq @@ -2826,7 +2826,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2841,7 +2841,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2864,7 +2864,7 @@ define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] ; CHECK-NEXT: retq @@ -2878,7 +2878,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] ; CHECK-NEXT: retq @@ -2893,7 +2893,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2908,7 +2908,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2922,7 +2922,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] ; CHECK-NEXT: retq @@ -2936,7 +2936,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] ; CHECK-NEXT: retq @@ -2961,7 +2961,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq @@ -2976,7 +2976,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask6: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -2990,7 +2990,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] ; CHECK-NEXT: retq @@ -3004,7 +3004,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, define <8 x double> 
@test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll index ff840e6..992d434 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll @@ -12,7 +12,7 @@ define <4 x float> @test_4xfloat_shuff_mask0(<4 x float> %vec1, <4 x float> %vec define <4 x float> @test_4xfloat_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[2,1],xmm1[3,1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -26,7 +26,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask0(<4 x float> %vec1, <4 x floa define <4 x float> @test_4xfloat_zero_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],xmm1[3,1] ; CHECK-NEXT: retq @@ -38,7 +38,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask0(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,2],xmm1[3,2] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -52,7 +52,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask1(<4 x float> %vec1, <4 x floa define <4 x float> @test_4xfloat_zero_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2],xmm1[3,2] ; CHECK-NEXT: retq @@ -64,7 +64,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask1(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,3],xmm1[2,1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -78,7 +78,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask2(<4 x float> %vec1, <4 x floa define <4 x float> @test_4xfloat_zero_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; 
CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm1[2,1] ; CHECK-NEXT: retq @@ -98,7 +98,7 @@ define <4 x float> @test_4xfloat_shuff_mask3(<4 x float> %vec1, <4 x float> %vec define <4 x float> @test_4xfloat_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[3,3],xmm1[3,3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -112,7 +112,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask3(<4 x float> %vec1, <4 x floa define <4 x float> @test_4xfloat_zero_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],xmm1[3,3] ; CHECK-NEXT: retq @@ -133,7 +133,7 @@ define <4 x float> @test_4xfloat_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* define <4 x float> @test_4xfloat_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,0],mem[1,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -148,7 +148,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0],mem[1,2] ; CHECK-NEXT: retq @@ -162,7 +162,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,3],mem[1,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -177,7 +177,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],mem[1,3] ; CHECK-NEXT: retq @@ -191,7 +191,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask2: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],mem[2,0] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -206,7 +206,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],mem[2,0] ; CHECK-NEXT: retq @@ -229,7 +229,7 @@ define <4 x float> @test_4xfloat_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* define <4 x float> @test_4xfloat_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[2,1],mem[3,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -244,7 +244,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],mem[3,2] ; CHECK-NEXT: retq @@ -266,7 +266,7 @@ define <8 x float> @test_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -280,7 +280,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6] ; CHECK-NEXT: retq @@ -292,7 +292,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -306,7 +306,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x 
float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5] ; CHECK-NEXT: retq @@ -318,7 +318,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -332,7 +332,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6] ; CHECK-NEXT: retq @@ -352,7 +352,7 @@ define <8 x float> @test_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -366,7 +366,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6] ; CHECK-NEXT: retq @@ -387,7 +387,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -402,7 +402,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4] ; CHECK-NEXT: retq @@ -416,7 +416,7 @@ 
define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -431,7 +431,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4] ; CHECK-NEXT: retq @@ -445,7 +445,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -460,7 +460,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7] ; CHECK-NEXT: retq @@ -483,7 +483,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -498,7 +498,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5] ; CHECK-NEXT: retq @@ -520,7 +520,7 @@ define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; 
CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -534,7 +534,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14] ; CHECK-NEXT: retq @@ -546,7 +546,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -560,7 +560,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15] ; CHECK-NEXT: retq @@ -572,7 +572,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -586,7 +586,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13] ; CHECK-NEXT: retq @@ -606,7 +606,7 @@ define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: 
vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -620,7 +620,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14] ; CHECK-NEXT: retq @@ -641,7 +641,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -656,7 +656,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14] ; CHECK-NEXT: retq @@ -670,7 +670,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -685,7 +685,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14] ; CHECK-NEXT: retq @@ -699,7 +699,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, 
%k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -714,7 +714,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14] ; CHECK-NEXT: retq @@ -737,7 +737,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -752,7 +752,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15] ; CHECK-NEXT: retq @@ -774,7 +774,7 @@ define <2 x double> @test_2xdouble_shuff_mask0(<2 x double> %vec1, <2 x double> define <2 x double> @test_2xdouble_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -788,7 +788,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mask0(<2 x double> %vec1, <2 x d define <2 x double> @test_2xdouble_zero_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] ; CHECK-NEXT: retq @@ -800,7 +800,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mask0(<2 x double> %vec1, < define <2 x double> @test_2xdouble_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -814,7 +814,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mask1(<2 x double> 
%vec1, <2 x d define <2 x double> @test_2xdouble_zero_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] ; CHECK-NEXT: retq @@ -835,7 +835,7 @@ define <2 x double> @test_2xdouble_shuff_mem_mask0(<2 x double> %vec1, <2 x doub define <2 x double> @test_2xdouble_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -850,7 +850,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mem_mask0(<2 x double> %vec1, <2 define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0] ; CHECK-NEXT: retq @@ -864,7 +864,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask0(<2 x double> %vec define <2 x double> @test_2xdouble_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -879,7 +879,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mem_mask1(<2 x double> %vec1, <2 define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0] ; CHECK-NEXT: retq @@ -901,7 +901,7 @@ define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -915,7 +915,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[3] ; CHECK-NEXT: retq 
@@ -927,7 +927,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -941,7 +941,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: retq @@ -953,7 +953,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -967,7 +967,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: retq @@ -987,7 +987,7 @@ define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -1001,7 +1001,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[3] ; CHECK-NEXT: retq @@ -1022,7 +1022,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, 
%xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1037,7 +1037,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[2] ; CHECK-NEXT: retq @@ -1051,7 +1051,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1066,7 +1066,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: retq @@ -1080,7 +1080,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[3],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1095,7 +1095,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[3],mem[2] ; CHECK-NEXT: retq @@ -1118,7 +1118,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1133,7 +1133,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 define <4 x double> 
@test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: retq @@ -1155,7 +1155,7 @@ define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1169,7 +1169,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq @@ -1181,7 +1181,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1195,7 +1195,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7] ; CHECK-NEXT: retq @@ -1207,7 +1207,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1221,7 +1221,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, 
%xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6] ; CHECK-NEXT: retq @@ -1241,7 +1241,7 @@ define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1255,7 +1255,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7] ; CHECK-NEXT: retq @@ -1276,7 +1276,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1291,7 +1291,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7] ; CHECK-NEXT: retq @@ -1305,7 +1305,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1320,7 +1320,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7] ; CHECK-NEXT: retq @@ -1334,7 +1334,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1349,7 +1349,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq @@ -1372,7 +1372,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1387,7 +1387,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6] ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll index e6a1c05..47b8ea4 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll @@ -14,7 +14,7 @@ define <8 x float> @test_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -28,7 +28,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, 
%k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] ; CHECK-NEXT: retq @@ -40,7 +40,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -54,7 +54,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] ; CHECK-NEXT: retq @@ -66,7 +66,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -80,7 +80,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] ; CHECK-NEXT: retq @@ -100,7 +100,7 @@ define <8 x float> @test_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -114,7 +114,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x floa define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] ; CHECK-NEXT: retq @@ -135,7 +135,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, 
%xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -150,7 +150,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] ; CHECK-NEXT: retq @@ -164,7 +164,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -179,7 +179,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] ; CHECK-NEXT: retq @@ -193,7 +193,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -208,7 +208,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] ; CHECK-NEXT: retq @@ -231,7 +231,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -246,7 +246,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> 
%vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] ; CHECK-NEXT: retq @@ -268,7 +268,7 @@ define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -282,7 +282,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] ; CHECK-NEXT: retq @@ -294,7 +294,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -308,7 +308,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] ; CHECK-NEXT: retq @@ -320,7 +320,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -334,7 +334,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} 
zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq @@ -354,7 +354,7 @@ define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -368,7 +368,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] ; CHECK-NEXT: retq @@ -389,7 +389,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -404,7 +404,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: retq @@ -418,7 +418,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -433,7 +433,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: retq @@ -447,7 +447,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec define <16 x float> 
@test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -462,7 +462,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] ; CHECK-NEXT: retq @@ -485,7 +485,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -500,7 +500,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] ; CHECK-NEXT: retq @@ -522,7 +522,7 @@ define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -536,7 +536,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: retq @@ -548,7 +548,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: 
vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -562,7 +562,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: retq @@ -574,7 +574,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -588,7 +588,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: retq @@ -608,7 +608,7 @@ define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -622,7 +622,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: retq @@ -643,7 +643,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -658,7 +658,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: 
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] ; CHECK-NEXT: retq @@ -672,7 +672,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -687,7 +687,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] ; CHECK-NEXT: retq @@ -701,7 +701,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -716,7 +716,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] ; CHECK-NEXT: retq @@ -739,7 +739,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -754,7 +754,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] ; CHECK-NEXT: retq @@ -776,7 +776,7 @@ define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, 
<8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -790,7 +790,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] ; CHECK-NEXT: retq @@ -802,7 +802,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -816,7 +816,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] ; CHECK-NEXT: retq @@ -828,7 +828,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -842,7 +842,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] ; CHECK-NEXT: retq @@ -862,7 +862,7 @@ define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -876,7 +876,7 @@ define <8 
x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] ; CHECK-NEXT: retq @@ -897,7 +897,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -912,7 +912,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] ; CHECK-NEXT: retq @@ -926,7 +926,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -941,7 +941,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] ; CHECK-NEXT: retq @@ -955,7 +955,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -970,7 +970,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, 
%xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] ; CHECK-NEXT: retq @@ -993,7 +993,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1008,7 +1008,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll b/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll index 5eca7f0..d449eb6 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll @@ -12,7 +12,7 @@ define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -26,7 +26,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq @@ -38,7 +38,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -52,7 +52,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq @@ -64,7 +64,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -78,7 +78,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq @@ -98,7 +98,7 @@ define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -112,7 +112,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq @@ -133,7 +133,7 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x fl define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -148,7 +148,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq @@ -162,7 +162,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %v define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: 
test_4xfloat_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -177,7 +177,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq @@ -191,7 +191,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %v define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -206,7 +206,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq @@ -229,7 +229,7 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x fl define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -244,7 +244,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq @@ -266,7 +266,7 @@ define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps 
%ymm2, %ymm0 @@ -280,7 +280,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq @@ -292,7 +292,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -306,7 +306,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq @@ -318,7 +318,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -332,7 +332,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq @@ -352,7 +352,7 @@ define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -366,7 +366,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x define <8 x float> 
@test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq @@ -387,7 +387,7 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x fl define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -402,7 +402,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq @@ -416,7 +416,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %v define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -431,7 +431,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq @@ -445,7 +445,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %v define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -460,7 +460,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: 
test_8xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq @@ -483,7 +483,7 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x fl define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -498,7 +498,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq @@ -520,7 +520,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x fl define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -534,7 +534,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq @@ -546,7 +546,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %ve define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -560,7 +560,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, < define <16 x float> 
@test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq @@ -572,7 +572,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %ve define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -586,7 +586,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq @@ -606,7 +606,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x fl define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -620,7 +620,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq @@ -641,7 +641,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 
{%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -656,7 +656,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq @@ -670,7 +670,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -685,7 +685,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq @@ -699,7 +699,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -714,7 +714,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq @@ -737,7 +737,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x 
float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -752,7 +752,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq @@ -774,7 +774,7 @@ define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x dou define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -788,7 +788,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, < define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] ; CHECK-NEXT: retq @@ -800,7 +800,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %ve define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -814,7 +814,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, < define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] ; CHECK-NEXT: retq @@ -835,7 +835,7 @@ define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 
+; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -850,7 +850,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] ; CHECK-NEXT: retq @@ -864,7 +864,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -879,7 +879,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] ; CHECK-NEXT: retq @@ -901,7 +901,7 @@ define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x dou define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -915,7 +915,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq @@ -927,7 +927,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %ve define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -941,7 +941,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, < define <4 x double> 
@test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq @@ -953,7 +953,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %ve define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -967,7 +967,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq @@ -987,7 +987,7 @@ define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x dou define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -1001,7 +1001,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq @@ -1022,7 +1022,7 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1037,7 +1037,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd 
%ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq @@ -1051,7 +1051,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1066,7 +1066,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq @@ -1080,7 +1080,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1095,7 +1095,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq @@ -1118,7 +1118,7 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -1133,7 +1133,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq @@ -1155,7 +1155,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x dou define <8 x double> 
@test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1169,7 +1169,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq @@ -1181,7 +1181,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %ve define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1195,7 +1195,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq @@ -1207,7 +1207,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %ve define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1221,7 +1221,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq @@ -1241,7 +1241,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x dou define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x 
double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -1255,7 +1255,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq @@ -1276,7 +1276,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1291,7 +1291,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq @@ -1305,7 +1305,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1320,7 +1320,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq @@ -1334,7 +1334,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1349,7 +1349,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq @@ -1372,7 +1372,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -1387,7 +1387,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq @@ -1409,7 +1409,7 @@ define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -1423,7 +1423,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq @@ -1435,7 +1435,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1 define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} 
xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -1449,7 +1449,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq @@ -1461,7 +1461,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1 define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -1475,7 +1475,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq @@ -1495,7 +1495,7 @@ define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 @@ -1509,7 +1509,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq @@ -1530,7 +1530,7 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x f define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -1545,7 +1545,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; 
CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq @@ -1559,7 +1559,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> % define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -1574,7 +1574,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq @@ -1588,7 +1588,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> % define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -1603,7 +1603,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq @@ -1626,7 +1626,7 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x f define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 @@ -1641,7 +1641,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) { ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = 
xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq @@ -1663,7 +1663,7 @@ define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -1677,7 +1677,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq @@ -1689,7 +1689,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1 define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -1703,7 +1703,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq @@ -1715,7 +1715,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1 define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -1729,7 +1729,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq @@ -1749,7 +1749,7 @@ define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, 
<8 x float define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 @@ -1763,7 +1763,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq @@ -1784,7 +1784,7 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x f define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1799,7 +1799,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq @@ -1813,7 +1813,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> % define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1828,7 +1828,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq @@ -1842,7 +1842,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> % define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, 
<8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1857,7 +1857,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq @@ -1880,7 +1880,7 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x f define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -1895,7 +1895,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) { ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq @@ -1917,7 +1917,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x f define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -1931,7 +1931,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq @@ -1943,7 +1943,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %v define <16 x float> 
@test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -1957,7 +1957,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq @@ -1969,7 +1969,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %v define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -1983,7 +1983,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq @@ -2003,7 +2003,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x f define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 @@ -2017,7 +2017,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, 
%k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq @@ -2038,7 +2038,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2053,7 +2053,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq @@ -2067,7 +2067,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2082,7 +2082,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq @@ -2096,7 +2096,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2111,7 +2111,7 @@ define <16 x float> 
@test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq @@ -2134,7 +2134,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 @@ -2149,7 +2149,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) { ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq @@ -2171,7 +2171,7 @@ define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x do define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2185,7 +2185,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] ; CHECK-NEXT: retq @@ -2197,7 +2197,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %v define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 @@ -2211,7 +2211,7 @@ define <2 x double> 
@test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] ; CHECK-NEXT: retq @@ -2232,7 +2232,7 @@ define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -2247,7 +2247,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %ve define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] ; CHECK-NEXT: retq @@ -2261,7 +2261,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 @@ -2276,7 +2276,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %ve define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) { ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] ; CHECK-NEXT: retq @@ -2298,7 +2298,7 @@ define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x do define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -2312,7 +2312,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, 
%xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq @@ -2324,7 +2324,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %v define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -2338,7 +2338,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq @@ -2350,7 +2350,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %v define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -2364,7 +2364,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq @@ -2384,7 +2384,7 @@ define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x do define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 @@ -2398,7 +2398,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq @@ -2419,7 +2419,7 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 define <4 x double> 
@test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2434,7 +2434,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq @@ -2448,7 +2448,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2463,7 +2463,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq @@ -2477,7 +2477,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2492,7 +2492,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq @@ -2515,7 +2515,7 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, 
%xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 @@ -2530,7 +2530,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) { ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq @@ -2552,7 +2552,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x do define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -2566,7 +2566,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq @@ -2578,7 +2578,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %v define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -2592,7 +2592,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq @@ -2604,7 +2604,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %v define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = 
zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -2618,7 +2618,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq @@ -2638,7 +2638,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x do define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 @@ -2652,7 +2652,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq @@ -2673,7 +2673,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2688,7 +2688,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq @@ -2702,7 +2702,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: 
vmovapd %zmm1, %zmm0 @@ -2717,7 +2717,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq @@ -2731,7 +2731,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2746,7 +2746,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq @@ -2769,7 +2769,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 @@ -2784,7 +2784,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) { ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll index 2bf69cf..3ca4f9a 100644 --- a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll +++ b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll @@ -184,26 +184,12 @@ define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind { } define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) { -; X64-AVX512VL-LABEL: PR29088: -; X64-AVX512VL: ## %bb.0: -; X64-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi) -; 
X64-AVX512VL-NEXT: retq -; -; X64-AVX512BWVL-LABEL: PR29088: -; X64-AVX512BWVL: ## %bb.0: -; X64-AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-AVX512BWVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi) -; X64-AVX512BWVL-NEXT: retq -; -; X64-AVX512DQVL-LABEL: PR29088: -; X64-AVX512DQVL: ## %bb.0: -; X64-AVX512DQVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi) -; X64-AVX512DQVL-NEXT: retq +; X64-AVX512-LABEL: PR29088: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] +; X64-AVX512-NEXT: vmovaps %ymm1, (%rsi) +; X64-AVX512-NEXT: retq %ld = load <4 x i32>, <4 x i32>* %p0 store <8 x float> zeroinitializer, <8 x float>* %p1 %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 62fc47a..35dff11 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -2488,7 +2488,7 @@ define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i6 define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_and_epi32_rr_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1] +; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -2534,12 +2534,12 @@ define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; X86-LABEL: test_mask_and_epi32_rm_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpand (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x00] +; X86-NEXT: vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_and_epi32_rm_128: ; X64: # %bb.0: -; X64-NEXT: vpand (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x07] +; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -2653,7 +2653,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i3 define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_and_epi32_rr_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdb,0xc1] +; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -2699,12 +2699,12 @@ define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; X86-LABEL: test_mask_and_epi32_rm_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpand (%eax), %ymm0, %ymm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xfd,0xdb,0x00] +; X86-NEXT: vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_and_epi32_rm_256: ; X64: # %bb.0: -; X64-NEXT: vpand (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdb,0x07] +; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -2818,7 +2818,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i3 define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_or_epi32_rr_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0xc1] +; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -2864,12 +2864,12 @@ define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; X86-LABEL: test_mask_or_epi32_rm_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpor (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0x00] +; X86-NEXT: vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_or_epi32_rm_128: ; X64: # %bb.0: -; X64-NEXT: vpor (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0x07] +; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -2983,7 +2983,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32 define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_or_epi32_rr_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0xc1] +; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -3029,12 +3029,12 @@ define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; X86-LABEL: test_mask_or_epi32_rm_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpor (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0x00] +; X86-NEXT: vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_or_epi32_rm_256: ; X64: # %bb.0: -; X64-NEXT: vpor (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0x07] +; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -3148,7 +3148,7 @@ declare <8 x i32> 
@llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32 define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_xor_epi32_rr_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc1] +; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -3194,12 +3194,12 @@ define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; X86-LABEL: test_mask_xor_epi32_rm_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpxor (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0x00] +; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_xor_epi32_rm_128: ; X64: # %bb.0: -; X64-NEXT: vpxor (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0x07] +; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -3313,7 +3313,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i3 define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_xor_epi32_rr_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0xc1] +; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -3359,12 +3359,12 @@ define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; X86-LABEL: test_mask_xor_epi32_rm_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpxor (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0x00] +; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_xor_epi32_rm_256: ; X64: # %bb.0: -; X64-NEXT: vpxor (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0x07] +; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -3478,7 +3478,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i3 define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_andnot_epi32_rr_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1] +; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -3524,12 +3524,12 @@ define <4 x i32> 
@test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) ; X86-LABEL: test_mask_andnot_epi32_rm_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpandn (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x00] +; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_andnot_epi32_rm_128: ; X64: # %bb.0: -; X64-NEXT: vpandn (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x07] +; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -3643,7 +3643,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_andnot_epi32_rr_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1] +; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -3689,12 +3689,12 @@ define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) ; X86-LABEL: test_mask_andnot_epi32_rm_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpandn (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x00] +; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_andnot_epi32_rm_256: ; X64: # %bb.0: -; X64-NEXT: vpandn (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x07] +; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -3808,7 +3808,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_mask_andnot_epi64_rr_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1] +; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) ret <2 x i64> %res @@ -3854,12 +3854,12 @@ define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) ; X86-LABEL: test_mask_andnot_epi64_rm_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpandn (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x00] +; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_andnot_epi64_rm_128: ; X64: # %bb.0: -; X64-NEXT: vpandn (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x07] +; X64-NEXT: 
vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <2 x i64>, <2 x i64>* %ptr_b %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) @@ -3976,7 +3976,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: test_mask_andnot_epi64_rr_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1] +; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) ret <4 x i64> %res @@ -4022,12 +4022,12 @@ define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) ; X86-LABEL: test_mask_andnot_epi64_rm_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpandn (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x00] +; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_andnot_epi64_rm_256: ; X64: # %bb.0: -; X64-NEXT: vpandn (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x07] +; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07] ; X64-NEXT: retq # encoding: [0xc3] %b = load <4 x i64>, <4 x i64>* %ptr_b %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) @@ -4078,10 +4078,10 @@ define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) { ; X86-LABEL: test_mask_andnot_epi64_rmb_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] ; X86-NEXT: # xmm1 = mem[0],zero -; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9] -; X86-NEXT: vpandn %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1] +; X86-NEXT: vbroadcastsd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc9] +; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mask_andnot_epi64_rmb_256: diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 0a4cd93..fa3a995 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -5392,7 +5392,7 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, < ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; X86-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05] -; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] +; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4] ; X86-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04] ; X86-NEXT: vaddpd %xmm4, 
%xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc] ; X86-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03] @@ -5404,7 +5404,7 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, < ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; X64-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05] -; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] +; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4] ; X64-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04] ; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc] ; X64-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03] @@ -5427,7 +5427,7 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; X86-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05] -; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2] ; X86-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03] ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -5437,7 +5437,7 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; X64-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05] -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2] ; X64-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03] ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -5458,7 +5458,7 @@ define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, < ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; X86-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04] -; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] +; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4] ; X86-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05] ; X86-NEXT: vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc] ; X86-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] @@ -5470,7 +5470,7 @@ define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, < ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: 
vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; X64-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04] -; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] +; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4] ; X64-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05] ; X64-NEXT: vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc] ; X64-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] @@ -5493,7 +5493,7 @@ define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; X86-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05] -; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] +; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4] ; X86-NEXT: vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8] ; X86-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04] ; X86-NEXT: vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd] @@ -5506,7 +5506,7 @@ define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; X64-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05] -; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] +; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4] ; X64-NEXT: vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8] ; X64-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04] ; X64-NEXT: vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd] @@ -5532,7 +5532,7 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05] ; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0] ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] -; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05] ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0] ; X86-NEXT: vaddps %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4] @@ -5545,7 +5545,7 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05] ; X64-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0] ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} # encoding: 
[0x62,0xf3,0x75,0x09,0x54,0xe2,0x05] -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05] ; X64-NEXT: vaddps %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0] ; X64-NEXT: vaddps %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3] @@ -5569,7 +5569,7 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05] ; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0] ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] -; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05] ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0] ; X86-NEXT: vaddps %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4] @@ -5582,7 +5582,7 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05] ; X64-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0] ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xe2,0x05] -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05] ; X64-NEXT: vaddps %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0] ; X64-NEXT: vaddps %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3] @@ -5606,7 +5606,7 @@ define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05] ; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0] ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] -; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05] ; X86-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0] ; X86-NEXT: vaddps %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4] @@ -5619,7 +5619,7 @@ define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05] ; X64-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0] ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xe2,0x05] -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX 
Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05] ; X64-NEXT: vaddps %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0] ; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] @@ -5643,7 +5643,7 @@ define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05] ; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0] ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] -; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05] ; X86-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0] ; X86-NEXT: vaddps %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4] @@ -5656,7 +5656,7 @@ define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05] ; X64-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0] ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xe2,0x05] -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe8,0x57,0xd2] ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05] ; X64-NEXT: vaddps %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0] ; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] diff --git a/llvm/test/CodeGen/X86/avx512vl-mov.ll b/llvm/test/CodeGen/X86/avx512vl-mov.ll index 90d9ff3..b508bb1 100644 --- a/llvm/test/CodeGen/X86/avx512vl-mov.ll +++ b/llvm/test/CodeGen/X86/avx512vl-mov.ll @@ -268,7 +268,7 @@ define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) { define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) { ; CHECK-LABEL: test_256_25: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2] ; CHECK-NEXT: vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c] ; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -282,7 +282,7 @@ define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1 define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) { ; CHECK-LABEL: test_256_26: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2] ; CHECK-NEXT: vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c] ; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: 
[0x62,0xf1,0x7c,0x29,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -296,7 +296,7 @@ define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1 define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) { ; CHECK-LABEL: test_256_27: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9] ; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c] ; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -310,7 +310,7 @@ define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) { define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) { ; CHECK-LABEL: test_256_28: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9] ; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c] ; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index ebe7def..30bba8d 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -97,20 +97,10 @@ define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { ; fold (abs x) -> x iff not-negative define <16 x i8> @combine_v16i8_abs_constant(<16 x i8> %a) { -; AVX2-LABEL: combine_v16i8_abs_constant: -; AVX2: # %bb.0: -; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: combine_v16i8_abs_constant: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: combine_v16i8_abs_constant: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: retq +; CHECK-LABEL: combine_v16i8_abs_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: retq %1 = insertelement <16 x i8> undef, i8 15, i32 0 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer %3 = and <16 x i8> %a, %2 diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll index 47c1f7c..5b39cb1 100644 --- a/llvm/test/CodeGen/X86/nontemporal-2.ll +++ b/llvm/test/CodeGen/X86/nontemporal-2.ll @@ -116,8 +116,8 @@ define void @test_zero_v4f32(<4 x float>* %dst) { ; ; VLX-LABEL: test_zero_v4f32: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %xmm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %xmm0, (%rdi) ; VLX-NEXT: retq store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1 ret void @@ -138,8 +138,8 @@ define void @test_zero_v4i32(<4 x i32>* %dst) { ; ; VLX-LABEL: test_zero_v4i32: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %xmm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %xmm0, (%rdi) ; VLX-NEXT: retq store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1 store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1 @@ -161,8 +161,8 @@ define void @test_zero_v2f64(<2 x double>* %dst) { ; ; VLX-LABEL: test_zero_v2f64: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; 
VLX-NEXT: vmovntdq %xmm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %xmm0, (%rdi) ; VLX-NEXT: retq store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1 ret void @@ -183,8 +183,8 @@ define void @test_zero_v2i64(<2 x i64>* %dst) { ; ; VLX-LABEL: test_zero_v2i64: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %xmm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %xmm0, (%rdi) ; VLX-NEXT: retq store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1 ret void @@ -205,8 +205,8 @@ define void @test_zero_v8i16(<8 x i16>* %dst) { ; ; VLX-LABEL: test_zero_v8i16: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %xmm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %xmm0, (%rdi) ; VLX-NEXT: retq store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1 ret void @@ -227,8 +227,8 @@ define void @test_zero_v16i8(<16 x i8>* %dst) { ; ; VLX-LABEL: test_zero_v16i8: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %xmm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %xmm0, (%rdi) ; VLX-NEXT: retq store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1 ret void @@ -253,8 +253,8 @@ define void @test_zero_v8f32(<8 x float>* %dst) { ; ; VLX-LABEL: test_zero_v8f32: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %ymm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %ymm0, (%rdi) ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1 @@ -278,8 +278,8 @@ define void @test_zero_v8i32(<8 x i32>* %dst) { ; ; VLX-LABEL: test_zero_v8i32: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %ymm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %ymm0, (%rdi) ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1 @@ -303,8 +303,8 @@ define void @test_zero_v4f64(<4 x double>* %dst) { ; ; VLX-LABEL: test_zero_v4f64: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %ymm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %ymm0, (%rdi) ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1 @@ -328,8 +328,8 @@ define void @test_zero_v4i64(<4 x i64>* %dst) { ; ; VLX-LABEL: test_zero_v4i64: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %ymm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %ymm0, (%rdi) ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1 @@ -353,8 +353,8 @@ define void @test_zero_v16i16(<16 x i16>* %dst) { ; ; VLX-LABEL: test_zero_v16i16: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %ymm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %ymm0, (%rdi) ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1 @@ -378,8 +378,8 @@ define void @test_zero_v32i8(<32 x i8>* %dst) { ; ; VLX-LABEL: test_zero_v32i8: ; VLX: # %bb.0: -; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; VLX-NEXT: vmovntdq %ymm0, (%rdi) +; VLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vmovntps %ymm0, (%rdi) ; VLX-NEXT: vzeroupper ; 
VLX-NEXT: retq store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1 diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll index 218ce26..55bf437 100644 --- a/llvm/test/CodeGen/X86/subvector-broadcast.ll +++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll @@ -756,32 +756,14 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p ; X32-AVX-NEXT: vmovaps %xmm1, (%eax) ; X32-AVX-NEXT: retl ; -; X32-AVX512F-LABEL: test_broadcast_4i32_8i32_chain: -; X32-AVX512F: # %bb.0: -; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax) -; X32-AVX512F-NEXT: retl -; -; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain: -; X32-AVX512BW: # %bb.0: -; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax) -; X32-AVX512BW-NEXT: retl -; -; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain: -; X32-AVX512DQ: # %bb.0: -; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X32-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax) -; X32-AVX512DQ-NEXT: retl +; X32-AVX512-LABEL: test_broadcast_4i32_8i32_chain: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] +; X32-AVX512-NEXT: vmovaps %xmm1, (%eax) +; X32-AVX512-NEXT: retl ; ; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain: ; X64-AVX: # %bb.0: @@ -790,26 +772,12 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p ; X64-AVX-NEXT: vmovaps %xmm1, (%rsi) ; X64-AVX-NEXT: retq ; -; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi) -; X64-AVX512F-NEXT: retq -; -; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain: -; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi) -; X64-AVX512DQ-NEXT: retq +; X64-AVX512-LABEL: test_broadcast_4i32_8i32_chain: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] +; X64-AVX512-NEXT: vmovaps %xmm1, (%rsi) +; X64-AVX512-NEXT: retq %1 = load <4 x i32>, <4 x i32>* %p0 store <4 x float> zeroinitializer, <4 x float>* %p1 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> @@ -827,32 +795,14 @@ define <16 x i32> @test_broadcast_4i32_16i32_chain(<4 x i32>* %p0, <4 x float>* ; X32-AVX-NEXT: vmovaps %ymm0, %ymm1 ; X32-AVX-NEXT: retl ; -; 
X32-AVX512F-LABEL: test_broadcast_4i32_16i32_chain: -; X32-AVX512F: # %bb.0: -; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax) -; X32-AVX512F-NEXT: retl -; -; X32-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain: -; X32-AVX512BW: # %bb.0: -; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax) -; X32-AVX512BW-NEXT: retl -; -; X32-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain: -; X32-AVX512DQ: # %bb.0: -; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax) -; X32-AVX512DQ-NEXT: retl +; X32-AVX512-LABEL: test_broadcast_4i32_16i32_chain: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; X32-AVX512-NEXT: vmovaps %xmm1, (%eax) +; X32-AVX512-NEXT: retl ; ; X64-AVX-LABEL: test_broadcast_4i32_16i32_chain: ; X64-AVX: # %bb.0: @@ -862,26 +812,12 @@ define <16 x i32> @test_broadcast_4i32_16i32_chain(<4 x i32>* %p0, <4 x float>* ; X64-AVX-NEXT: vmovaps %ymm0, %ymm1 ; X64-AVX-NEXT: retq ; -; X64-AVX512F-LABEL: test_broadcast_4i32_16i32_chain: -; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi) -; X64-AVX512F-NEXT: retq -; -; X64-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain: -; X64-AVX512BW: # %bb.0: -; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain: -; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi) -; X64-AVX512DQ-NEXT: retq +; X64-AVX512-LABEL: test_broadcast_4i32_16i32_chain: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; X64-AVX512-NEXT: vmovaps %xmm1, (%rsi) +; X64-AVX512-NEXT: retq %1 = load <4 x i32>, <4 x i32>* %p0 store <4 x float> zeroinitializer, <4 x float>* %p1 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> diff --git a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll index 9aa9b52..6fb0033 100644 --- a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll +++ b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll @@ -57,19 +57,12 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind { } define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind { -; AVX512VL-LABEL: v2f64: -; AVX512VL: ## %bb.0: -; 
AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512VLDQ-LABEL: v2f64: -; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vorps %xmm1, %xmm0, %xmm0 -; AVX512VLDQ-NEXT: retq +; CHECK-LABEL: v2f64: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 +; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b ) ret <2 x double> %tmp } diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll index 892599a..c73913d 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -10,35 +10,15 @@ ; 2013. define <2 x double> @fabs_v2f64(<2 x double> %p) { -; X32_AVX-LABEL: fabs_v2f64: -; X32_AVX: # %bb.0: -; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32_AVX-NEXT: retl -; -; X32_AVX512VL-LABEL: fabs_v2f64: -; X32_AVX512VL: # %bb.0: -; X32_AVX512VL-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 -; X32_AVX512VL-NEXT: retl -; -; X32_AVX512VLDQ-LABEL: fabs_v2f64: -; X32_AVX512VLDQ: # %bb.0: -; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32_AVX512VLDQ-NEXT: retl -; -; X64_AVX-LABEL: fabs_v2f64: -; X64_AVX: # %bb.0: -; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; X64_AVX-NEXT: retq -; -; X64_AVX512VL-LABEL: fabs_v2f64: -; X64_AVX512VL: # %bb.0: -; X64_AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; X64_AVX512VL-NEXT: retq +; X32-LABEL: fabs_v2f64: +; X32: # %bb.0: +; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; X32-NEXT: retl ; -; X64_AVX512VLDQ-LABEL: fabs_v2f64: -; X64_AVX512VLDQ: # %bb.0: -; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; X64_AVX512VLDQ-NEXT: retq +; X64-LABEL: fabs_v2f64: +; X64: # %bb.0: +; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; X64-NEXT: retq %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p) ret <2 x double> %t } diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index f419b30..1974ad5 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -25,7 +25,7 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: retq %c = fcmp ogt <2 x double> %a0, %a1 @@ -62,9 +62,9 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vandpd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vandpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -153,9 +153,9 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; 
AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq %c = fcmp ogt <4 x float> %a0, %a1 @@ -196,11 +196,11 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index dcf01b6..92c2d0b 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -25,7 +25,7 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: retq %c = fcmp ogt <2 x double> %a0, %a1 @@ -60,9 +60,9 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vorpd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vorpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -145,9 +145,9 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq %c = fcmp ogt <4 x float> %a0, %a1 @@ -184,11 +184,11 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff 
--git a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll index fbf9dbc..c38ec37 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll @@ -359,21 +359,13 @@ define float @test_v2f32_zero(<2 x float> %a0) { ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v2f32_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v2f32_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v2f32_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float 0.0, <2 x float> %a0) ret float %1 } @@ -418,29 +410,17 @@ define float @test_v4f32_zero(<4 x float> %a0) { ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v4f32_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v4f32_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v4f32_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float 0.0, <4 x float> %a0) ret float %1 } @@ -511,47 +491,26 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v8f32_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; 
AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v8f32_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v8f32_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0) ret float %1 } @@ -671,79 +630,42 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v16f32_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512BW-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; 
AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v16f32_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vaddss %xmm1, %xmm0, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512VL-NEXT: vaddss %xmm3, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v16f32_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] +; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] +; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] +; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] +; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1 +; 
AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0) ret float %1 } @@ -1304,21 +1226,13 @@ define double @test_v2f64_zero(<2 x double> %a0) { ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v2f64_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v2f64_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v2f64_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double 0.0, <2 x double> %a0) ret double %1 } @@ -1348,31 +1262,18 @@ define double @test_v4f64_zero(<4 x double> %a0) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v4f64_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v4f64_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v4f64_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0) ret double %1 } @@ -1415,47 +1316,26 @@ define double @test_v8f64_zero(<8 x double> %a0) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v8f64_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; 
AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512BW-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v8f64_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vaddsd %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v8f64_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0) ret double %1 } @@ -1524,77 +1404,41 @@ define double @test_v16f64_zero(<16 x double> %a0) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512BW-LABEL: test_v16f64_zero: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm2 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm3 -; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; AVX512BW-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512BW-NEXT: vaddsd %xmm0, %xmm2, %xmm2 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512BW-NEXT: vaddsd %xmm0, %xmm2, %xmm0 -; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] -; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; 
AVX512BW-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm1, %xmm2 -; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vextractf32x4 $3, %zmm1, %xmm1 -; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512BW-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512VL-LABEL: test_v16f64_zero: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm2 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm3 -; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; AVX512VL-NEXT: vaddsd %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VL-NEXT: vaddsd %xmm0, %xmm2, %xmm2 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vaddsd %xmm0, %xmm2, %xmm0 -; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] -; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vextractf32x4 $2, %zmm1, %xmm2 -; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vaddsd %xmm2, %xmm0, %xmm0 -; AVX512VL-NEXT: vextractf32x4 $3, %zmm1, %xmm1 -; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512VL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq +; AVX512-LABEL: test_v16f64_zero: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm2 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] +; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3 +; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm2 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2 +; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: 
vextractf32x4 $3, %zmm1, %xmm1
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
 %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
 ret double %1
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 2ff7ef4..4983737 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -490,15 +490,10 @@ define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(
 ; SSE-NEXT: andps {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
+; AVX: # %bb.0:
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32>
 ret <16 x i8> %shuffle
 }
@@ -1287,19 +1282,12 @@ define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) {
 ; SSE-NEXT: movaps %xmm0, (%rsi)
 ; SSE-NEXT: retq
 ;
-; AVX1OR2-LABEL: constant_gets_selected:
-; AVX1OR2: # %bb.0: # %entry
-; AVX1OR2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovaps %xmm0, (%rdi)
-; AVX1OR2-NEXT: vmovaps %xmm0, (%rsi)
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: constant_gets_selected:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX512VL-NEXT: retq
+; AVX-LABEL: constant_gets_selected:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovaps %xmm0, (%rdi)
+; AVX-NEXT: vmovaps %xmm0, (%rsi)
+; AVX-NEXT: retq
 entry:
 %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
 %shuffle.i = shufflevector <16 x i8> , <16 x i8> %weird_zero, <16 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index c3bc7a9..0869a6a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -746,23 +746,11 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; SSE41-NEXT: retq
 ;
-; AVX1-LABEL: shuffle_v2i64_z1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_z1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_z1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_z1:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
 %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
 ret <2 x i64> %shuffle
 }
@@ -788,23 +776,11 @@ define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: shuffle_v2f64_1z:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_1z:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_1z:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_1z:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
 ret <2 x double> %shuffle
 }
@@ -817,23 +793,11 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
 ; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: shuffle_v2f64_z0:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_z0:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_z0:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_z0:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
 ret <2 x double> %shuffle
 }
@@ -863,23 +827,11 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; SSE41-NEXT: retq
 ;
-; AVX1-LABEL: shuffle_v2f64_z1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_z1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_z1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_z1:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
 %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
 ret <2 x double> %shuffle
 }
@@ -891,23 +843,11 @@ define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_bitcast_1z:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
 %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32>
@@ -937,23 +877,11 @@ define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE41-NEXT: retq
 ;
-; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_bitcast_z123:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: retq
 %bitcast32 = bitcast <2 x i64> %x to <4 x float>
 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> , <4 x i32>
 %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 4e796f0..18aa4b3 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -587,7 +587,7 @@ define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
 ; AVX512VL-SLOW-LABEL: shuffle_v4f64_0z3z:
 ; AVX512VL-SLOW: # %bb.0:
 ; AVX512VL-SLOW-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; AVX512VL-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
 ; AVX512VL-SLOW-NEXT: retq
 ;
@@ -624,9 +624,9 @@ define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
 ;
 ; AVX512VL-SLOW-LABEL: shuffle_v4f64_1z2z:
 ; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
-; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
+; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
 ; AVX512VL-SLOW-NEXT: retq
 ;
 ; AVX512VL-FAST-LABEL: shuffle_v4f64_1z2z:
@@ -1628,9 +1628,9 @@ define <4 x i64> @shuffle_v4i64_z0z3(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-SLOW-LABEL: shuffle_v4i64_z0z3:
 ; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
 ; AVX512VL-SLOW-NEXT: retq
 ;
 ; AVX512VL-FAST-LABEL: shuffle_v4i64_z0z3:
@@ -1666,9 +1666,9 @@ define <4 x i64> @shuffle_v4i64_1z2z(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-SLOW-LABEL: shuffle_v4i64_1z2z:
 ; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
-; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
+; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
 ; AVX512VL-SLOW-NEXT: retq
 ;
 ; AVX512VL-FAST-LABEL: shuffle_v4i64_1z2z:
@@ -1884,65 +1884,41 @@ entry:
 }
 
 define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v4f64_0zzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_0zzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT: retq
 %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32>
 ret <4 x double> %b
 }
 
 define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v4i64_0zzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4i64_0zzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT: retq
 %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32>
 ret <4 x i64> %b
 }
 
 define <8 x float> @shuffle_v8f32_0zzzzzzz_optsize(<8 x float> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; ALL-NEXT: retq
 %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32>
 ret <8 x float> %b
 }
 
 define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
-; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
+; ALL: # %bb.0:
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; ALL-NEXT: retq
 %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32>
 ret <8 x i32> %b
 }
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index aee7337..3470649 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -1896,25 +1896,10 @@ define <16 x i8> @trunc16i64_16i8_const() {
 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: trunc16i64_16i8_const:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc16i64_16i8_const:
-; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: trunc16i64_16i8_const:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: trunc16i64_16i8_const:
-; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc16i64_16i8_const:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index d9938a3..4db1b18 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -380,7 +380,7 @@ define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x
 ;
 ; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
 ; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
-- 
2.7.4