From 5c913e84df88d563bbabb41cdebb8088c1a942c6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 18 Jul 2016 06:14:34 +0000 Subject: [PATCH] [AVX512] Use VMOVAPSZ128rr/VMOVAPS256rr for VR128X/VR256X physreg moves when VLX is supported. Ideally we would use VEX encoded moves instead of EVEX if the high 16 registers aren't referenced, but this a good first step. llvm-svn: 275763 --- llvm/lib/Target/X86/X86InstrInfo.cpp | 21 +- llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 2 +- llvm/test/CodeGen/X86/avx512-calling-conv.ll | 25 ++- llvm/test/CodeGen/X86/avx512-fma.ll | 48 +++-- .../CodeGen/X86/avx512-gather-scatter-intrin.ll | 28 +-- llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 228 ++++++++++----------- llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 60 +++--- llvm/test/CodeGen/X86/avx512ifmavl-intrinsics.ll | 32 +-- llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll | 12 +- .../CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 60 +++--- llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 154 +++++++------- llvm/test/CodeGen/X86/fma_patterns.ll | 36 ++-- llvm/test/CodeGen/X86/masked_gather_scatter.ll | 36 ++-- llvm/test/CodeGen/X86/masked_memop.ll | 14 +- 14 files changed, 396 insertions(+), 360 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 50476d4..6195b33 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4432,13 +4432,22 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg, if (Subtarget.hasBWI()) if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg)) return Opc; - if (X86::VR128XRegClass.contains(DestReg, SrcReg) || - X86::VR256XRegClass.contains(DestReg, SrcReg) || - X86::VR512RegClass.contains(DestReg, SrcReg)) { - DestReg = get512BitSuperRegister(DestReg); - SrcReg = get512BitSuperRegister(SrcReg); + if (X86::VR128XRegClass.contains(DestReg, SrcReg)) { + if (Subtarget.hasVLX()) + return X86::VMOVAPSZ128rr; + DestReg = get512BitSuperRegister(DestReg); + SrcReg = get512BitSuperRegister(SrcReg); + return X86::VMOVAPSZrr; + } + if (X86::VR256XRegClass.contains(DestReg, SrcReg)) { + if (Subtarget.hasVLX()) + return X86::VMOVAPSZ256rr; + DestReg = get512BitSuperRegister(DestReg); + SrcReg = get512BitSuperRegister(SrcReg); + return X86::VMOVAPSZrr; + } + if (X86::VR512RegClass.contains(DestReg, SrcReg)) return X86::VMOVAPSZrr; - } if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) return X86::KMOVWkk; if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) { diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index 3c6b595..2a04de5 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1828,7 +1828,7 @@ define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512VL-NEXT: vmovaps %zmm2, %zmm3 +; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ; AVX512VL-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ; AVX512VL-NEXT: vmovups %ymm2, (%eax) ; AVX512VL-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll index 35e7448..fce592a 100644 --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -340,14 +340,23 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) { } define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) { -; ALL_X64-LABEL: test8: -; ALL_X64: ## BB#0: -; ALL_X64-NEXT: testb $1, %dil -; ALL_X64-NEXT: jne LBB8_2 -; ALL_X64-NEXT: ## BB#1: -; ALL_X64-NEXT: vmovaps %zmm1, %zmm0 -; ALL_X64-NEXT: LBB8_2: -; ALL_X64-NEXT: retq +; KNL-LABEL: test8: +; KNL: ## BB#0: +; KNL-NEXT: testb $1, %dil +; KNL-NEXT: jne LBB8_2 +; KNL-NEXT: ## BB#1: +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: LBB8_2: +; KNL-NEXT: retq +; +; SKX-LABEL: test8: +; SKX: ## BB#0: +; SKX-NEXT: testb $1, %dil +; SKX-NEXT: jne LBB8_2 +; SKX-NEXT: ## BB#1: +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: LBB8_2: +; SKX-NEXT: retq ; ; KNL_X32-LABEL: test8: ; KNL_X32: ## BB#0: diff --git a/llvm/test/CodeGen/X86/avx512-fma.ll b/llvm/test/CodeGen/X86/avx512-fma.ll index 73a1c4f..b2d0835 100644 --- a/llvm/test/CodeGen/X86/avx512-fma.ll +++ b/llvm/test/CodeGen/X86/avx512-fma.ll @@ -67,22 +67,34 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 } define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { -; ALL-LABEL: test_x86_fmsub_213: -; ALL: ## BB#0: -; ALL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 -; ALL-NEXT: vmovaps %zmm1, %zmm0 -; ALL-NEXT: retq +; KNL-LABEL: test_x86_fmsub_213: +; KNL: ## BB#0: +; KNL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_x86_fmsub_213: +; SKX: ## BB#0: +; SKX-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %x = fmul double %a0, %a1 %res = fsub double %x, %a2 ret double %res } define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { -; ALL-LABEL: test_x86_fmsub_213_m: -; ALL: ## BB#0: -; ALL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1 -; ALL-NEXT: vmovaps %zmm1, %zmm0 -; ALL-NEXT: retq +; KNL-LABEL: test_x86_fmsub_213_m: +; KNL: ## BB#0: +; KNL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1 +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_x86_fmsub_213_m: +; SKX: ## BB#0: +; SKX-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1 +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -90,11 +102,17 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { } define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { -; ALL-LABEL: test_x86_fmsub_231_m: -; ALL: ## BB#0: -; ALL-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1 -; ALL-NEXT: vmovaps %zmm1, %zmm0 -; ALL-NEXT: retq +; KNL-LABEL: test_x86_fmsub_231_m: +; KNL: ## BB#0: +; KNL-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1 +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_x86_fmsub_231_m: +; SKX: ## BB#0: +; SKX-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1 +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a2 %res = fsub double %x, %a1 diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll index 9ba1819..d6bc66b 100644 --- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -183,7 +183,7 @@ define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4) ret <8 x float> %res; @@ -281,7 +281,7 @@ define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_df: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1} @@ -314,7 +314,7 @@ define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_df: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %ymm0, %ymm2 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1} @@ -332,7 +332,7 @@ define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x ; CHECK-LABEL: test_int_x86_avx512_gather3div4_di: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %ymm0, %ymm2 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1} @@ -350,7 +350,7 @@ define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, ; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1} @@ -369,7 +369,7 @@ define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ; CHECK-NEXT: kxnorw %k0, %k0, %k2 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2} ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1} ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 @@ -386,7 +386,7 @@ define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, ; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1} @@ -404,7 +404,7 @@ define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x ; CHECK-LABEL: test_int_x86_avx512_gather3div8_si: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2} ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1} @@ -422,7 +422,7 @@ define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1} @@ -455,7 +455,7 @@ define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %ymm0, %ymm2 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1} @@ -488,7 +488,7 @@ define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1} @@ -507,7 +507,7 @@ define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ; CHECK-NEXT: kxnorw %k0, %k0, %k2 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2} ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1} ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 @@ -524,7 +524,7 @@ define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %ymm0, %ymm2 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1} @@ -542,7 +542,7 @@ define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm2 +; CHECK-NEXT: vmovaps %ymm0, %ymm2 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2} ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1} diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index e1d9351b..534d5c8 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -937,7 +937,7 @@ define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xda] ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -954,7 +954,7 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd9] ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -971,7 +971,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xda] ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -986,7 +986,7 @@ define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xda] ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1003,7 +1003,7 @@ define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd9] ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1020,7 +1020,7 @@ define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xda] ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1035,7 +1035,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xda] ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1052,7 +1052,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd9] ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1069,7 +1069,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa8,0xda] ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1084,7 +1084,7 @@ define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xda] ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1101,7 +1101,7 @@ define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd9] ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1118,7 +1118,7 @@ define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xda] ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1136,7 +1136,7 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd9] ; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xaa,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1154,7 +1154,7 @@ define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd9] ; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xaa,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1171,7 +1171,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd9] ; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xaa,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1188,7 +1188,7 @@ define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd9] ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xaa,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1300,7 +1300,7 @@ define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xda] ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xae,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1317,7 +1317,7 @@ define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd9] ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xae,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1332,7 +1332,7 @@ define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xda] ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xae,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1349,7 +1349,7 @@ define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd9] ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xae,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1364,7 +1364,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xda] ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xae,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1381,7 +1381,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd9] ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xae,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1396,7 +1396,7 @@ define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xda] ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xae,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1413,7 +1413,7 @@ define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd9] ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xae,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1428,7 +1428,7 @@ define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xda] ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xac,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1443,7 +1443,7 @@ define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xda] ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xac,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1458,7 +1458,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xda] ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xac,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1473,7 +1473,7 @@ define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xda] ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xac,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1536,7 +1536,7 @@ define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1553,7 +1553,7 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd9] ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1570,7 +1570,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1585,7 +1585,7 @@ define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1602,7 +1602,7 @@ define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd9] ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1619,7 +1619,7 @@ define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1634,7 +1634,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1651,7 +1651,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd9] ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1668,7 +1668,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1683,7 +1683,7 @@ define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1700,7 +1700,7 @@ define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd9] ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1717,7 +1717,7 @@ define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xda] ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -1734,7 +1734,7 @@ define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd9] ; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa7,0xc2] ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] @@ -1751,7 +1751,7 @@ define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd9] ; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa7,0xc2] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] @@ -1768,7 +1768,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] ; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd9] ; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa7,0xc2] ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] @@ -1785,7 +1785,7 @@ define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] ; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd9] ; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa7,0xc2] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] @@ -2009,7 +2009,7 @@ define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2040,7 +2040,7 @@ define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -2074,7 +2074,7 @@ define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -2105,7 +2105,7 @@ define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -2139,7 +2139,7 @@ define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2170,7 +2170,7 @@ define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -2204,7 +2204,7 @@ define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -2235,7 +2235,7 @@ define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -2464,7 +2464,7 @@ define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2495,7 +2495,7 @@ define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -2529,7 +2529,7 @@ define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -2560,7 +2560,7 @@ define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -2595,7 +2595,7 @@ define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2626,7 +2626,7 @@ define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) @@ -2661,7 +2661,7 @@ define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2699,7 +2699,7 @@ define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -2730,7 +2730,7 @@ define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) @@ -2765,7 +2765,7 @@ define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2803,7 +2803,7 @@ define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -2834,7 +2834,7 @@ define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) @@ -2868,7 +2868,7 @@ define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <3 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -2899,7 +2899,7 @@ define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) @@ -2934,7 +2934,7 @@ define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2965,7 +2965,7 @@ define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) @@ -3000,7 +3000,7 @@ define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -3038,7 +3038,7 @@ define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <1 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -3069,7 +3069,7 @@ define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) @@ -3104,7 +3104,7 @@ define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <1 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -3142,7 +3142,7 @@ define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -3173,7 +3173,7 @@ define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) @@ -3207,7 +3207,7 @@ define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -3238,7 +3238,7 @@ define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) @@ -3272,7 +3272,7 @@ define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -3303,7 +3303,7 @@ define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -3337,7 +3337,7 @@ define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <1 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -3368,7 +3368,7 @@ define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -3402,7 +3402,7 @@ define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -3433,7 +3433,7 @@ define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -3467,7 +3467,7 @@ define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <1 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -3498,7 +3498,7 @@ define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -3532,7 +3532,7 @@ define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -3563,7 +3563,7 @@ define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -3597,7 +3597,7 @@ define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <1 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -3628,7 +3628,7 @@ define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -3662,7 +3662,7 @@ define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -3693,7 +3693,7 @@ define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -3727,7 +3727,7 @@ define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <1 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -3758,7 +3758,7 @@ define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -3792,7 +3792,7 @@ define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -3823,7 +3823,7 @@ define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -3857,7 +3857,7 @@ define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -3888,7 +3888,7 @@ define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -3922,7 +3922,7 @@ define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -3953,7 +3953,7 @@ define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -3987,7 +3987,7 @@ define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -4018,7 +4018,7 @@ define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -4052,7 +4052,7 @@ define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -4083,7 +4083,7 @@ define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -4117,7 +4117,7 @@ define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -4148,7 +4148,7 @@ define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -4182,7 +4182,7 @@ define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -4213,7 +4213,7 @@ define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -4247,7 +4247,7 @@ define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -4278,7 +4278,7 @@ define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -4560,7 +4560,7 @@ define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xda] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] ; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] @@ -4577,7 +4577,7 @@ define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xda] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] ; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] @@ -4594,7 +4594,7 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xda] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] ; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] @@ -4611,7 +4611,7 @@ define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <1 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xda] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] ; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] @@ -4628,7 +4628,7 @@ define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xda] ; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xca] ; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] @@ -4645,7 +4645,7 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xda] ; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xca] ; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 12f37a0..689eb16 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -118,7 +118,7 @@ define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) ret <4 x i64> %res @@ -149,7 +149,7 @@ define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i64>, <4 x i64>* %ptr_b %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) @@ -184,7 +184,7 @@ define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 @@ -222,7 +222,7 @@ define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) ret <2 x i64> %res @@ -253,7 +253,7 @@ define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <2 x i64>, <2 x i64>* %ptr_b %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) @@ -288,7 +288,7 @@ define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 @@ -326,7 +326,7 @@ define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -357,7 +357,7 @@ define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %pt ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -392,7 +392,7 @@ define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -430,7 +430,7 @@ define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -461,7 +461,7 @@ define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %pt ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -496,7 +496,7 @@ define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -638,7 +638,7 @@ define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -669,7 +669,7 @@ define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -704,7 +704,7 @@ define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -742,7 +742,7 @@ define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -773,7 +773,7 @@ define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -808,7 +808,7 @@ define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -950,7 +950,7 @@ define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -981,7 +981,7 @@ define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -1016,7 +1016,7 @@ define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -1054,7 +1054,7 @@ define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -1085,7 +1085,7 @@ define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -1120,7 +1120,7 @@ define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -1262,7 +1262,7 @@ define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -1293,7 +1293,7 @@ define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -1328,7 +1328,7 @@ define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -1366,7 +1366,7 @@ define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -1397,7 +1397,7 @@ define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -1432,7 +1432,7 @@ define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 diff --git a/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics.ll index 10ce8f5..1d5febf 100644 --- a/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics.ll @@ -7,9 +7,9 @@ define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4 ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} @@ -35,9 +35,9 @@ define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} @@ -63,9 +63,9 @@ define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4 ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} @@ -91,9 +91,9 @@ define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} @@ -119,9 +119,9 @@ define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4 ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} @@ -147,9 +147,9 @@ define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} @@ -175,9 +175,9 @@ define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4 ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} @@ -203,9 +203,9 @@ define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z} -; CHECK-NEXT: vmovaps %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} diff --git a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll index a09523b..b68e711 100644 --- a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll @@ -86,7 +86,7 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xda] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xca] ; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] @@ -108,7 +108,7 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xda] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] @@ -130,7 +130,7 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xda] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xca] ; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] @@ -152,7 +152,7 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xda] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] @@ -175,7 +175,7 @@ define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xca] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) ret <16 x i8> %res @@ -188,7 +188,7 @@ define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xca] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) ret <32 x i8> %res diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 9ce8998..f9126b4 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -1509,7 +1509,7 @@ define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -1540,7 +1540,7 @@ define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1575,7 +1575,7 @@ define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -1613,7 +1613,7 @@ define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -1644,7 +1644,7 @@ define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -1679,7 +1679,7 @@ define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -1717,7 +1717,7 @@ define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i3 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -1748,7 +1748,7 @@ define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1783,7 +1783,7 @@ define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i3 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -1821,7 +1821,7 @@ define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i3 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -1852,7 +1852,7 @@ define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -1887,7 +1887,7 @@ define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i3 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -1925,7 +1925,7 @@ define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -1956,7 +1956,7 @@ define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1991,7 +1991,7 @@ define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2029,7 +2029,7 @@ define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -2060,7 +2060,7 @@ define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -2095,7 +2095,7 @@ define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2133,7 +2133,7 @@ define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -2164,7 +2164,7 @@ define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -2199,7 +2199,7 @@ define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2237,7 +2237,7 @@ define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -2268,7 +2268,7 @@ define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -2303,7 +2303,7 @@ define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2341,7 +2341,7 @@ define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) ret <2 x i64> %res @@ -2372,7 +2372,7 @@ define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <2 x i64>, <2 x i64>* %ptr_b %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) @@ -2407,7 +2407,7 @@ define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 @@ -2445,7 +2445,7 @@ define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) ret <4 x i64> %res @@ -2476,7 +2476,7 @@ define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i64>, <4 x i64>* %ptr_b %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) @@ -2511,7 +2511,7 @@ define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index cbd0d5e..41376cf 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -834,7 +834,7 @@ define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) ret <4 x double> %res @@ -950,7 +950,7 @@ define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) ret <4 x double> %res @@ -1025,7 +1025,7 @@ define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) ret < 2 x i64> %res @@ -1056,7 +1056,7 @@ define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 4 x i32>, < 4 x i32>* %ptr_b %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) @@ -1092,7 +1092,7 @@ define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 @@ -1132,7 +1132,7 @@ define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) ret < 4 x i64> %res @@ -1163,7 +1163,7 @@ define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 8 x i32>, < 8 x i32>* %ptr_b %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) @@ -1199,7 +1199,7 @@ define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 @@ -1239,7 +1239,7 @@ define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) ret < 2 x i64> %res @@ -1270,7 +1270,7 @@ define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 4 x i32>, < 4 x i32>* %ptr_b %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) @@ -1306,7 +1306,7 @@ define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 @@ -1346,7 +1346,7 @@ define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) ret < 4 x i64> %res @@ -1377,7 +1377,7 @@ define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 8 x i32>, < 8 x i32>* %ptr_b %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) @@ -1413,7 +1413,7 @@ define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 @@ -1453,7 +1453,7 @@ define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -1484,7 +1484,7 @@ define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1519,7 +1519,7 @@ define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -1557,7 +1557,7 @@ define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -1588,7 +1588,7 @@ define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1623,7 +1623,7 @@ define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -1661,7 +1661,7 @@ define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -1692,7 +1692,7 @@ define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -1727,7 +1727,7 @@ define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -1765,7 +1765,7 @@ define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -1796,7 +1796,7 @@ define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, < ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -1831,7 +1831,7 @@ define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -1918,7 +1918,7 @@ define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -1949,7 +1949,7 @@ define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -1980,7 +1980,7 @@ define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -2011,7 +2011,7 @@ define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -2042,7 +2042,7 @@ define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -2073,7 +2073,7 @@ define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -2104,7 +2104,7 @@ define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -2135,7 +2135,7 @@ define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -2166,7 +2166,7 @@ define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -2197,7 +2197,7 @@ define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -2228,7 +2228,7 @@ define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -2259,7 +2259,7 @@ define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -2581,7 +2581,7 @@ define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xda] ; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] ; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1] @@ -2598,7 +2598,7 @@ define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xda] ; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] ; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1] @@ -2615,7 +2615,7 @@ define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xda] ; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] ; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1] @@ -2632,7 +2632,7 @@ define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xda] ; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] ; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1] @@ -2649,7 +2649,7 @@ define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xda] ; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x77,0xca] ; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] @@ -2666,7 +2666,7 @@ define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xda] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xca] ; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] @@ -2683,7 +2683,7 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] ; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xda] ; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xca] ; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] @@ -2700,7 +2700,7 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] ; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xda] ; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xca] ; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] @@ -4813,7 +4813,7 @@ define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0] @@ -4830,7 +4830,7 @@ define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i3 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0] @@ -4847,7 +4847,7 @@ define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] @@ -4864,7 +4864,7 @@ define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i3 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] @@ -4881,7 +4881,7 @@ define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0xd4,0xc0] @@ -4898,7 +4898,7 @@ define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i6 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0xd4,0xc0] @@ -4915,7 +4915,7 @@ define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0] @@ -4932,7 +4932,7 @@ define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i6 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21] ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0] @@ -4957,7 +4957,7 @@ define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask) ret <4 x float> %res @@ -4990,7 +4990,7 @@ define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask) ret <8 x float> %res @@ -5072,7 +5072,7 @@ define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 % ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask) ret <8 x float> %res @@ -5102,7 +5102,7 @@ define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 % ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) ret <4 x float> %res @@ -5135,7 +5135,7 @@ define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %ma ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask) ret <8 x float> %res @@ -5165,7 +5165,7 @@ define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %ma ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) ret <4 x float> %res @@ -5198,7 +5198,7 @@ define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) ret <4 x double> %res @@ -5228,7 +5228,7 @@ define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) ret <2 x double> %res @@ -5261,7 +5261,7 @@ define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) ret <4 x double> %res @@ -5291,7 +5291,7 @@ define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) ret <2 x double> %res @@ -6889,7 +6889,7 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05] ; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] ; CHECK-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04] @@ -6911,7 +6911,7 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05] ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03] @@ -6931,7 +6931,7 @@ define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05] @@ -6953,10 +6953,10 @@ define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] -; CHECK-NEXT: vmovaps %zmm0, %zmm5 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe8] +; CHECK-NEXT: vmovaps %ymm0, %ymm5 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe8] ; CHECK-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04] ; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] ; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcd] @@ -6976,9 +6976,9 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05] @@ -6999,9 +6999,9 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05] @@ -7022,9 +7022,9 @@ define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05] @@ -7045,9 +7045,9 @@ define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05] diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 5457c9f..62d1b82 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -23,7 +23,7 @@ define float @test_f32_fmadd(float %a0, float %a1, float %a2) { ; AVX512-LABEL: test_f32_fmadd: ; AVX512: # BB#0: ; AVX512-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul float %a0, %a1 %res = fadd float %x, %a2 @@ -84,7 +84,7 @@ define double @test_f64_fmadd(double %a0, double %a1, double %a2) { ; AVX512-LABEL: test_f64_fmadd: ; AVX512: # BB#0: ; AVX512-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul double %a0, %a1 %res = fadd double %x, %a2 @@ -149,7 +149,7 @@ define float @test_f32_fmsub(float %a0, float %a1, float %a2) { ; AVX512-LABEL: test_f32_fmsub: ; AVX512: # BB#0: ; AVX512-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul float %a0, %a1 %res = fsub float %x, %a2 @@ -210,7 +210,7 @@ define double @test_f64_fmsub(double %a0, double %a1, double %a2) { ; AVX512-LABEL: test_f64_fmsub: ; AVX512: # BB#0: ; AVX512-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -275,7 +275,7 @@ define float @test_f32_fnmadd(float %a0, float %a1, float %a2) { ; AVX512-LABEL: test_f32_fnmadd: ; AVX512: # BB#0: ; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul float %a0, %a1 %res = fsub float %a2, %x @@ -336,7 +336,7 @@ define double @test_f64_fnmadd(double %a0, double %a1, double %a2) { ; AVX512-LABEL: test_f64_fnmadd: ; AVX512: # BB#0: ; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul double %a0, %a1 %res = fsub double %a2, %x @@ -401,7 +401,7 @@ define float @test_f32_fnmsub(float %a0, float %a1, float %a2) { ; AVX512-LABEL: test_f32_fnmsub: ; AVX512: # BB#0: ; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul float %a0, %a1 %y = fsub float -0.000000e+00, %x @@ -465,7 +465,7 @@ define double @test_f64_fnmsub(double %a0, double %a1, double %a2) { ; AVX512-LABEL: test_f64_fnmsub: ; AVX512: # BB#0: ; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %x = fmul double %a0, %a1 %y = fsub double -0.000000e+00, %x @@ -534,7 +534,7 @@ define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x ; AVX512: # BB#0: ; AVX512-NEXT: vmovaps (%rdi), %xmm2 ; AVX512-NEXT: vfmadd213ps %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovaps %zmm2, %zmm0 +; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %x = load <4 x float>, <4 x float>* %a0 %y = fmul <4 x float> %x, %a1 @@ -557,7 +557,7 @@ define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, < ; AVX512: # BB#0: ; AVX512-NEXT: vmovapd (%rdi), %xmm2 ; AVX512-NEXT: vfmsub213pd %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovaps %zmm2, %zmm0 +; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %x = load <2 x double>, <2 x double>* %a0 %y = fmul <2 x double> %x, %a1 @@ -830,7 +830,7 @@ define float @test_f32_interp(float %x, float %y, float %t) { ; AVX512: # BB#0: ; AVX512-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 ; AVX512-NEXT: vfmadd213ss %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovaps %zmm2, %zmm0 +; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %t1 = fsub float 1.0, %t %tx = fmul float %x, %t @@ -854,7 +854,7 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float ; ; AVX512-LABEL: test_v4f32_interp: ; AVX512: # BB#0: -; AVX512-NEXT: vmovaps %zmm2, %zmm3 +; AVX512-NEXT: vmovaps %xmm2, %xmm3 ; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm3 ; AVX512-NEXT: vfmadd213ps %xmm3, %xmm2, %xmm0 ; AVX512-NEXT: retq @@ -880,7 +880,7 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float ; ; AVX512-LABEL: test_v8f32_interp: ; AVX512: # BB#0: -; AVX512-NEXT: vmovaps %zmm2, %zmm3 +; AVX512-NEXT: vmovaps %ymm2, %ymm3 ; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm1, %ymm3 ; AVX512-NEXT: vfmadd213ps %ymm3, %ymm2, %ymm0 ; AVX512-NEXT: retq @@ -908,7 +908,7 @@ define double @test_f64_interp(double %x, double %y, double %t) { ; AVX512: # BB#0: ; AVX512-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 ; AVX512-NEXT: vfmadd213sd %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovaps %zmm2, %zmm0 +; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %t1 = fsub double 1.0, %t %tx = fmul double %x, %t @@ -932,7 +932,7 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do ; ; AVX512-LABEL: test_v2f64_interp: ; AVX512: # BB#0: -; AVX512-NEXT: vmovaps %zmm2, %zmm3 +; AVX512-NEXT: vmovaps %xmm2, %xmm3 ; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm1, %xmm3 ; AVX512-NEXT: vfmadd213pd %xmm3, %xmm2, %xmm0 ; AVX512-NEXT: retq @@ -958,7 +958,7 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do ; ; AVX512-LABEL: test_v4f64_interp: ; AVX512: # BB#0: -; AVX512-NEXT: vmovaps %zmm2, %zmm3 +; AVX512-NEXT: vmovaps %ymm2, %ymm3 ; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm1, %ymm3 ; AVX512-NEXT: vfmadd213pd %ymm3, %ymm2, %ymm0 ; AVX512-NEXT: retq @@ -1102,7 +1102,7 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: ; AVX512: # BB#0: ; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %m0 = fmul <4 x float> %x, %m1 = fmul <4 x float> %m0, @@ -1129,7 +1129,7 @@ define double @test_f64_fneg_fmul(double %x, double %y) #0 { ; AVX512: # BB#0: ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 -; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq %m = fmul nsz double %x, %y %n = fsub double -0.0, %m diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 04505a3..72a6f3c 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -265,7 +265,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) { ; SKX-NEXT: kxnorw %k0, %k0, %k2 ; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} ; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: vmovaps %ymm2, %ymm0 ; SKX-NEXT: retq %a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> , <8 x i32> undef) @@ -305,7 +305,7 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) { ; SKX-NEXT: kmovb %esi, %k1 ; SKX-NEXT: kmovw %k1, %k2 ; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2} -; SKX-NEXT: vmovaps %zmm1, %zmm2 +; SKX-NEXT: vmovaps %ymm1, %ymm2 ; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1} ; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0 ; SKX-NEXT: retq @@ -701,7 +701,7 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 ; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 ; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test15: @@ -710,7 +710,7 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { ; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 +; SKX_32-NEXT: vmovaps %xmm1, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <4 x i32> %ind to <4 x i64> @@ -756,7 +756,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 ; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 ; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1} -; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: vmovaps %ymm2, %ymm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test16: @@ -765,7 +765,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x ; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1} -; SKX_32-NEXT: vmovaps %zmm2, %zmm0 +; SKX_32-NEXT: vmovaps %ymm2, %ymm0 ; SKX_32-NEXT: retl %sext_ind = sext <4 x i32> %ind to <4 x i64> @@ -802,7 +802,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 ; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1} -; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test17: @@ -811,7 +811,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x ; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1} -; SKX_32-NEXT: vmovaps %zmm2, %zmm0 +; SKX_32-NEXT: vmovaps %xmm2, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> @@ -1048,7 +1048,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl ; SKX-NEXT: kshiftlb $6, %k0, %k0 ; SKX-NEXT: kshiftrb $6, %k0, %k1 ; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1} -; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test22: @@ -1060,7 +1060,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl ; SKX_32-NEXT: kshiftrb $6, %k0, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1} -; SKX_32-NEXT: vmovaps %zmm2, %zmm0 +; SKX_32-NEXT: vmovaps %xmm2, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind @@ -1099,7 +1099,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> % ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 ; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1} -; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test23: @@ -1108,7 +1108,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> % ; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1} -; SKX_32-NEXT: vmovaps %zmm2, %zmm0 +; SKX_32-NEXT: vmovaps %xmm2, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind @@ -1140,7 +1140,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) { ; SKX: # BB#0: ; SKX-NEXT: kxnorw %k0, %k0, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test24: @@ -1148,7 +1148,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) { ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 +; SKX_32-NEXT: vmovaps %xmm1, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind @@ -1184,7 +1184,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> % ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 ; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1} -; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test25: @@ -1193,7 +1193,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> % ; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1} -; SKX_32-NEXT: vmovaps %zmm2, %zmm0 +; SKX_32-NEXT: vmovaps %xmm2, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind @@ -1226,7 +1226,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) { ; SKX: # BB#0: ; SKX-NEXT: kxnorw %k0, %k0, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test26: @@ -1234,7 +1234,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) { ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 +; SKX_32-NEXT: vmovaps %xmm1, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll index 4220308..2b5e80a 100644 --- a/llvm/test/CodeGen/X86/masked_memop.ll +++ b/llvm/test/CodeGen/X86/masked_memop.ll @@ -237,7 +237,7 @@ define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovupd (%rdi), %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq %mask = icmp eq <2 x i64> %trigger, zeroinitializer %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst) @@ -266,7 +266,7 @@ define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %d ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq %mask = icmp eq <4 x i32> %trigger, zeroinitializer %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst) @@ -303,7 +303,7 @@ define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq %mask = icmp eq <4 x i32> %trigger, zeroinitializer %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) @@ -379,7 +379,7 @@ define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovapd (%rdi), %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %ymm1, %ymm0 ; SKX-NEXT: retq %mask = icmp eq <4 x i32> %trigger, zeroinitializer %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst) @@ -462,7 +462,7 @@ define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> ; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2 ; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1 ; SKX-NEXT: vmovaps (%rdi), %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %ymm1, %ymm0 ; SKX-NEXT: retq %mask = icmp eq <8 x i32> %trigger, zeroinitializer %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst) @@ -509,7 +509,7 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) { ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k1 ; SKX-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %ymm1, %ymm0 ; SKX-NEXT: retq %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst) ret <8 x i32> %res @@ -812,7 +812,7 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> % ; SKX-NEXT: kshiftlw $14, %k0, %k0 ; SKX-NEXT: kshiftrw $14, %k0, %k1 ; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) -- 2.7.4