From: Craig Topper
Date: Tue, 2 Aug 2016 05:11:15 +0000 (+0000)
Subject: [AVX-512] Correct ExeDomain for many AVX-512 instructions.
X-Git-Tag: llvmorg-4.0.0-rc1~13605
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=05948fb36c5f61554844ba3f31a0674a655549a6;p=platform%2Fupstream%2Fllvm.git

[AVX-512] Correct ExeDomain for many AVX-512 instructions.

llvm-svn: 277416
---

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 98527cd..3830491 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4224,6 +4224,7 @@ defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8X
 //===----------------------------------------------------------------------===//
 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain in {
   defm ri : AVX512_maskable opc, Format ImmFormR, Format ImmFormM,
                    (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                           (i8 imm:$src2))),
                    SSE_INTSHIFT_ITINS_P.rm>;
+  }
 }
 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain in
   defm mbi : AVX512_maskable opc, Format ImmFormM,
 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
   // src2 is always 128-bit
+  let ExeDomain = _.ExeDomain in {
   defm rr : AVX512_maskable opc, string OpcodeStr, SDNode OpNode,
                    (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
                    SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
+  }
 }
 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4353,6 +4358,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
 //===-------------------------------------------------------------------===//
 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain in {
   defm rr : AVX512_maskable opc, string OpcodeStr, SDNode OpNode,
                    (_.VT (bitconvert (_.LdFrag addr:$src2))))),
                    SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
                    EVEX_CD8<_.EltSize, CD8VF>;
+  }
 }
 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain in
   defm rmb : AVX512_maskable OpcImm, bits<8> OpcVar, EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
 }
+let ExeDomain = SSEPackedSingle in
 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                                avx512vl_i32_info>;
+let ExeDomain = SSEPackedDouble in
 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                                avx512vl_i64_info>, VEX_W;
 //===----------------------------------------------------------------------===//
@@ -7196,7 +7206,8 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
 // op(broadcast(eltVt),imm)
 //all instruction created with FROUND_CURRENT
 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                      X86VectorVTInfo _>{
+                                      X86VectorVTInfo _>{
+  let ExeDomain = _.ExeDomain in {
   defm rri : AVX512_maskable opc, string OpcodeStr, SDNode OpNo
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2), (i32 FROUND_CURRENT))>, EVEX_B;
+  }
 }
 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                           SDNode OpNode, X86VectorVTInfo _>{
+  let ExeDomain = _.ExeDomain in
   defm rrib : AVX512_maskable opc, string OpcodeStr, SDNode OpNode,
-                                X86VectorVTInfo _>{
+                                X86VectorVTInfo _>{
+  let ExeDomain = _.ExeDomain in {
   defm rri : AVX512_maskable opc, string OpcodeStr, SDNode OpNode,
                    (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                    (i32 imm:$src3), (i32 FROUND_CURRENT))>, EVEX_B;
+  }
 }
 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
 //                              op(reg_vec2,mem_vec,imm)
 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
-
+  let ExeDomain = DestInfo.ExeDomain in {
   defm rri : AVX512_maskable opc, string OpcodeStr, SDNode OpNode,
                    (SrcInfo.VT (bitconvert (SrcInfo.LdFrag addr:$src2))),
                    (i8 imm:$src3)))>;
+  }
 }
 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -7302,6 +7318,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _>: avx512_3Op_rm_imm8{
+  let ExeDomain = _.ExeDomain in
   defm rmbi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode,
 //                              op(reg_vec2,mem_scalar,imm)
 //all instruction created with FROUND_CURRENT
 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                X86VectorVTInfo _> {
-
+                                X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain in {
   defm rri : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode,
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", []>;
   }
+  }
 }
 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     X86VectorVTInfo _>{
+  let ExeDomain = _.ExeDomain in
   defm rrib : AVX512_maskable, EVEX_4V;
 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                          X86VectorVTInfo _>{
-  let Constraints = "$src1 = $dst" in {
+                          X86VectorVTInfo _>{
+  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
   defm rri : AVX512_maskable_3src, VEX_W;
 //===----------------------------------------------------------------------===//
 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                  X86VectorVTInfo _>{
-  let Constraints = "$src1 = $dst" in {
+                                  X86VectorVTInfo _>{
+  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
   defm rri : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode,
 }
 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
-                                      SDNode OpNode, X86VectorVTInfo _>{
-let Constraints = "$src1 = $dst" in {
+                                      SDNode OpNode, X86VectorVTInfo _>{
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
   defm rrib : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode,
                                    X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
-  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512] in {
+  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
+      ExeDomain = _.ExeDomain in {
   defm rri : AVX512_maskable_3src_scalar
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
 o, Format F, dag outs, dag ins, string asm,
       : Ii8, TAPD, Requires<[HasAVX512]>;
 class AVX512AIi8Base : TAPD {
-  Domain ExeDomain = SSEPackedInt;
   ImmType ImmT = Imm8;
 }
 class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index d0dfb62..63f50ef 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4956,7 +4956,7 @@ define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x d
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
 ; CHECK-NEXT:
vgetmantsd $11, %xmm1, %xmm0, %xmm5 @@ -5995,7 +5995,7 @@ define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovapd %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1} ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z} @@ -6017,10 +6017,10 @@ define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovapd %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z} ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5 +; CHECK-NEXT: vmovapd %zmm0, %zmm5 ; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z} ; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1 @@ -6041,10 +6041,10 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x fl ; CHECK: ## BB#0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5 +; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vmovaps %zmm0, %zmm5 ; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1} ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1 @@ -6065,11 +6065,11 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x f ; CHECK: ## BB#0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %zmm0, %zmm4 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 @@ -6088,10 +6088,10 @@ define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5 +; CHECK-NEXT: vmovaps %zmm0, %zmm5 ; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1} ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1 @@ -6111,9 +6111,9 @@ define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4 +; CHECK-NEXT: vmovaps %zmm0, %zmm4 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, 
%zmm0 {%k1} {z} @@ -6135,11 +6135,11 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x ; CHECK: ## BB#0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovapd %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4 +; CHECK-NEXT: vmovapd %zmm0, %zmm4 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 @@ -6159,10 +6159,10 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x ; CHECK: ## BB#0: ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 +; CHECK-NEXT: vmovapd %zmm0, %zmm3 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5 +; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vmovapd %zmm0, %zmm5 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z} ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1 diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 3079f0e..f0c7a3e 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -6889,7 +6889,7 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05] ; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] ; CHECK-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04] @@ -6911,7 +6911,7 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05] ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03] @@ -6931,7 +6931,7 @@ define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, < ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: 
[0x62,0xf1,0x5d,0x28,0xef,0xe4] ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05] @@ -6953,10 +6953,10 @@ define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm5 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe8] +; CHECK-NEXT: vmovapd %ymm0, %ymm5 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xe8] ; CHECK-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04] ; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] ; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcd] @@ -6976,9 +6976,9 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm4 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xe0] +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05] @@ -6999,9 +6999,9 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm4 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xe0] +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05] @@ -7022,9 +7022,9 @@ define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: 
[0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm4 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe0] +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05] @@ -7045,9 +7045,9 @@ define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm4 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe0] +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] ; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05] diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index aeb93a2..6fc6395 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -2997,12 +2997,12 @@ define <4 x i16> @cvt_4f32_to_4i16(<4 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %r14d ; AVX512VL-NEXT: orl %ebx, %r14d -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bx ; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %eax @@ -3109,12 +3109,12 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %r14d ; AVX512VL-NEXT: orl %ebx, %r14d -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bx ; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %eax @@ -3225,12 +3225,12 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %r14d ; AVX512VL-NEXT: orl %ebx, %r14d -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte 
Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bx ; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %eax @@ -3410,12 +3410,12 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %r15d ; AVX512VL-NEXT: orl %ebx, %r15d -; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bx ; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %r14d @@ -3433,12 +3433,12 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %r15d ; AVX512VL-NEXT: orl %ebx, %r15d -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bx ; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %eax @@ -3677,7 +3677,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: # kill: %AX %AX %EAX ; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: # kill: %AX %AX %EAX @@ -3691,12 +3691,12 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: # kill: %AX %AX %EAX ; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: # kill: %AX %AX %EAX ; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: # kill: %AX %AX %EAX @@ -3720,7 +3720,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind { ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: # kill: %AX %AX %EAX ; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; 
AVX512VL-NEXT: movw %ax, %r13w @@ -3731,11 +3731,11 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind { ; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bp -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %r14w -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %r15w @@ -3888,11 +3888,11 @@ define void @store_cvt_4f32_to_4i16(<4 x float> %a0, <4 x i16>* %a1) nounwind { ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %r14d -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %r15d -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %ebp @@ -4009,12 +4009,12 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %ebx ; AVX512VL-NEXT: orl %ebp, %ebx -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bp ; AVX512VL-NEXT: shll $16, %ebp -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %eax @@ -4133,12 +4133,12 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %ebx ; AVX512VL-NEXT: orl %ebp, %ebx -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, %bp ; AVX512VL-NEXT: shll $16, %ebp -; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movzwl %ax, %eax @@ -4286,11 +4286,11 @@ define void @store_cvt_8f32_to_8i16(<8 x float> %a0, <8 x i16>* %a1) nounwind { ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; 
AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %r12d @@ -4300,11 +4300,11 @@ define void @store_cvt_8f32_to_8i16(<8 x float> %a0, <8 x i16>* %a1) nounwind { ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %r13d -; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %ebp -; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %r14d @@ -4554,11 +4554,11 @@ define void @store_cvt_16f32_to_16i16(<16 x float> %a0, <16 x i16>* %a1) nounwin ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload +; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill @@ -4568,11 +4568,11 @@ define void @store_cvt_16f32_to_16i16(<16 x float> %a0, <16 x i16>* %a1) nounwin ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill @@ -4582,11 +4582,11 @@ define void @store_cvt_16f32_to_16i16(<16 x float> %a0, <16 x i16>* %a1) nounwin ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill @@ -4596,11 +4596,11 @@ define void @store_cvt_16f32_to_16i16(<16 x float> %a0, <16 x i16>* %a1) 
nounwin ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %r14d -; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; AVX512VL-NEXT: callq __gnu_f2h_ieee ; AVX512VL-NEXT: movl %eax, %r15d @@ -4671,77 +4671,23 @@ define i16 @cvt_f64_to_i16(double %a0) nounwind { } define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind { -; AVX1-LABEL: cvt_2f64_to_2i16: -; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: subq $16, %rsp -; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX1-NEXT: callq __truncdfhf2 -; AVX1-NEXT: movw %ax, %bx -; AVX1-NEXT: shll $16, %ebx -; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX1-NEXT: callq __truncdfhf2 -; AVX1-NEXT: movzwl %ax, %eax -; AVX1-NEXT: orl %ebx, %eax -; AVX1-NEXT: vmovd %eax, %xmm0 -; AVX1-NEXT: addq $16, %rsp -; AVX1-NEXT: popq %rbx -; AVX1-NEXT: retq -; -; AVX2-LABEL: cvt_2f64_to_2i16: -; AVX2: # BB#0: -; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: subq $16, %rsp -; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX2-NEXT: callq __truncdfhf2 -; AVX2-NEXT: movw %ax, %bx -; AVX2-NEXT: shll $16, %ebx -; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX2-NEXT: callq __truncdfhf2 -; AVX2-NEXT: movzwl %ax, %eax -; AVX2-NEXT: orl %ebx, %eax -; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: addq $16, %rsp -; AVX2-NEXT: popq %rbx -; AVX2-NEXT: retq -; -; AVX512F-LABEL: cvt_2f64_to_2i16: -; AVX512F: # BB#0: -; AVX512F-NEXT: pushq %rbx -; AVX512F-NEXT: subq $16, %rsp -; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, %bx -; AVX512F-NEXT: shll $16, %ebx -; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movzwl %ax, %eax -; AVX512F-NEXT: orl %ebx, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 -; AVX512F-NEXT: addq $16, %rsp -; AVX512F-NEXT: popq %rbx -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: cvt_2f64_to_2i16: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: pushq %rbx -; AVX512VL-NEXT: subq $16, %rsp -; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, %bx -; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movzwl %ax, %eax -; AVX512VL-NEXT: orl %ebx, %eax -; AVX512VL-NEXT: vmovd %eax, %xmm0 -; AVX512VL-NEXT: addq $16, %rsp -; AVX512VL-NEXT: popq %rbx -; AVX512VL-NEXT: retq +; ALL-LABEL: cvt_2f64_to_2i16: +; ALL: # BB#0: +; ALL-NEXT: pushq %rbx +; ALL-NEXT: subq $16, %rsp +; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill +; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; ALL-NEXT: callq __truncdfhf2 +; ALL-NEXT: movw %ax, %bx +; ALL-NEXT: shll $16, %ebx +; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; ALL-NEXT: callq __truncdfhf2 +; ALL-NEXT: movzwl %ax, %eax +; 
ALL-NEXT: orl %ebx, %eax +; ALL-NEXT: vmovd %eax, %xmm0 +; ALL-NEXT: addq $16, %rsp +; ALL-NEXT: popq %rbx +; ALL-NEXT: retq %1 = fptrunc <2 x double> %a0 to <2 x half> %2 = bitcast <2 x half> %1 to <2 x i16> ret <2 x i16> %2 @@ -4861,7 +4807,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind { ; AVX512VL-NEXT: pushq %r14 ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $40, %rsp -; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill +; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movw %ax, %bx @@ -5011,7 +4957,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind { ; AVX512VL-NEXT: pushq %r14 ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $40, %rsp -; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill +; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movw %ax, %bx @@ -5165,7 +5111,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind { ; AVX512VL-NEXT: pushq %r14 ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $40, %rsp -; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill +; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movw %ax, %bx @@ -5410,7 +5356,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind { ; AVX512VL-NEXT: pushq %r14 ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $96, %rsp -; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rsp) # 64-byte Spill +; AVX512VL-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movw %ax, %bx @@ -5491,81 +5437,24 @@ define void @store_cvt_f64_to_i16(double %a0, i16* %a1) nounwind { } define void @store_cvt_2f64_to_2i16(<2 x double> %a0, <2 x i16>* %a1) nounwind { -; AVX1-LABEL: store_cvt_2f64_to_2i16: -; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: subq $24, %rsp -; AVX1-NEXT: movq %rdi, %rbx -; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX1-NEXT: callq __truncdfhf2 -; AVX1-NEXT: movl %eax, %ebp -; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX1-NEXT: callq __truncdfhf2 -; AVX1-NEXT: movw %ax, (%rbx) -; AVX1-NEXT: movw %bp, 2(%rbx) -; AVX1-NEXT: addq $24, %rsp -; AVX1-NEXT: popq %rbx -; AVX1-NEXT: popq %rbp -; AVX1-NEXT: retq -; -; AVX2-LABEL: store_cvt_2f64_to_2i16: -; AVX2: # BB#0: -; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: subq $24, %rsp -; AVX2-NEXT: movq %rdi, %rbx -; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX2-NEXT: callq __truncdfhf2 -; AVX2-NEXT: movl %eax, %ebp -; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX2-NEXT: callq __truncdfhf2 -; AVX2-NEXT: movw %ax, (%rbx) -; AVX2-NEXT: movw %bp, 2(%rbx) -; AVX2-NEXT: addq $24, %rsp -; AVX2-NEXT: popq %rbx -; AVX2-NEXT: popq %rbp -; AVX2-NEXT: retq -; -; AVX512F-LABEL: store_cvt_2f64_to_2i16: -; AVX512F: # BB#0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: pushq %rbx -; AVX512F-NEXT: subq $24, %rsp -; AVX512F-NEXT: movq %rdi, %rbx -; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, 
%ebp -; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, (%rbx) -; AVX512F-NEXT: movw %bp, 2(%rbx) -; AVX512F-NEXT: addq $24, %rsp -; AVX512F-NEXT: popq %rbx -; AVX512F-NEXT: popq %rbp -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: store_cvt_2f64_to_2i16: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: pushq %rbp -; AVX512VL-NEXT: pushq %rbx -; AVX512VL-NEXT: subq $24, %rsp -; AVX512VL-NEXT: movq %rdi, %rbx -; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %ebp -; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, (%rbx) -; AVX512VL-NEXT: movw %bp, 2(%rbx) -; AVX512VL-NEXT: addq $24, %rsp -; AVX512VL-NEXT: popq %rbx -; AVX512VL-NEXT: popq %rbp -; AVX512VL-NEXT: retq +; ALL-LABEL: store_cvt_2f64_to_2i16: +; ALL: # BB#0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: pushq %rbx +; ALL-NEXT: subq $24, %rsp +; ALL-NEXT: movq %rdi, %rbx +; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill +; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; ALL-NEXT: callq __truncdfhf2 +; ALL-NEXT: movl %eax, %ebp +; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; ALL-NEXT: callq __truncdfhf2 +; ALL-NEXT: movw %ax, (%rbx) +; ALL-NEXT: movw %bp, 2(%rbx) +; ALL-NEXT: addq $24, %rsp +; ALL-NEXT: popq %rbx +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq %1 = fptrunc <2 x double> %a0 to <2 x half> %2 = bitcast <2 x half> %1 to <2 x i16> store <2 x i16> %2, <2 x i16>* %a1 @@ -5692,7 +5581,7 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) nounwind { ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $88, %rsp ; AVX512VL-NEXT: movq %rdi, %rbx -; AVX512VL-NEXT: vmovdqu64 %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill +; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movl %eax, %r14d @@ -5855,7 +5744,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $32, %rsp ; AVX512VL-NEXT: movq %rdi, %r14 -; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill +; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movw %ax, %bp @@ -6026,7 +5915,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $32, %rsp ; AVX512VL-NEXT: movq %rdi, %r14 -; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill +; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movw %ax, %bp @@ -6281,7 +6170,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind { ; AVX512VL-NEXT: pushq %rbx ; AVX512VL-NEXT: subq $200, %rsp ; AVX512VL-NEXT: movq %rdi, %rbx -; AVX512VL-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill +; AVX512VL-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index d853655..22b6e28 100644 --- 
a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -389,45 +389,21 @@ define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
 }
 define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3254:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3254:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3254:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3254:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
 ret <4 x double> %shuffle
 }
 define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3276:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3276:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3276:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3276:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
 ret <4 x double> %shuffle
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 4d441aa..6cd0366 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -190,7 +190,7 @@ define <16 x i32> @shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_2
 define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
 ; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
+; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
 ; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
 ; ALL-NEXT: retq
 %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index da8ea83..6e2df62 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -51,13 +51,13 @@ define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
 ; AVX512F-LABEL: shuffle_v8f64_00000010:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
 ; AVX512F-NEXT: retq
; ; AVX512F-32-LABEL: shuffle_v8f64_00000010: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -67,13 +67,13 @@ define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00000200: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00000200: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -83,13 +83,13 @@ define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00003000: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00003000: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -99,13 +99,13 @@ define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00040000: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00040000: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -115,13 +115,13 @@ define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00500000: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00500000: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -131,13 +131,13 @@ 
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_06000000: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_06000000: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -184,13 +184,13 @@ define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00112233: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00112233: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -200,13 +200,13 @@ define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00001111: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00001111: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -624,13 +624,13 @@ define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_00015444: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00015444: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -641,13 +641,13 @@ define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_00204644: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00204644: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -658,13 +658,13 @@ define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_03004474: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_03004474: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -675,13 +675,13 @@ define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_10004444: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_10004444: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -692,13 +692,13 @@ define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_22006446: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_22006446: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -709,13 +709,13 @@ define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_33307474: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_33307474: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -726,13 +726,13 @@ define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_32104567: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_32104567: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: 
vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -743,13 +743,13 @@ define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_00236744: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00236744: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -760,13 +760,13 @@ define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_00226644: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4] +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4] ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00226644: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0] +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0] ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -837,13 +837,13 @@ define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_002u6u44: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4> +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4> ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_002u6u44: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0> +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0> ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -854,13 +854,13 @@ define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_00uu66uu: ; AVX512F: # BB#0: -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u> +; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u> ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u> +; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u> ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index 3e0c3e4..d57e1fe 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -29,11 +29,11 @@ define <8 x double> @combine_permvar_8f64_identity_mask(<8 x double> %x0, <8 x d ; CHECK-LABEL: 
combine_permvar_8f64_identity_mask:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
+; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
+; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
 ; CHECK-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
 ; CHECK-NEXT: retq
 %res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> , <8 x double> %x1, i8 %m)
 %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> , <8 x double> %res0, i8 %m)
@@ -433,7 +433,7 @@ define <8 x double> @combine_permvar_8f64_as_permpd_mask(<8 x double> %x0, <8 x
 ; CHECK: # BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
 ; CHECK-NEXT: retq
 %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> , <8 x double> %x1, i8 %m)
 ret <8 x double> %1
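
Background on the change, for readers outside the x86 backend: every X86 instruction definition carries an ExeDomain field (GenericDomain, SSEPackedSingle, SSEPackedDouble, or SSEPackedInt), and the execution-domain fixing pass (ExecutionDepsFix) uses it to keep a chain of vector instructions inside a single execution domain, because forwarding a value between the integer and floating-point vector units incurs a bypass delay on several microarchitectures. AVX512AIi8Base hard-coded SSEPackedInt, so floating-point instructions built on it were classified as integer ops, and the moves and zeroing idioms around them came out as vmovdqa64/vpxor; once the domain comes from the X86VectorVTInfo descriptor instead (the "let ExeDomain = _.ExeDomain in" lines above), the pass selects vmovaps/vmovapd and vxorps/vxorpd, which is exactly the churn visible in the test updates. The sketch below is a minimal, self-contained illustration of the propagation pattern; it parses with llvm-tblgen, but the VTInfo, SimpleInst, and unary_imm records are hypothetical stand-ins, not LLVM's real class hierarchy (only Domain matches the actual definition in X86InstrFormats.td):

    // Execution domains, as defined in X86InstrFormats.td.
    class Domain<bits<2> val> {
      bits<2> Value = val;
    }
    def GenericDomain   : Domain<0>;
    def SSEPackedSingle : Domain<1>;
    def SSEPackedDouble : Domain<2>;
    def SSEPackedInt    : Domain<3>;

    // Hypothetical stand-in for X86VectorVTInfo: each vector type
    // records which domain its instructions should execute in.
    class VTInfo<Domain d> {
      Domain ExeDomain = d;
    }
    def f32_vec_info : VTInfo<SSEPackedSingle>;
    def f64_vec_info : VTInfo<SSEPackedDouble>;

    // Hypothetical instruction base class reproducing the old bug:
    // like AVX512AIi8Base, it pins every derived instruction to the
    // integer domain no matter what data it operates on.
    class SimpleInst {
      Domain ExeDomain = SSEPackedInt;
    }

    // The pattern this patch applies throughout X86InstrAVX512.td:
    // override the inherited default with the domain recorded in the
    // type descriptor.
    multiclass unary_imm<VTInfo _> {
      let ExeDomain = _.ExeDomain in
      def rri : SimpleInst;
    }

    defm EXAMPLEPS : unary_imm<f32_vec_info>; // EXAMPLEPSrri -> SSEPackedSingle
    defm EXAMPLEPD : unary_imm<f64_vec_info>; // EXAMPLEPDrri -> SSEPackedDouble

Dumping a file like this with llvm-tblgen shows EXAMPLEPSrri carrying SSEPackedSingle and EXAMPLEPDrri carrying SSEPackedDouble rather than a blanket SSEPackedInt, which is the property the domain-fixing pass relies on when it rewrites the copies checked in the tests above.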