From: Craig Topper Date: Mon, 25 Jul 2016 07:20:31 +0000 (+0000) Subject: [AVX512] Add some additional patterns so that we can fold broadcast loads in the... X-Git-Tag: llvmorg-4.0.0-rc1~14312 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=318e40b6f71559f97f604951cebcfbaf7ee86e6a;p=platform%2Fupstream%2Fllvm.git [AVX512] Add some additional patterns so that we can fold broadcast loads in the first argument of an FMADD/FMSUB/FNMADD/FNMSUB/FMADDSUB/FMSUBADD node. Also add patterns to support all combinations of the broadcast input and the preserved input for masked versions. llvm-svn: 276614 --- diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 822097a..0f32954 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4756,10 +4756,9 @@ let Predicates = [HasAVX512] in { // FMA - Fused Multiply Operations // -let Constraints = "$src1 = $dst" in { multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { + X86VectorVTInfo _, string Suff> { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm r: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; } + + // Additional pattern for folding broadcast nodes in other orders. + def : Pat<(_.VT (vselect _.KRCWM:$mask, + (OpNode _.RC:$src1, _.RC:$src2, + (X86VBroadcast (_.ScalarLdFrag addr:$src3))), + _.RC:$src1)), + (!cast(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1, + _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; } multiclass avx512_fma3_213_round opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + X86VectorVTInfo _, string Suff> { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC; } -} // Constraints = "$src1 = $dst" multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + SDNode OpNodeRnd, AVX512VLVectorVTInfo _, + string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_213_rm, - avx512_fma3_213_round, - EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + defm Z : avx512_fma3p_213_rm, + avx512_fma3_213_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_213_rm, + defm Z256 : avx512_fma3p_213_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_213_rm, + defm Z128 : avx512_fma3p_213_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd > { + SDNode OpNodeRnd > { defm PS : avx512_fma3p_213_common; + avx512vl_f32_info, "PS">; defm PD : avx512_fma3p_213_common, VEX_W; + avx512vl_f64_info, "PD">, VEX_W; } defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; @@ -4824,10 +4831,9 @@ defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddR defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; -let Constraints = "$src1 = $dst" in { multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { + X86VectorVTInfo _, string Suff> { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm r: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), _.RC:$src1))>, AVX512FMA3Base, EVEX_B; } + + // Additional patterns for folding broadcast nodes in other orders. + def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), + _.RC:$src2, _.RC:$src1)), + (!cast(NAME#Suff#_.ZSuffix#mb) _.RC:$src1, + _.RC:$src2, addr:$src3)>; + def : Pat<(_.VT (vselect _.KRCWM:$mask, + (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), + _.RC:$src2, _.RC:$src1), + _.RC:$src1)), + (!cast(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1, + _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; + def : Pat<(_.VT (vselect _.KRCWM:$mask, + (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), + _.RC:$src2, _.RC:$src1), + _.ImmAllZerosV)), + (!cast(NAME#Suff#_.ZSuffix#mbkz) _.RC:$src1, + _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; } multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + X86VectorVTInfo _, string Suff> { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC; } -} // Constraints = "$src1 = $dst" multiclass avx512_fma3p_231_common opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + SDNode OpNodeRnd, AVX512VLVectorVTInfo _, + string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_231_rm, - avx512_fma3_231_round, - EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + defm Z : avx512_fma3p_231_rm, + avx512_fma3_231_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_231_rm, + defm Z256 : avx512_fma3p_231_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_231_rm, + defm Z128 : avx512_fma3p_231_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_231_f opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd > { + SDNode OpNodeRnd > { defm PS : avx512_fma3p_231_common; + avx512vl_f32_info, "PS">; defm PD : avx512_fma3p_231_common, VEX_W; + avx512vl_f64_info, "PD">, VEX_W; } defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; @@ -4891,10 +4915,9 @@ defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86Fms defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; -let Constraints = "$src1 = $dst" in { multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { + X86VectorVTInfo _, string Suff> { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm r: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), _.RC:$src2))>, AVX512FMA3Base, EVEX_B; } + + // Additional patterns for folding broadcast nodes in other orders. + def : Pat<(_.VT (vselect _.KRCWM:$mask, + (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), + _.RC:$src1, _.RC:$src2), + _.RC:$src1)), + (!cast(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1, + _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; } multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + X86VectorVTInfo _, string Suff> { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, EVEX_RC; } -} // Constraints = "$src1 = $dst" multiclass avx512_fma3p_132_common opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, AVX512VLVectorVTInfo _> { + SDNode OpNodeRnd, AVX512VLVectorVTInfo _, + string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_132_rm, - avx512_fma3_132_round, - EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; + defm Z : avx512_fma3p_132_rm, + avx512_fma3_132_round, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_132_rm, + defm Z256 : avx512_fma3p_132_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_132_rm, + defm Z128 : avx512_fma3p_132_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd > { + SDNode OpNodeRnd > { defm PS : avx512_fma3p_132_common; + avx512vl_f32_info, "PS">; defm PD : avx512_fma3p_132_common, VEX_W; + avx512vl_f64_info, "PD">, VEX_W; } defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 6d9a016..81bdd94 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1864,8 +1864,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, ; CHECK-LABEL: test_mask_vfmadd128_ps_rmb: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] -; CHECK-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0x17] -; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1] +; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_a2 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -1880,8 +1879,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmba: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] -; CHECK-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0x17] -; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1] +; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_a2, align 4 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0