From ead11e4d4bef4e291faf47a0e5b514abbfde4dbd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 11 May 2018 12:46:54 +0000 Subject: [PATCH] [X86] Added scheduler helper classes to split move/load/store by size Nothing uses this yet but this will allow us to specialize MMX/XMM/YMM/ZMM vector moves. llvm-svn: 332090 --- llvm/lib/Target/X86/X86InstrAVX512.td | 202 ++++++++++++++++---------------- llvm/lib/Target/X86/X86InstrMMX.td | 2 +- llvm/lib/Target/X86/X86InstrSSE.td | 211 +++++++++++++++++++--------------- llvm/lib/Target/X86/X86Schedule.td | 44 ++++++- 4 files changed, 261 insertions(+), 198 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c18dc51..85c6f87 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -878,6 +878,7 @@ multiclass vextract_for_type; defm VEXTRACTI : vextract_for_type; @@ -3167,16 +3168,14 @@ defm : operation_subvector_mask_lowering; // AVX-512 - Aligned and unaligned load and store // - multiclass avx512_load opc, string OpcodeStr, X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, - SchedWrite SchedRR, SchedWrite SchedRM, - bit NoRMPattern = 0, + X86SchedWriteMoveLS Sched, bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX, Sched<[SchedRR]>; + _.ExeDomain>, EVEX, Sched<[Sched.RR]>; def rrkz : AVX512PI opc, string OpcodeStr, [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, (_.VT _.RC:$src), _.ImmAllZerosV)))], _.ExeDomain>, - EVEX, EVEX_KZ, Sched<[SchedRR]>; + EVEX, EVEX_KZ, Sched<[Sched.RR]>; let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def rm : AVX512PI opc, string OpcodeStr, !if(NoRMPattern, [], [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))]), - _.ExeDomain>, EVEX, Sched<[SchedRM]>; + _.ExeDomain>, EVEX, Sched<[Sched.RM]>; let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { def rrk : AVX512PI opc, string OpcodeStr, [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, (_.VT _.RC:$src1), (_.VT _.RC:$src0))))], _.ExeDomain>, - EVEX, EVEX_K, Sched<[SchedRR]>; + EVEX, EVEX_K, Sched<[Sched.RR]>; def rmk : AVX512PI opc, string OpcodeStr, (vselect _.KRCWM:$mask, (_.VT (bitconvert (ld_frag addr:$src1))), (_.VT _.RC:$src0))))], _.ExeDomain>, - EVEX, EVEX_K, Sched<[SchedRM]>; + EVEX, EVEX_K, Sched<[Sched.RM]>; } def rmkz : AVX512PI opc, string OpcodeStr, "${dst} {${mask}} {z}, $src}", [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))], - _.ExeDomain>, EVEX, EVEX_KZ, Sched<[SchedRM]>; + _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; } def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), (!cast(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; @@ -3233,65 +3232,65 @@ multiclass avx512_load opc, string OpcodeStr, } multiclass avx512_alignedload_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, - Predicate prd, SchedWrite SchedRR, - SchedWrite SchedRM, bit NoRMPattern = 0> { + AVX512VLVectorVTInfo _, Predicate prd, + X86SchedWriteMoveLSWidths Sched, + bit NoRMPattern = 0> { let Predicates = [prd] in defm Z : avx512_load, EVEX_V512; + Sched.ZMM, NoRMPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load, EVEX_V256; + Sched.YMM, NoRMPattern>, EVEX_V256; defm Z128 : avx512_load, EVEX_V128; + Sched.XMM, NoRMPattern>, EVEX_V128; } } multiclass avx512_load_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, - Predicate prd, SchedWrite SchedRR, - SchedWrite SchedRM, bit 
NoRMPattern = 0, - SDPatternOperator SelectOprr = vselect> { + AVX512VLVectorVTInfo _, Predicate prd, + X86SchedWriteMoveLSWidths Sched, + bit NoRMPattern = 0, + SDPatternOperator SelectOprr = vselect> { let Predicates = [prd] in defm Z : avx512_load, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load, EVEX_V256; defm Z128 : avx512_load, EVEX_V128; } } multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, - string Name, SchedWrite SchedRR, SchedWrite SchedMR, + string Name, X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> { let hasSideEffects = 0 in { def rr_REV : AVX512PI, EVEX, FoldGenData, - Sched<[SchedRR]>; + Sched<[Sched.RR]>; def rrk_REV : AVX512PI, EVEX, EVEX_K, - FoldGenData, Sched<[SchedRR]>; + FoldGenData, Sched<[Sched.RR]>; def rrkz_REV : AVX512PI, EVEX, EVEX_KZ, - FoldGenData, Sched<[SchedRR]>; + FoldGenData, Sched<[Sched.RR]>; } let hasSideEffects = 0, mayStore = 1 in @@ -3299,122 +3298,116 @@ multiclass avx512_store opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), !if(NoMRPattern, [], [(st_frag (_.VT _.RC:$src), addr:$dst)]), - _.ExeDomain>, EVEX, Sched<[SchedMR]>; + _.ExeDomain>, EVEX, Sched<[Sched.MR]>; def mrk : AVX512PI, EVEX, EVEX_K, Sched<[SchedMR]>; + [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>; def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)), (!cast(NAME#_.ZSuffix##mrk) addr:$ptr, _.KRCWM:$mask, _.RC:$src)>; } - multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, - string Name, SchedWrite SchedRR, SchedWrite SchedMR, + string Name, X86SchedWriteMoveLSWidths Sched, bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_unaligned, Name#Z256, Sched.YMM, + NoMRPattern>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_unaligned, Name#Z128, Sched.XMM, + NoMRPattern>, EVEX_V128; } } multiclass avx512_alignedstore_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd, - string Name, SchedWrite SchedRR, - SchedWrite SchedMR, bit NoMRPattern = 0> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name, X86SchedWriteMoveLSWidths Sched, + bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_aligned256, Name#Z256, Sched.YMM, + NoMRPattern>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_aligned128, Name#Z128, Sched.XMM, + NoMRPattern>, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, - HasAVX512, WriteFMove, WriteFLoad>, + HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, - HasAVX512, "VMOVAPS", WriteFMove, - WriteFStore>, + HasAVX512, "VMOVAPS", + SchedWriteFMoveLS>, PS, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, - HasAVX512, WriteFMove, WriteFLoad>, + HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, - HasAVX512, "VMOVAPD", WriteFMove, - WriteFStore>, + HasAVX512, "VMOVAPD", + SchedWriteFMoveLS>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, - WriteFMove, WriteFLoad, 0, null_frag>, + SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, - "VMOVUPS", WriteFMove, WriteFStore>, - PS, 
EVEX_CD8<32, CD8VF>; + "VMOVUPS", SchedWriteFMoveLS>, + PS, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, - WriteFMove, WriteFLoad, 0, null_frag>, + SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, - "VMOVUPD", WriteFMove, WriteFStore>, + "VMOVUPD", SchedWriteFMoveLS>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, WriteVecMove, WriteVecLoad, - 1>, + HasAVX512, SchedWriteVecMoveLS, 1>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, "VMOVDQA32", WriteVecMove, - WriteVecStore, 1>, + HasAVX512, "VMOVDQA32", + SchedWriteVecMoveLS, 1>, PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, - HasAVX512, WriteVecMove, WriteVecLoad>, + HasAVX512, SchedWriteVecMoveLS>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512, "VMOVDQA64", WriteVecMove, - WriteVecStore>, + HasAVX512, "VMOVDQA64", + SchedWriteVecMoveLS>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, - WriteVecMove, WriteVecLoad, 1>, - avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, - HasBWI, "VMOVDQU8", WriteVecMove, - WriteVecStore, 1>, + SchedWriteVecMoveLS, 1>, + avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, + "VMOVDQU8", SchedWriteVecMoveLS, 1>, XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, - WriteVecMove, WriteVecLoad, 1>, - avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, - HasBWI, "VMOVDQU16", WriteVecMove, - WriteVecStore, 1>, + SchedWriteVecMoveLS, 1>, + avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, + "VMOVDQU16", SchedWriteVecMoveLS, 1>, XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, - WriteVecMove, WriteVecLoad, 1, null_frag>, - avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, - HasAVX512, "VMOVDQU32", WriteVecMove, - WriteVecStore, 1>, + SchedWriteVecMoveLS, 1, null_frag>, + avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, + "VMOVDQU32", SchedWriteVecMoveLS, 1>, XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, - WriteVecMove, WriteVecLoad, 0, null_frag>, - avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, - HasAVX512, "VMOVDQU64", WriteVecMove, - WriteVecStore>, + SchedWriteVecMoveLS, 0, null_frag>, + avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, + "VMOVDQU64", SchedWriteVecMoveLS>, XS, VEX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. 
We need @@ -4338,33 +4331,34 @@ let Predicates = [HasAVX512] in { def : Pat<(v8i64 (X86vzload addr:$src)), (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>; } + //===----------------------------------------------------------------------===// // AVX-512 - Non-temporals //===----------------------------------------------------------------------===// -let SchedRW = [WriteVecLoad] in { - def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), - (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", - [], SSEPackedInt>, EVEX, T8PD, EVEX_V512, - EVEX_CD8<64, CD8VF>; - let Predicates = [HasVLX] in { - def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), - (ins i256mem:$src), - "vmovntdqa\t{$src, $dst|$dst, $src}", - [], SSEPackedInt>, EVEX, T8PD, EVEX_V256, - EVEX_CD8<64, CD8VF>; +def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), + (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", + [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>, + EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; - def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), - (ins i128mem:$src), - "vmovntdqa\t{$src, $dst|$dst, $src}", - [], SSEPackedInt>, EVEX, T8PD, EVEX_V128, - EVEX_CD8<64, CD8VF>; - } +let Predicates = [HasVLX] in { + def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), + (ins i256mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, + EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>; + + def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), + (ins i128mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, + EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>; } multiclass avx512_movnt opc, string OpcodeStr, X86VectorVTInfo _, + X86SchedWriteMoveLS Sched, PatFrag st_frag = alignednontemporalstore> { - let SchedRW = [WriteVecStore], AddedComplexity = 400 in + let SchedRW = [Sched.MR], AddedComplexity = 400 in def mr : AVX512PI opc, string OpcodeStr, X86VectorVTInfo _, } multiclass avx512_movnt_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo VTInfo> { + AVX512VLVectorVTInfo VTInfo, + X86SchedWriteMoveLSWidths Sched> { let Predicates = [HasAVX512] in - defm Z : avx512_movnt, EVEX_V512; + defm Z : avx512_movnt, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_movnt, EVEX_V256; - defm Z128 : avx512_movnt, EVEX_V128; + defm Z256 : avx512_movnt, EVEX_V256; + defm Z128 : avx512_movnt, EVEX_V128; } } -defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD; -defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W; -defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS; +defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, + SchedWriteVecMoveLS>, PD; +defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, + SchedWriteFMoveLS>, PD, VEX_W; +defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, + SchedWriteFMoveLS>, PS; let Predicates = [HasAVX512], AddedComplexity = 400 in { def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index d41641a..0b4ad80 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -242,7 +242,7 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (x86mmx VR64:$src), 
addr:$dst)]>; -let SchedRW = [WriteVecMove] in { +let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in { def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 473bea5..50c1203 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -406,64 +406,66 @@ def : InstAlias<"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", multiclass sse12_mov_packed opc, RegisterClass RC, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d> { + string asm, Domain d, + X86SchedWriteMoveLS sched> { let hasSideEffects = 0 in def rr : PI, - Sched<[WriteFMove]>; + Sched<[sched.RR]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def rm : PI, - Sched<[WriteFLoad]>; + Sched<[sched.RM]>; } let Predicates = [HasAVX, NoVLX] in { -defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, - "movaps", SSEPackedSingle>, - PS, VEX, VEX_WIG; -defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, - "movapd", SSEPackedDouble>, - PD, VEX, VEX_WIG; -defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, - "movups", SSEPackedSingle>, - PS, VEX, VEX_WIG; -defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, - "movupd", SSEPackedDouble>, - PD, VEX, VEX_WIG; - -defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, - "movaps", SSEPackedSingle>, - PS, VEX, VEX_L, VEX_WIG; -defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, - "movapd", SSEPackedDouble>, - PD, VEX, VEX_L, VEX_WIG; -defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, - "movups", SSEPackedSingle>, - PS, VEX, VEX_L, VEX_WIG; -defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, - "movupd", SSEPackedDouble>, - PD, VEX, VEX_L, VEX_WIG; +defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", + SSEPackedSingle, SchedWriteFMoveLS.XMM>, + PS, VEX, VEX_WIG; +defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", + SSEPackedDouble, SchedWriteFMoveLS.XMM>, + PD, VEX, VEX_WIG; +defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", + SSEPackedSingle, SchedWriteFMoveLS.XMM>, + PS, VEX, VEX_WIG; +defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", + SSEPackedDouble, SchedWriteFMoveLS.XMM>, + PD, VEX, VEX_WIG; + +defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps", + SSEPackedSingle, SchedWriteFMoveLS.YMM>, + PS, VEX, VEX_L, VEX_WIG; +defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd", + SSEPackedDouble, SchedWriteFMoveLS.YMM>, + PD, VEX, VEX_L, VEX_WIG; +defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", + SSEPackedSingle, SchedWriteFMoveLS.YMM>, + PS, VEX, VEX_L, VEX_WIG; +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", + SSEPackedDouble, SchedWriteFMoveLS.YMM>, + PD, VEX, VEX_L, VEX_WIG; } let Predicates = [UseSSE1] in { -defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, - "movaps", SSEPackedSingle>, - PS; -defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, - "movups", SSEPackedSingle>, - PS; +defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", + SSEPackedSingle, SchedWriteFMoveLS.XMM>, + PS; +defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", + SSEPackedSingle, 
SchedWriteFMoveLS.XMM>, + PS; } let Predicates = [UseSSE2] in { -defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, - "movapd", SSEPackedDouble>, - PD; -defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, - "movupd", SSEPackedDouble>, - PD; +defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", + SSEPackedDouble, SchedWriteFMoveLS.XMM>, + PD; +defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", + SSEPackedDouble, SchedWriteFMoveLS.XMM>, + PD; } -let SchedRW = [WriteFStore], Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX] in { +let SchedRW = [SchedWriteFMoveLS.XMM.MR] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, @@ -480,6 +482,9 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)]>, VEX, VEX_WIG; +} // SchedRW + +let SchedRW = [SchedWriteFMoveLS.YMM.MR] in { def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, @@ -497,10 +502,11 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), [(store (v4f64 VR256:$src), addr:$dst)]>, VEX, VEX_L, VEX_WIG; } // SchedRW +} // Predicate // For disassembler -let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, - SchedRW = [WriteFMove] in { +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { +let SchedRW = [SchedWriteFMoveLS.XMM.RR] in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", []>, @@ -517,6 +523,9 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">; +} // SchedRW + +let SchedRW = [SchedWriteFMoveLS.YMM.RR] in { def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movaps\t{$src, $dst|$dst, $src}", []>, @@ -533,7 +542,8 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, (ins VR256:$src), "movupd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">; -} +} // SchedRW +} // Predicate // Aliases to help the assembler pick two byte VEX encodings by swapping the // operands relative to the normal instructions to use VEX.R instead of VEX.B. 
@@ -554,7 +564,7 @@ def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}", def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}", (VMOVUPDYrr_REV VR256L:$dst, VR256H:$src), 0>; -let SchedRW = [WriteFStore] in { +let SchedRW = [SchedWriteFMoveLS.XMM.MR] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; @@ -571,7 +581,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, - SchedRW = [WriteFMove] in { + SchedRW = [SchedWriteFMoveLS.XMM.RR] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", []>, FoldGenData<"MOVAPSrr">; @@ -2982,7 +2992,7 @@ defm : scalar_unary_math_patterns, VEX, VEX_WIG; +} // SchedRW +let SchedRW = [SchedWriteFMoveLS.YMM.MR] in { def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntps\t{$src, $dst|$dst, $src}", @@ -3006,21 +3018,23 @@ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), addr:$dst)]>, VEX, VEX_L, VEX_WIG; } // SchedRW -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in { +let ExeDomain = SSEPackedInt in { def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)]>, VEX, VEX_WIG; + addr:$dst)]>, VEX, VEX_WIG, + Sched<[SchedWriteVecMoveLS.XMM.MR]>; def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)]>, VEX, VEX_L, VEX_WIG; -} // ExeDomain, SchedRW + addr:$dst)]>, VEX, VEX_L, VEX_WIG, + Sched<[SchedWriteVecMoveLS.YMM.MR]>; +} // ExeDomain } // Predicates -let SchedRW = [WriteFStore] in { +let SchedRW = [SchedWriteFMoveLS.XMM.MR] in { def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; @@ -3029,7 +3043,7 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; } // SchedRW -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in +let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>; @@ -3142,73 +3156,82 @@ def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let hasSideEffects = 0, SchedRW = [WriteVecMove] in { +let hasSideEffects = 0 in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG; -def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L, VEX_WIG; + "movdqa\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG; def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG; + "movdqu\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG; +def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, + 
Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG; def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L, VEX_WIG; + "movdqu\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG; } // For Disassembler -let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, - SchedRW = [WriteVecMove] in { +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, - VEX, VEX_WIG, FoldGenData<"VMOVDQArr">; + "movdqa\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.XMM.RR]>, + VEX, VEX_WIG, FoldGenData<"VMOVDQArr">; def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, - VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">; + "movdqa\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.YMM.RR]>, + VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">; def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, - VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">; + "movdqu\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.XMM.RR]>, + VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">; def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, - VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">; + "movdqu\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.YMM.RR]>, + VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">; } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - hasSideEffects = 0, SchedRW = [WriteVecLoad], Predicates = [HasAVX,NoVLX] in { + hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>, - VEX, VEX_WIG; + Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG; def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "movdqa\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG; def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv2i64 addr:$src))]>, + Sched<[SchedWriteVecMoveLS.XMM.RM]>, XS, VEX, VEX_WIG; def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.YMM.RM]>, XS, VEX, VEX_L, VEX_WIG; } -let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteVecStore], - Predicates = [HasAVX,NoVLX] in { +let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [(alignedstore (v2i64 VR128:$src), addr:$dst)]>, - VEX, VEX_WIG; + Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG; def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", []>, - VEX, VEX_L, VEX_WIG; + Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG; def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(store (v2i64 VR128:$src), addr:$dst)]>, - XS, VEX, VEX_WIG; + Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG; def VMOVDQUYmr : I<0x7F, MRMDestMem, 
(outs), (ins i256mem:$dst, VR256:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[]>, - XS, VEX, VEX_L, VEX_WIG; + Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG; } -let SchedRW = [WriteVecMove] in { +let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in { let hasSideEffects = 0 in { def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", []>; @@ -3231,7 +3254,7 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), } // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - hasSideEffects = 0, SchedRW = [WriteVecLoad] in { + hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>; @@ -3241,7 +3264,8 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteVecStore] in { +let mayStore = 1, hasSideEffects = 0, + SchedRW = [SchedWriteVecMoveLS.XMM.MR] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; @@ -3882,8 +3906,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in { - +let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in { let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), @@ -4396,21 +4419,21 @@ let Predicates = [UseSSE3] in { // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// -let SchedRW = [WriteVecLoad] in { let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, - VEX, VEX_WIG; + Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG; def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, - VEX, VEX_L, VEX_WIG; + Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG; } // Predicates + def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; -} // SchedRW + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, + Sched<[SchedWriteVecMoveLS.XMM.RM]>; //===---------------------------------------------------------------------===// // SSE3 - Arithmetic @@ -6357,18 +6380,18 @@ let Predicates = [UseSSE41] in { } let AddedComplexity = 400 in { // Prefer non-temporal versions -let SchedRW = [WriteVecLoad] in { + let Predicates = [HasAVX, NoVLX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovntdqa\t{$src, $dst|$dst, $src}", []>, - VEX, VEX_WIG; + "vmovntdqa\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG; let Predicates = [HasAVX2, NoVLX] in def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", []>, - VEX, VEX_L, VEX_WIG; + Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, 
VEX_L, VEX_WIG; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movntdqa\t{$src, $dst|$dst, $src}", []>; -} // SchedRW + "movntdqa\t{$src, $dst|$dst, $src}", []>, + Sched<[SchedWriteVecMoveLS.XMM.RM]>; let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8f32 (alignednontemporalload addr:$src)), @@ -7710,7 +7733,7 @@ let hasSideEffects = 0, mayStore = 1 in def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, u8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - Sched<[WriteVecStore]>, VEX, VEX_L; + Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L; let Predicates = [HasAVX2, NoVLX] in { defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 937c349..e974c3a 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -68,6 +68,27 @@ class X86SchedWriteSizes { + SchedWrite RR = MoveRR; + SchedWrite RM = LoadRM; + SchedWrite MR = StoreMR; +} + +// Multiclass that wraps X86SchedWriteMoveLS for each vector width. +class X86SchedWriteMoveLSWidths { + X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations. + X86SchedWriteMoveLS MMX = sScl; // MMX operations. + X86SchedWriteMoveLS XMM = s128; // XMM operations. + X86SchedWriteMoveLS YMM = s256; // YMM operations. + X86SchedWriteMoveLS ZMM = s512; // ZMM operations. +} + // Loads, stores, and moves, not folded with other operations. def WriteLoad : SchedWrite; def WriteStore : SchedWrite; @@ -75,7 +96,7 @@ def WriteMove : SchedWrite; // Arithmetic. defm WriteALU : X86SchedWritePair; // Simple integer ALU op. -def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>; +def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>; defm WriteIMul : X86SchedWritePair; // Integer multiplication. defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication. def WriteIMulH : SchedWrite; // Integer multiplication, high part. @@ -307,6 +328,27 @@ def WriteFence : SchedWrite; // Nop, not very useful expect it provides a model for nops! def WriteNop : SchedWrite; +// Move/Load/Store wrappers. +def WriteFMoveLS + : X86SchedWriteMoveLS; +def WriteFMoveLSX + : X86SchedWriteMoveLS; +def WriteFMoveLSY + : X86SchedWriteMoveLS; +def SchedWriteFMoveLS + : X86SchedWriteMoveLSWidths; + +def WriteVecMoveLS + : X86SchedWriteMoveLS; +def WriteVecMoveLSX + : X86SchedWriteMoveLS; +def WriteVecMoveLSY + : X86SchedWriteMoveLS; +def SchedWriteVecMoveLS + : X86SchedWriteMoveLSWidths; + // Vector width wrappers. def SchedWriteFAdd : X86SchedWriteWidths; -- 2.7.4
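
For reference, a sketch of the two new helper classes and how the width wrappers are expected to be instantiated. The diff text above drops the TableGen template parameter lists, so the parameter names used here (MoveRR, LoadRM, StoreMR, sScl, s128, s256, s512) are inferred from the field initializers that do survive in the X86Schedule.td hunk, and wiring every width to the existing WriteVecMove/WriteVecLoad/WriteVecStore classes is an assumption consistent with the "nothing uses this yet" note -- treat it as a sketch of the intent, not the exact committed text.

  // One RR/RM/MR SchedWrite triple for a move/load/store instruction group.
  class X86SchedWriteMoveLS<SchedWrite MoveRR, SchedWrite LoadRM,
                            SchedWrite StoreMR> {
    SchedWrite RR = MoveRR;   // reg-reg move
    SchedWrite RM = LoadRM;   // load (reg <- mem)
    SchedWrite MR = StoreMR;  // store (mem <- reg)
  }

  // One X86SchedWriteMoveLS triple per vector width.
  class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
                                  X86SchedWriteMoveLS s128,
                                  X86SchedWriteMoveLS s256,
                                  X86SchedWriteMoveLS s512> {
    X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
    X86SchedWriteMoveLS MMX = sScl; // MMX operations.
    X86SchedWriteMoveLS XMM = s128; // XMM operations.
    X86SchedWriteMoveLS YMM = s256; // YMM operations.
    X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
  }

  // Until per-width SchedWrites exist, every width can reuse the current
  // move/load/store classes; no ZMM-specific def appears in the patch, so
  // the 512-bit slot presumably reuses the 256-bit wrapper.
  def WriteVecMoveLS  : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
  def WriteVecMoveLSX : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
  def WriteVecMoveLSY : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
  def SchedWriteVecMoveLS
    : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
                                WriteVecMoveLSY, WriteVecMoveLSY>;

Instruction definitions then name the width and form they need, e.g.
Sched<[SchedWriteVecMoveLS.XMM.RM]> for a 128-bit vector load or
Sched<[SchedWriteFMoveLS.ZMM.MR]> for a 512-bit FP store. Once MMX/XMM/YMM/ZMM
moves get their own SchedWrite definitions, only the WriteVecMoveLS*/WriteFMoveLS*
defs need to change, not the instruction patterns that reference the wrappers.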