From 05242bf691063b4491783ef407d778403365304f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 21 Apr 2018 18:07:36 +0000 Subject: [PATCH] [X86] Add SchedWrites for LDMXCSR/STMXCSR. llvm-svn: 330517 --- llvm/lib/Target/X86/X86InstrSSE.td | 8 ++++---- llvm/lib/Target/X86/X86SchedBroadwell.td | 14 +++++--------- llvm/lib/Target/X86/X86SchedHaswell.td | 14 +++++--------- llvm/lib/Target/X86/X86SchedSandyBridge.td | 12 ++++-------- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 14 +++++--------- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 15 +++++---------- llvm/lib/Target/X86/X86Schedule.td | 4 ++++ llvm/lib/Target/X86/X86ScheduleAtom.td | 12 +++++++++--- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 5 +++++ llvm/lib/Target/X86/X86ScheduleSLM.td | 5 +++++ llvm/lib/Target/X86/X86ScheduleZnver1.td | 8 ++------ 11 files changed, 53 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index cf8b987..83af6cb 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3117,17 +3117,17 @@ def : Pat<(X86MFence), (MFENCE)>; def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, - VEX, Sched<[WriteLoad]>, VEX_WIG; + VEX, Sched<[WriteLDMXCSR]>, VEX_WIG; def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, - VEX, Sched<[WriteStore]>, VEX_WIG; + VEX, Sched<[WriteSTMXCSR]>, VEX_WIG; def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, - TB, Sched<[WriteLoad]>; + TB, Sched<[WriteLDMXCSR]>; def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, - TB, Sched<[WriteStore]>; + TB, Sched<[WriteSTMXCSR]>; //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 1143501..69447ce 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -303,6 +303,10 @@ def : WriteRes { let Latency = 100; } // def Writ // Fence instructions. def : WriteRes; +// Load/store MXCSR. +def : WriteRes { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } +def : WriteRes { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } + // Nop, not very useful expect it provides a model for nops! def : WriteRes; @@ -673,8 +677,7 @@ def: InstRW<[BWWriteResGroup21], (instregex "(V?)EXTRACTPSmr", "(V?)PEXTRBmr", "(V?)PEXTRDmr", "(V?)PEXTRQmr", - "(V?)PEXTRWmr", - "(V?)STMXCSR")>; + "(V?)PEXTRWmr")>; def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> { let Latency = 2; @@ -1535,13 +1538,6 @@ def BWWriteResGroup82 : SchedWriteRes<[BWPort0,BWPort01,BWPort23]> { } def: InstRW<[BWWriteResGroup82], (instregex "FLDCW16m")>; -def BWWriteResGroup83 : SchedWriteRes<[BWPort0,BWPort23,BWPort0156]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup83], (instregex "(V?)LDMXCSR")>; - def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index d91fe09..3e5a41d 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -290,6 +290,10 @@ def : WriteRes { let ResourceCycles = [2,1,1]; } +// Load/store MXCSR. +def : WriteRes { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } +def : WriteRes { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } + def : WriteRes { let Latency = 100; } def : WriteRes { let Latency = 100; } def : WriteRes; @@ -1269,8 +1273,7 @@ def: InstRW<[HWWriteResGroup20], (instregex "(V?)EXTRACTPSmr", "(V?)PEXTRBmr", "(V?)PEXTRDmr", "(V?)PEXTRQmr", - "(V?)PEXTRWmr", - "(V?)STMXCSR")>; + "(V?)PEXTRWmr")>; def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> { let Latency = 2; @@ -1508,13 +1511,6 @@ def HWWriteResGroup39 : SchedWriteRes<[HWPort0,HWPort01,HWPort23]> { } def: InstRW<[HWWriteResGroup39], (instregex "FLDCW16m")>; -def HWWriteResGroup40 : SchedWriteRes<[HWPort0,HWPort23,HWPort0156]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup40], (instregex "(V?)LDMXCSR")>; - def HWWriteResGroup41 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { let Latency = 7; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 1dd782c..476708a 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -273,6 +273,10 @@ def : WriteRes { let ResourceCycles = [17, 1]; } +// Load/store MXCSR. +// FIXME: This is probably wrong. Only STMXCSR should require Port4. +def : WriteRes { let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } +def : WriteRes { let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } def : WriteRes { let Latency = 100; } def : WriteRes { let Latency = 100; } @@ -870,14 +874,6 @@ def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { } def: InstRW<[SBWriteResGroup43], (instregex "SET(A|BE)m")>; -def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup44], (instregex "(V?)LDMXCSR", - "(V?)STMXCSR")>; - def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> { let Latency = 5; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index c5d63d0..0baf0f7 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -309,6 +309,10 @@ def : WriteRes { let Latency = 100; } // def Wri // Fence instructions. def : WriteRes; +// Load/store MXCSR. +def : WriteRes { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } +def : WriteRes { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } + // Nop, not very useful expect it provides a model for nops! def : WriteRes; @@ -736,8 +740,7 @@ def: InstRW<[SKLWriteResGroup24], (instregex "(V?)EXTRACTPSmr", "(V?)PEXTRBmr", "(V?)PEXTRDmr", "(V?)PEXTRQmr", - "(V?)PEXTRWmr", - "(V?)STMXCSR")>; + "(V?)PEXTRWmr")>; def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> { let Latency = 2; @@ -1599,13 +1602,6 @@ def SKLWriteResGroup96 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> { } def: InstRW<[SKLWriteResGroup96], (instregex "FLDCW16m")>; -def SKLWriteResGroup97 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort0156]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup97], (instregex "(V?)LDMXCSR")>; - def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> { let Latency = 7; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 5749104..c7fe003 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -309,6 +309,10 @@ def : WriteRes { let Latency = 100; } // def Wri // Fence instructions. def : WriteRes; +// Load/store MXCSR. +def : WriteRes { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } +def : WriteRes { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } + // Nop, not very useful expect it provides a model for nops! def : WriteRes; @@ -1371,7 +1375,6 @@ def: InstRW<[SKXWriteResGroup24], (instregex "EXTRACTPSmr", "PEXTRDmr", "PEXTRQmr", "PEXTRWmr", - "STMXCSR", "VEXTRACTPSZmr(b?)", "VEXTRACTPSmr", "VPEXTRBZmr(b?)", @@ -1381,8 +1384,7 @@ def: InstRW<[SKXWriteResGroup24], (instregex "EXTRACTPSmr", "VPEXTRQZmr(b?)", "VPEXTRQmr", "VPEXTRWZmr(b?)", - "VPEXTRWmr", - "VSTMXCSR")>; + "VPEXTRWmr")>; def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> { let Latency = 2; @@ -3248,13 +3250,6 @@ def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> { } def: InstRW<[SKXWriteResGroup101], (instregex "FLDCW16m")>; -def SKXWriteResGroup102 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort0156]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup102], (instregex "(V?)LDMXCSR")>; - def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort0156]> { let Latency = 7; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 3ded412..15243f9 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -148,6 +148,10 @@ defm WriteAESKeyGen : X86SchedWritePair; // Key Generation. // Carry-less multiplication instructions. defm WriteCLMul : X86SchedWritePair; +// Load/store MXCSR +def WriteLDMXCSR : SchedWrite; +def WriteSTMXCSR : SchedWrite; + // Catch-all for expensive system instructions. def WriteSystem : SchedWrite; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 48463c0..201ab04 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -290,6 +290,13 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. //////////////////////////////////////////////////////////////////////////////// +// Load/store MXCSR. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 5; let ResourceCycles = [5]; } +def : WriteRes { let Latency = 15; let ResourceCycles = [15]; } + +//////////////////////////////////////////////////////////////////////////////// // Special Cases. //////////////////////////////////////////////////////////////////////////////// @@ -452,7 +459,7 @@ def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; } -def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, LDMXCSR, +def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, MMX_EMMS)>; def : InstRW<[AtomWrite01_5], (instregex "ST_FP80m", "MMX_PH(ADD|SUB)S?Wrr")>; @@ -558,8 +565,7 @@ def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; } -def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr, - STMXCSR)>; +def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>; def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> { let Latency = 17; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 14d60e5..2f2cca3 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -254,6 +254,11 @@ def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; +// Load/store MXCSR. +// FIXME: These are copy and pasted from WriteLoad/Store. +def : WriteRes { let Latency = 5; } +def : WriteRes; + // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index d62286d..73c86808 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -84,6 +84,11 @@ def : WriteRes { let Latency = 3; } def : WriteRes; def : WriteRes; +// Load/store MXCSR. +// FIXME: These are probably wrong. They are copy pasted from WriteStore/Load. +def : WriteRes; +def : WriteRes { let Latency = 3; } + // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 4447f0b..fb0a7f7 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -267,6 +267,8 @@ let Latency = 100 in { def : WriteRes; def : WriteRes; def : WriteRes; + def : WriteRes; + def : WriteRes; } //=== Regex based InstRW ===// @@ -1669,10 +1671,4 @@ def : InstRW<[WriteMicrocoded], (instregex "VZEROUPPER")>; // VZEROALL. def : InstRW<[WriteMicrocoded], (instregex "VZEROALL")>; -// LDMXCSR. -def : InstRW<[WriteMicrocoded], (instregex "(V)?LDMXCSR")>; - -// STMXCSR. -def : InstRW<[WriteMicrocoded], (instregex "(V)?STMXCSR")>; - } // SchedModel -- 2.7.4