From c3c767bf505ed825a2d90d24560fe7e305cb2ff5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 27 Apr 2018 16:11:57 +0000 Subject: [PATCH] [X86] Split WriteFHadd into XMM and YMM/ZMM scheduler classes This removes all the HADD/HSUB PS/PD InstRW overrides. llvm-svn: 331054 --- llvm/lib/Target/X86/X86InstrSSE.td | 8 ++++---- llvm/lib/Target/X86/X86SchedBroadwell.td | 11 +---------- llvm/lib/Target/X86/X86SchedHaswell.td | 11 +---------- llvm/lib/Target/X86/X86SchedSandyBridge.td | 12 +++--------- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 13 ++----------- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 9 +++------ llvm/lib/Target/X86/X86Schedule.td | 3 ++- llvm/lib/Target/X86/X86ScheduleAtom.td | 5 +++-- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 16 +--------------- llvm/lib/Target/X86/X86ScheduleSLM.td | 1 + llvm/lib/Target/X86/X86ScheduleZnver1.td | 1 + 11 files changed, 22 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index a3b75de..6f6afe7 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4473,9 +4473,9 @@ let Predicates = [HasAVX] in { defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, WriteFHAdd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, WriteFHAdd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem, @@ -4483,9 +4483,9 @@ let Predicates = [HasAVX] in { defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem, X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, WriteFHAdd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, WriteFHAdd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; } } diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index d21b9bd..2c180f5 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -345,6 +345,7 @@ def : WriteRes; //////////////////////////////////////////////////////////////////////////////// defm : BWWriteResPair; +defm : BWWriteResPair; defm : BWWriteResPair; // Remaining instrs. @@ -1633,16 +1634,6 @@ def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { } def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>; -def BWWriteResGroup129 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[BWWriteResGroup129], (instregex "VHADDPDYrm", - "VHADDPSYrm", - "VHSUBPDYrm", - "VHSUBPSYrm")>; - def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> { let Latency = 11; let NumMicroOps = 6; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index fb4d9b5..e2c1853 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -587,6 +587,7 @@ def : InstRW<[HWWriteFXTRACT], (instrs FXTRACT)>; //////////////////////////////////////////////////////////////////////////////// defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; //=== Floating Point XMM and YMM Instructions ===// @@ -1958,16 +1959,6 @@ def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { } def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>; -def HWWriteResGroup96_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[HWWriteResGroup96_1], (instregex "VHADDPDYrm", - "VHADDPSYrm", - "VHSUBPDYrm", - "VHSUBPSYrm")>; - def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { let Latency = 10; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 478d886..f59bd57 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -199,8 +199,9 @@ def : WriteRes { // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; //////////////////////////////////////////////////////////////////////////////// // String instructions. @@ -1565,13 +1566,6 @@ def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { } def: InstRW<[SBWriteResGroup112], (instregex "(V?)DPPS(Y?)rri")>; -def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup113], (instregex "VH(ADD|SUB)(PD|PS)Yrm")>; - def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { let Latency = 13; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 4b51cc8..ee4e4dd 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -350,7 +350,8 @@ def : WriteRes; // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : SKLWriteResPair; // Remaining instrs. @@ -2118,16 +2119,6 @@ def SKLWriteResGroup164 : SchedWriteRes<[SKLPort5,SKLPort01]> { } def: InstRW<[SKLWriteResGroup164], (instregex "(V?)DPPS(Y?)rri")>; -def SKLWriteResGroup165 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { - let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SKLWriteResGroup165], (instregex "VHADDPDYrm", - "VHADDPSYrm", - "VHSUBPDYrm", - "VHSUBPSYrm")>; - def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 14; let NumMicroOps = 1; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 2c25e91..085f440 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -350,7 +350,8 @@ def : WriteRes; // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : SKXWriteResPair; // Remaining instrs. @@ -4239,11 +4240,7 @@ def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let NumMicroOps = 4; let ResourceCycles = [2,1,1]; } -def: InstRW<[SKXWriteResGroup183], (instregex "VHADDPDYrm", - "VHADDPSYrm", - "VHSUBPDYrm", - "VHSUBPSYrm", - "VPERMI2W128rm(b?)", +def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)", "VPERMT2W128rm(b?)")>; def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index e87475a..20241bf 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -102,7 +102,8 @@ class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Horizontal Add/Sub (float and integer) defm WriteFHAdd : X86SchedWritePair; -defm WritePHAdd : X86SchedWritePair; +defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM. +defm WritePHAdd : X86SchedWritePair; // Vector integer operations. def WriteVecLoad : SchedWrite; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 23ede68..51d046f 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -293,8 +293,9 @@ defm : AtomWriteResPair; // NOTE: Do // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// -defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 28d54b9..09cb530 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -446,23 +446,9 @@ defm : JWriteResFpuPair; //////////////////////////////////////////////////////////////////////////////// defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; -def JWriteFHAddY: SchedWriteRes<[JFPU0, JFPA]> { - let Latency = 3; - let ResourceCycles = [2, 2]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteFHAddY], (instrs VHADDPDYrr, VHADDPSYrr, VHSUBPDYrr, VHSUBPSYrr)>; - -def JWriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { - let Latency = 8; - let ResourceCycles = [2, 2, 2]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteFHAddYLd, ReadAfterLd], (instrs VHADDPDYrm, VHADDPSYrm, - VHSUBPDYrm, VHSUBPSYrm)>; - //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 0504519..c682130 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -182,6 +182,7 @@ def : WriteRes { //////////////////////////////////////////////////////////////////////////////// defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; // String instructions. diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 83358a7..f5fdf14 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -192,6 +192,7 @@ def : WriteRes; def : WriteRes { let Latency = 8; } defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -- 2.7.4