From: Craig Topper Date: Mon, 26 Mar 2018 05:05:10 +0000 (+0000) Subject: [X86] Merge the SSE and AVX versions of fp divs and sqrts in the SandyBridge/Haswell... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cdfcf8ecda8065fda495d73ed16277668b3b56dc;p=platform%2Fupstream%2Fllvm.git [X86] Merge the SSE and AVX versions of fp divs and sqrts in the SandyBridge/Haswell/Broadwell/Skylake scheduler models. I've used Agner's data as best I could to get the values to converge on. llvm-svn: 328473 --- diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 0aa0700..d67ee32 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -2402,12 +2402,12 @@ def BWWriteResGroup136 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { def: InstRW<[BWWriteResGroup136], (instregex "(V?)MPSADBWrmi")>; def BWWriteResGroup137 : SchedWriteRes<[BWPort0]> { - let Latency = 13; + let Latency = 11; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup137], (instregex "SQRTPSr", - "SQRTSSr")>; +def: InstRW<[BWWriteResGroup137], (instregex "(V?)SQRTPSr", + "(V?)SQRTSSr")>; def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { let Latency = 13; @@ -2422,9 +2422,7 @@ def BWWriteResGroup139 : SchedWriteRes<[BWPort0]> { let ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIVPDrr", - "(V?)DIVSDrr", - "VSQRTPSr", - "VSQRTSSr")>; + "(V?)DIVSDrr")>; def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { let Latency = 14; @@ -2537,12 +2535,12 @@ def: InstRW<[BWWriteResGroup156], (instregex "VRCPPSYm", "VRSQRTPSYm")>; def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23]> { - let Latency = 18; + let Latency = 16; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup157], (instregex "SQRTPSm", - "SQRTSSm")>; +def: InstRW<[BWWriteResGroup157], (instregex "(V?)SQRTPSm", + "(V?)SQRTSSm")>; def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> { let Latency = 18; @@ -2565,9 +2563,7 @@ def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm", - "(V?)DIVSDrm", - "VSQRTPSm", - "VSQRTSSm")>; + "(V?)DIVSDrm")>; def BWWriteResGroup163 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { let Latency = 19; @@ -2583,9 +2579,7 @@ def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> { } def: InstRW<[BWWriteResGroup165], (instregex "DIV_FPrST0", "DIV_FST0r", - "DIV_FrST0", - "SQRTPDr", - "SQRTSDr")>; + "DIV_FrST0")>; def BWWriteResGroup166 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { let Latency = 20; @@ -2604,12 +2598,12 @@ def: InstRW<[BWWriteResGroup167], (instregex "INSB", "INSW")>; def BWWriteResGroup168 : SchedWriteRes<[BWPort0]> { - let Latency = 21; + let Latency = 16; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup168], (instregex "VSQRTPDr", - "VSQRTSDr")>; +def: InstRW<[BWWriteResGroup168], (instregex "(V?)SQRTPDr", + "(V?)SQRTSDr")>; def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> { let Latency = 21; @@ -2670,12 +2664,12 @@ def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI16m", "DIV_FI32m")>; def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23]> { - let Latency = 25; + let Latency = 21; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup179], (instregex "SQRTPDm", - "SQRTSDm")>; +def: InstRW<[BWWriteResGroup179], (instregex "(V?)SQRTPDm", + "(V?)SQRTSDm")>; def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> { let Latency = 26; @@ -2683,9 +2677,7 @@ def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F32m", - "DIVR_F64m", - "VSQRTPDm", - "VSQRTSDm")>; + "DIVR_F64m")>; def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { let Latency = 27; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index a2c4dd1..4532ddc 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -2408,12 +2408,18 @@ def: InstRW<[HWWriteResGroup91], (instregex "MMX_PMADDUBSWrm", "(V?)RSQRTSSm")>; def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 16; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup91_1], (instregex "(V?)SQRTSSm")>; + +def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 18; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup91_1], (instregex "SQRTSSm", - "VDIVSSrm")>; +def: InstRW<[HWWriteResGroup91_4], (instregex "(V?)DIVSSrm")>; def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 11; @@ -2717,26 +2723,12 @@ def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo def: InstRW<[HWWriteResGroup120], (instregex "RCL(8|16|32|64)mCL")>; def HWWriteResGroup121 : SchedWriteRes<[HWPort0]> { - let Latency = 11; + let Latency = 13; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup121], (instregex "DIVPSrr", - "DIVSSrr")>; - -def HWWriteResGroup122 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 17; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup122], (instregex "DIVPSrm")>; - -def HWWriteResGroup122_1 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 16; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup122_1], (instregex "DIVSSrm")>; +def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr", + "(V?)DIVSSrr")>; def HWWriteResGroup125 : SchedWriteRes<[HWPort0,HWPort015]> { let Latency = 11; @@ -2784,24 +2776,19 @@ def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPo def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>; def HWWriteResGroup133 : SchedWriteRes<[HWPort0]> { - let Latency = 13; + let Latency = 11; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup133], (instregex "SQRTPSr", - "SQRTSSr", - "VDIVPSrr", - "VDIVSSrr")>; +def: InstRW<[HWWriteResGroup133], (instregex "(V?)SQRTPSr", + "(V?)SQRTSSr")>; def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 19; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup134], (instregex "DIVSDrm", - "SQRTPSm", - "VDIVPSrm", - "VSQRTSSm")>; +def: InstRW<[HWWriteResGroup134], (instregex "(V?)DIVPSrm")>; def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> { let Latency = 19; @@ -2810,23 +2797,12 @@ def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup135], (instregex "RCR(8|16|32|64)mCL")>; -def HWWriteResGroup136 : SchedWriteRes<[HWPort0]> { - let Latency = 14; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[HWWriteResGroup136], (instregex "DIVPDrr", - "DIVSDrr", - "VSQRTPSr", - "VSQRTSSr")>; - def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 20; + let Latency = 17; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup138], (instregex "DIVPDrm", - "VSQRTPSm")>; +def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>; def HWWriteResGroup140 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { let Latency = 14; @@ -2916,10 +2892,8 @@ def HWWriteResGroup154 : SchedWriteRes<[HWPort0]> { def: InstRW<[HWWriteResGroup154], (instregex "DIV_FPrST0", "DIV_FST0r", "DIV_FrST0", - "SQRTPDr", - "SQRTSDr", - "VDIVPDrr", - "VDIVSDrr")>; + "(V?)DIVPDrr", + "(V?)DIVSDrr")>; def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 27; @@ -2927,25 +2901,35 @@ def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F32m", - "DIVR_F64m", - "VSQRTPDm")>; + "DIVR_F64m")>; def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 26; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup155_1], (instregex "SQRTPDm", - "VDIVPDrm", - "VSQRTSDm")>; +def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>; def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup155_2], (instregex "(V?)SQRTSDm")>; + +def HWWriteResGroup155_3 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 22; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup155_3], (instregex "(V?)SQRTPDm")>; + +def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 25; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup155_2], (instregex "SQRTSDm", - "VDIVSDrm")>; +def: InstRW<[HWWriteResGroup155_4], (instregex "(V?)DIVSDrm")>; def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { let Latency = 20; @@ -2955,12 +2939,12 @@ def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>; def HWWriteResGroup157 : SchedWriteRes<[HWPort0]> { - let Latency = 21; + let Latency = 16; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup157], (instregex "VSQRTPDr", - "VSQRTSDr")>; +def: InstRW<[HWWriteResGroup157], (instregex "(V?)SQRTPDr", + "(V?)SQRTSDr")>; def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort015]> { let Latency = 21; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 86cfce1..7797e23 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -1931,18 +1931,11 @@ def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr", +def: InstRW<[SBWriteResGroup116], (instregex "(V?)SQRTSSr", "(V?)DIVPSrr", "(V?)DIVSSrr", "(V?)SQRTPSr")>; -def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 14; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>; - def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> { let Latency = 14; let NumMicroOps = 4; @@ -1971,7 +1964,7 @@ def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm", +def: InstRW<[SBWriteResGroup123], (instregex "(V?)SQRTSSm", "(V?)DIVPSrm", "(V?)DIVSSrm", "(V?)SQRTPSm")>; @@ -1981,24 +1974,24 @@ def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>; +def: InstRW<[SBWriteResGroup124], (instregex "(V?)SQRTPDr", + "(V?)SQRTSDr")>; def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 21; + let Latency = 27; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>; +def: InstRW<[SBWriteResGroup125], (instregex "(V?)SQRTPDm", + "(V?)SQRTSDm")>; def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> { let Latency = 22; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr", - "(V?)DIVPDrr", - "(V?)DIVSDrr", - "(V?)SQRTPDr")>; +def: InstRW<[SBWriteResGroup126], (instregex "(V?)DIVPDrr", + "(V?)DIVSDrr")>; def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> { let Latency = 24; @@ -2017,10 +2010,8 @@ def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm", - "(V?)DIVPDrm", - "(V?)DIVSDrm", - "(V?)SQRTPDm")>; +def: InstRW<[SBWriteResGroup128], (instregex "(V?)DIVPDrm", + "(V?)DIVSDrm")>; def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05]> { let Latency = 29; @@ -2074,11 +2065,4 @@ def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> { def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm", "VSQRTPDYm")>; -def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> { - let Latency = 114; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>; - } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 724c003..408858e9 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -2493,8 +2493,8 @@ def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTPS(Y?)r", - "VSQRTSSr")>; +def: InstRW<[SKLWriteResGroup157], (instregex "(V?)SQRTPS(Y?)r", + "(V?)SQRTSSr")>; def SKLWriteResGroup159 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { let Latency = 12; @@ -2513,14 +2513,6 @@ def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort01]> } def: InstRW<[SKLWriteResGroup160], (instregex "CVTTSS2SI64rm")>; -def SKLWriteResGroup161 : SchedWriteRes<[SKLPort0]> { - let Latency = 13; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup161], (instregex "SQRTPSr", - "SQRTSSr")>; - def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 13; let NumMicroOps = 3; @@ -2662,7 +2654,7 @@ def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm", - "VSQRTSSm")>; + "(V?)SQRTSSm")>; def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { let Latency = 17; @@ -2676,17 +2668,16 @@ def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTPD(Y?)r", - "VSQRTSDr")>; +def: InstRW<[SKLWriteResGroup181], (instregex "(V?)SQRTPD(Y?)r", + "(V?)SQRTSDr")>; def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 18; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup182], (instregex "SQRTSSm", - "VDIVPSYrm", - "VSQRTPSm")>; +def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm", + "(V?)SQRTPSm")>; def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 18; @@ -2707,8 +2698,7 @@ def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup186], (instregex "SQRTPSm", - "(V?)DIVSDrm", +def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm", "VSQRTPSYm")>; def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { @@ -2725,9 +2715,7 @@ def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> { } def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FPrST0", "DIV_FST0r", - "DIV_FrST0", - "SQRTPDr", - "SQRTSDr")>; + "DIV_FrST0")>; def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 20; @@ -2807,7 +2795,7 @@ def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup197], (instregex "VSQRTSDm")>; +def: InstRW<[SKLWriteResGroup197], (instregex "(V?)SQRTSDm")>; def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 23; @@ -2821,15 +2809,14 @@ def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTPDm")>; +def: InstRW<[SKLWriteResGroup199], (instregex "(V?)SQRTPDm")>; def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 25; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup201], (instregex "SQRTSDm", - "VSQRTPDYm")>; +def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>; def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 25; @@ -2839,13 +2826,6 @@ def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { def: InstRW<[SKLWriteResGroup202], (instregex "DIV_FI16m", "DIV_FI32m")>; -def SKLWriteResGroup205 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 26; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup205], (instregex "SQRTPDm")>; - def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 27; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 1b01236..32ef910 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -5624,7 +5624,8 @@ def: InstRW<[SKXWriteResGroup201], (instregex "DIVPSrm", "VDIVPSZ128rm(b?)(k?)(z?)", "VDIVPSrm", "VDIVSSZrm(_Int)?(k?)(z?)", - "VSQRTSSm")>; + "VSQRTSSm", + "VSQRTSSZm(_Int)?(k?)(z?)")>; def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { let Latency = 17; @@ -5656,8 +5657,7 @@ def: InstRW<[SKXWriteResGroup204], (instregex "SQRTPSm", "VDIVPSYrm", "VDIVPSZ256rm(b?)(k?)(z?)", "VSQRTPSZ128m(b?)(k?)(z?)", - "VSQRTPSm", - "VSQRTSSZm(_Int)?(k?)(z?)")>; + "VSQRTPSm")>; def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 18; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 766869c..7a1a70c 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -553,7 +553,7 @@ declare float @sqrtf(float) readnone define float @sqrtA(float %a) nounwind uwtable readnone ssp { ; GENERIC-LABEL: sqrtA: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] +; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sqrtA: @@ -585,7 +585,7 @@ declare float @llvm.sqrt.f32(float) define float @sqrtC(float %a) nounwind { ; GENERIC-LABEL: sqrtC: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] +; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sqrtC: diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index c805a54..8e571e9 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -1753,8 +1753,8 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; HASWELL-SSE-LABEL: test_divps: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00] +; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [13:1.00] +; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [19:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_divps: @@ -1861,8 +1861,8 @@ define float @test_divss(float %a0, float %a1, float *%a2) { ; ; HASWELL-SSE-LABEL: test_divss: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00] +; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [13:1.00] +; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [18:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_divss: @@ -4936,36 +4936,36 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; ; HASWELL-SSE-LABEL: test_sqrtps: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00] -; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00] +; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:1.00] +; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [17:1.00] ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] -; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00] +; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:1.00] +; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [17:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_sqrtps: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00] -; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00] +; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:1.00] +; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [16:1.00] ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_sqrtps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] -; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] +; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:1.00] +; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [16:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_sqrtps: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00] -; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00] +; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:1.00] +; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00] ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; @@ -5062,49 +5062,49 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_sqrtss: ; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] +; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] ; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00] +; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_sqrtss: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00] +; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00] +; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:1.00] ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_sqrtss: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] +; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] +; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_sqrtss: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:1.00] ; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00] +; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:1.00] ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_sqrtss: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:1.00] ; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] +; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_sqrtss: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00] +; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:1.00] ; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00] +; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:1.00] ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 1427cf7..5500cb0 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -3194,8 +3194,8 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; HASWELL-SSE-LABEL: test_divpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] -; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [26:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_divpd: @@ -3302,8 +3302,8 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; ; HASWELL-SSE-LABEL: test_divsd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] -; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [25:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_divsd: @@ -14142,8 +14142,8 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_sqrtpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00] -; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00] +; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:1.00] +; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00] ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14164,50 +14164,50 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-SSE-LABEL: test_sqrtpd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00] -; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00] +; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:1.00] +; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00] ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [28:1.00] +; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_sqrtpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] -; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00] +; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:1.00] +; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [22:1.00] ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] -; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00] +; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [22:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_sqrtpd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] -; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [25:1.00] +; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:1.00] +; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [21:1.00] ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_sqrtpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] -; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:1.00] +; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:1.00] +; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_sqrtpd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] -; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00] +; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:1.00] +; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:1.00] ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; @@ -14272,9 +14272,9 @@ declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_sqrtsd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00] +; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:1.00] ; GENERIC-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00] +; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:1.00] ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14296,9 +14296,9 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-SSE-LABEL: test_sqrtsd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:1.00] ; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00] +; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:1.00] ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; @@ -14312,41 +14312,41 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; ; HASWELL-SSE-LABEL: test_sqrtsd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:1.00] ; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:1.00] ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] +; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:1.00] ; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] +; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_sqrtsd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:1.00] ; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:1.00] ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_sqrtsd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] +; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:1.00] ; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] +; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_sqrtsd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:1.00] ; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:1.00] ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ;