I've used Agner's data, as best I could, to choose the values that the scheduler models should converge on.
llvm-svn: 328473
def: InstRW<[BWWriteResGroup136], (instregex "(V?)MPSADBWrmi")>;
def BWWriteResGroup137 : SchedWriteRes<[BWPort0]> {
- let Latency = 13;
+ let Latency = 11;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup137], (instregex "SQRTPSr",
- "SQRTSSr")>;
+def: InstRW<[BWWriteResGroup137], (instregex "(V?)SQRTPSr",
+ "(V?)SQRTSSr")>;
def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
let Latency = 13;
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIVPDrr",
- "(V?)DIVSDrr",
- "VSQRTPSr",
- "VSQRTSSr")>;
+ "(V?)DIVSDrr")>;
def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 14;
"VRSQRTPSYm")>;
def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23]> {
- let Latency = 18;
+ let Latency = 16;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup157], (instregex "SQRTPSm",
- "SQRTSSm")>;
+def: InstRW<[BWWriteResGroup157], (instregex "(V?)SQRTPSm",
+ "(V?)SQRTSSm")>;
def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> {
let Latency = 18;
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm",
- "(V?)DIVSDrm",
- "VSQRTPSm",
- "VSQRTSSm")>;
+ "(V?)DIVSDrm")>;
def BWWriteResGroup163 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
let Latency = 19;
}
def: InstRW<[BWWriteResGroup165], (instregex "DIV_FPrST0",
"DIV_FST0r",
- "DIV_FrST0",
- "SQRTPDr",
- "SQRTSDr")>;
+ "DIV_FrST0")>;
def BWWriteResGroup166 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
let Latency = 20;
"INSW")>;
def BWWriteResGroup168 : SchedWriteRes<[BWPort0]> {
- let Latency = 21;
+ let Latency = 16;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup168], (instregex "VSQRTPDr",
- "VSQRTSDr")>;
+def: InstRW<[BWWriteResGroup168], (instregex "(V?)SQRTPDr",
+ "(V?)SQRTSDr")>;
def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 21;
"DIV_FI32m")>;
def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23]> {
- let Latency = 25;
+ let Latency = 21;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup179], (instregex "SQRTPDm",
- "SQRTSDm")>;
+def: InstRW<[BWWriteResGroup179], (instregex "(V?)SQRTPDm",
+ "(V?)SQRTSDm")>;
def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 26;
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F32m",
- "DIVR_F64m",
- "VSQRTPDm",
- "VSQRTSDm")>;
+ "DIVR_F64m")>;
def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
let Latency = 27;
"(V?)RSQRTSSm")>;
def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 16;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup91_1], (instregex "(V?)SQRTSSm")>;
+
+def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 18;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup91_1], (instregex "SQRTSSm",
- "VDIVSSrm")>;
+def: InstRW<[HWWriteResGroup91_4], (instregex "(V?)DIVSSrm")>;
def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 11;
def: InstRW<[HWWriteResGroup120], (instregex "RCL(8|16|32|64)mCL")>;
def HWWriteResGroup121 : SchedWriteRes<[HWPort0]> {
- let Latency = 11;
+ let Latency = 13;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup121], (instregex "DIVPSrr",
- "DIVSSrr")>;
-
-def HWWriteResGroup122 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 17;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup122], (instregex "DIVPSrm")>;
-
-def HWWriteResGroup122_1 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 16;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup122_1], (instregex "DIVSSrm")>;
+def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr",
+ "(V?)DIVSSrr")>;
def HWWriteResGroup125 : SchedWriteRes<[HWPort0,HWPort015]> {
let Latency = 11;
def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>;
def HWWriteResGroup133 : SchedWriteRes<[HWPort0]> {
- let Latency = 13;
+ let Latency = 11;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup133], (instregex "SQRTPSr",
- "SQRTSSr",
- "VDIVPSrr",
- "VDIVSSrr")>;
+def: InstRW<[HWWriteResGroup133], (instregex "(V?)SQRTPSr",
+ "(V?)SQRTSSr")>;
def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 19;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup134], (instregex "DIVSDrm",
- "SQRTPSm",
- "VDIVPSrm",
- "VSQRTSSm")>;
+def: InstRW<[HWWriteResGroup134], (instregex "(V?)DIVPSrm")>;
def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 19;
}
def: InstRW<[HWWriteResGroup135], (instregex "RCR(8|16|32|64)mCL")>;
-def HWWriteResGroup136 : SchedWriteRes<[HWPort0]> {
- let Latency = 14;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[HWWriteResGroup136], (instregex "DIVPDrr",
- "DIVSDrr",
- "VSQRTPSr",
- "VSQRTSSr")>;
-
def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 20;
+ let Latency = 17;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup138], (instregex "DIVPDrm",
- "VSQRTPSm")>;
+def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>;
def HWWriteResGroup140 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> {
let Latency = 14;
def: InstRW<[HWWriteResGroup154], (instregex "DIV_FPrST0",
"DIV_FST0r",
"DIV_FrST0",
- "SQRTPDr",
- "SQRTSDr",
- "VDIVPDrr",
- "VDIVSDrr")>;
+ "(V?)DIVPDrr",
+ "(V?)DIVSDrr")>;
def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 27;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F32m",
- "DIVR_F64m",
- "VSQRTPDm")>;
+ "DIVR_F64m")>;
def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 26;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup155_1], (instregex "SQRTPDm",
- "VDIVPDrm",
- "VSQRTSDm")>;
+def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>;
def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 21;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup155_2], (instregex "(V?)SQRTSDm")>;
+
+def HWWriteResGroup155_3 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 22;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup155_3], (instregex "(V?)SQRTPDm")>;
+
+def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 25;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup155_2], (instregex "SQRTSDm",
- "VDIVSDrm")>;
+def: InstRW<[HWWriteResGroup155_4], (instregex "(V?)DIVSDrm")>;
def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> {
let Latency = 20;
def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>;
def HWWriteResGroup157 : SchedWriteRes<[HWPort0]> {
- let Latency = 21;
+ let Latency = 16;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup157], (instregex "VSQRTPDr",
- "VSQRTSDr")>;
+def: InstRW<[HWWriteResGroup157], (instregex "(V?)SQRTPDr",
+ "(V?)SQRTSDr")>;
def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort015]> {
let Latency = 21;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr",
+def: InstRW<[SBWriteResGroup116], (instregex "(V?)SQRTSSr",
"(V?)DIVPSrr",
"(V?)DIVSSrr",
"(V?)SQRTPSr")>;
-def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 14;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>;
-
def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> {
let Latency = 14;
let NumMicroOps = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm",
+def: InstRW<[SBWriteResGroup123], (instregex "(V?)SQRTSSm",
"(V?)DIVPSrm",
"(V?)DIVSSrm",
"(V?)SQRTPSm")>;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>;
+def: InstRW<[SBWriteResGroup124], (instregex "(V?)SQRTPDr",
+ "(V?)SQRTSDr")>;
def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 21;
+ let Latency = 27;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>;
+def: InstRW<[SBWriteResGroup125], (instregex "(V?)SQRTPDm",
+ "(V?)SQRTSDm")>;
def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> {
let Latency = 22;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr",
- "(V?)DIVPDrr",
- "(V?)DIVSDrr",
- "(V?)SQRTPDr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "(V?)DIVPDrr",
+ "(V?)DIVSDrr")>;
def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> {
let Latency = 24;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm",
- "(V?)DIVPDrm",
- "(V?)DIVSDrm",
- "(V?)SQRTPDm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "(V?)DIVPDrm",
+ "(V?)DIVSDrm")>;
def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05]> {
let Latency = 29;
def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm",
"VSQRTPDYm")>;
-def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> {
- let Latency = 114;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>;
-
} // SchedModel
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTPS(Y?)r",
- "VSQRTSSr")>;
+def: InstRW<[SKLWriteResGroup157], (instregex "(V?)SQRTPS(Y?)r",
+ "(V?)SQRTSSr")>;
def SKLWriteResGroup159 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 12;
}
def: InstRW<[SKLWriteResGroup160], (instregex "CVTTSS2SI64rm")>;
-def SKLWriteResGroup161 : SchedWriteRes<[SKLPort0]> {
- let Latency = 13;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SKLWriteResGroup161], (instregex "SQRTPSr",
- "SQRTSSr")>;
-
def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 13;
let NumMicroOps = 3;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm",
- "VSQRTSSm")>;
+ "(V?)SQRTSSm")>;
def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
let Latency = 17;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTPD(Y?)r",
- "VSQRTSDr")>;
+def: InstRW<[SKLWriteResGroup181], (instregex "(V?)SQRTPD(Y?)r",
+ "(V?)SQRTSDr")>;
def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 18;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup182], (instregex "SQRTSSm",
- "VDIVPSYrm",
- "VSQRTPSm")>;
+def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm",
+ "(V?)SQRTPSm")>;
def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 18;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup186], (instregex "SQRTPSm",
- "(V?)DIVSDrm",
+def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm",
"VSQRTPSYm")>;
def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
}
def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FPrST0",
"DIV_FST0r",
- "DIV_FrST0",
- "SQRTPDr",
- "SQRTSDr")>;
+ "DIV_FrST0")>;
def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 20;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup197], (instregex "VSQRTSDm")>;
+def: InstRW<[SKLWriteResGroup197], (instregex "(V?)SQRTSDm")>;
def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 23;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTPDm")>;
+def: InstRW<[SKLWriteResGroup199], (instregex "(V?)SQRTPDm")>;
def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 25;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup201], (instregex "SQRTSDm",
- "VSQRTPDYm")>;
+def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>;
def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 25;
def: InstRW<[SKLWriteResGroup202], (instregex "DIV_FI16m",
"DIV_FI32m")>;
-def SKLWriteResGroup205 : SchedWriteRes<[SKLPort0,SKLPort23]> {
- let Latency = 26;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup205], (instregex "SQRTPDm")>;
-
def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 27;
let NumMicroOps = 2;
"VDIVPSZ128rm(b?)(k?)(z?)",
"VDIVPSrm",
"VDIVSSZrm(_Int)?(k?)(z?)",
- "VSQRTSSm")>;
+ "VSQRTSSm",
+ "VSQRTSSZm(_Int)?(k?)(z?)")>;
def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
let Latency = 17;
"VDIVPSYrm",
"VDIVPSZ256rm(b?)(k?)(z?)",
"VSQRTPSZ128m(b?)(k?)(z?)",
- "VSQRTPSm",
- "VSQRTSSZm(_Int)?(k?)(z?)")>;
+ "VSQRTPSm")>;
def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 18;
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; GENERIC-LABEL: sqrtA:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
+; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtA:
define float @sqrtC(float %a) nounwind {
; GENERIC-LABEL: sqrtC:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
+; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtC:
;
; HASWELL-SSE-LABEL: test_divps:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00]
+; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [13:1.00]
+; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [19:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_divps:
;
; HASWELL-SSE-LABEL: test_divss:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00]
+; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [13:1.00]
+; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [18:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_divss:
;
; HASWELL-SSE-LABEL: test_sqrtps:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00]
-; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00]
+; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [17:1.00]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_sqrtps:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
-; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
+; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:1.00]
+; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [17:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_sqrtps:
; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00]
-; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00]
+; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:1.00]
+; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [16:1.00]
; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_sqrtps:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
-; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
+; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:1.00]
+; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [16:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_sqrtps:
; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00]
-; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00]
+; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:1.00]
+; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
;
; SANDY-LABEL: test_sqrtss:
; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
+; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
+; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_sqrtss:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:1.00]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_sqrtss:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
+; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
+; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_sqrtss:
; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:1.00]
; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:1.00]
; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_sqrtss:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
+; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:1.00]
; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
+; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_sqrtss:
; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:1.00]
; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00]
+; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:1.00]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
;
; HASWELL-SSE-LABEL: test_divpd:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
-; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [26:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_divpd:
;
; HASWELL-SSE-LABEL: test_divsd:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
-; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [25:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_divsd:
define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; GENERIC-LABEL: test_sqrtpd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00]
-; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00]
+; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:1.00]
+; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
;
; SANDY-SSE-LABEL: test_sqrtpd:
; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00]
-; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:1.00]
+; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_sqrtpd:
; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00]
-; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [28:1.00]
+; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
+; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_sqrtpd:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
-; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:1.00]
+; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [22:1.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_sqrtpd:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
-; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00]
+; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:1.00]
+; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [22:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_sqrtpd:
; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
-; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [25:1.00]
+; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:1.00]
+; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [21:1.00]
; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_sqrtpd:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
-; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:1.00]
+; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:1.00]
+; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_sqrtpd:
; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
-; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:1.00]
+; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:1.00]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; GENERIC-LABEL: test_sqrtsd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00]
+; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:1.00]
; GENERIC-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00]
+; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
;
; SANDY-SSE-LABEL: test_sqrtsd:
; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:1.00]
; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00]
+; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
;
; HASWELL-SSE-LABEL: test_sqrtsd:
; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:1.00]
; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:1.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_sqrtsd:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
+; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:1.00]
; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00]
+; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_sqrtsd:
; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:1.00]
; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:1.00]
; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_sqrtsd:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
+; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:1.00]
; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00]
+; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_sqrtsd:
; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:1.00]
; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:1.00]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;