The Intel scheduler models were targeting x87 instead of packed SSE.
Also fixes XOP's VFRCZ to use WriteFAdd/WriteFAddY (WriteFAddY for the 256-bit YMM forms).
llvm-svn: 331340
PatFrag memop> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[WriteFAdd]>;
+ [(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[WriteFAddY]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
- Sched<[WriteFAddLd, ReadAfterLd]>;
+ Sched<[WriteFAddYLd, ReadAfterLd]>;
}
let ExeDomain = SSEPackedSingle in {
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
+
def BroadwellModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and BW can decode 4
// instructions per cycle.
def : WriteRes<WriteFMove, [BWPort5]>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
-defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
+defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 6>; // Floating point add/sub (YMM/ZMM).
defm : BWWriteResPair<WriteFCmp, [BWPort1], 3, [1], 1, 5>; // Floating point compare.
-defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 7>; // Floating point compare (YMM/ZMM).
+defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 6>; // Floating point compare (YMM/ZMM).
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFMul, [BWPort0], 5, [1], 1, 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFMulY, [BWPort0], 5, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
}
def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
- "VADDPDYrm",
- "VADDPSYrm",
- "VADDSUBPDYrm",
- "VADDSUBPSYrm",
- "VCMPPDYrmi",
- "VCMPPSYrmi",
"VCVTPS2DQYrm",
- "VCVTTPS2DQYrm",
- "VMAX(C?)PDYrm",
- "VMAX(C?)PSYrm",
- "VMIN(C?)PDYrm",
- "VMIN(C?)PSYrm",
- "VSUBPDYrm",
- "VSUBPSYrm")>;
+ "VCVTTPS2DQYrm")>;
def BWWriteResGroup102 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 9;
def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; }
def : WriteRes<WriteFMove, [HWPort5]>;
-defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
+defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>;
def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm",
"PDEP(32|64)rm",
"PEXT(32|64)rm",
+ "(V?)ADDSDrm",
+ "(V?)ADDSSrm",
"(V?)CMPSDrm",
"(V?)CMPSSrm",
"(V?)MAX(C?)SDrm",
"(V?)MAX(C?)SSrm",
"(V?)MIN(C?)SDrm",
- "(V?)MIN(C?)SSrm")>;
+ "(V?)MIN(C?)SSrm",
+ "(V?)SUBSDrm",
+ "(V?)SUBSSrm")>;
def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup52], (instregex "(V?)ADDPDrm",
- "(V?)ADDPSrm",
- "(V?)ADDSUBPDrm",
- "(V?)ADDSUBPSrm",
- "(V?)CVTPS2DQrm",
- "(V?)CVTTPS2DQrm",
- "(V?)SUBPDrm",
- "(V?)SUBPSrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "(V?)CVTPS2DQrm",
+ "(V?)CVTTPS2DQrm")>;
def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 10;
def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; }
def : WriteRes<WriteFMove, [SBPort5]>;
-defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 5>;
+defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVBE_F",
- "CMOVB_F",
- "CMOVE_F",
- "CMOVNBE_F",
- "CMOVNB_F",
- "CMOVNE_F",
- "CMOVNP_F",
- "CMOVP_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOV(N?)(B|BE|E|P)_F")>;
def SBWriteResGroup26 : SchedWriteRes<[SBPort05,SBPort015]> {
let Latency = 3;
}
def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm",
"MMX_CVTTPS2PIirm",
- "(V?)ADDPDrm",
- "(V?)ADDPSrm",
- "(V?)ADDSDrm",
- "(V?)ADDSSrm",
- "(V?)ADDSUBPDrm",
- "(V?)ADDSUBPSrm",
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm",
"(V?)ROUNDPDm",
"(V?)ROUNDPSm",
"(V?)ROUNDSDm",
- "(V?)ROUNDSSm",
- "(V?)SUBPDrm",
- "(V?)SUBPSrm",
- "(V?)SUBSDrm",
- "(V?)SUBSSrm")>;
+ "(V?)ROUNDSSm")>;
def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 9;
def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteFMove, [SKLPort015]>;
-defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3, [1], 1, 5>; // Floating point add/sub.
-defm : SKLWriteResPair<WriteFAddY, [SKLPort1], 3, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
+defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub.
+defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 6>; // Floating point compare.
defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
+def: InstRW<[SKLWriteResGroup29], (instregex "CMOV(N?)(B|BE|E|P)_F",
+ "PDEP(32|64)rr",
"PEXT(32|64)rr",
"SHLD(16|32|64)rri8",
"SHRD(16|32|64)rri8")>;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr",
- "(V?)ADDPS(Y?)rr",
- "(V?)ADDSDrr",
- "(V?)ADDSSrr",
- "(V?)ADDSUBPD(Y?)rr",
- "(V?)ADDSUBPS(Y?)rr",
- "(V?)CVTDQ2PS(Y?)rr",
+def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
"(V?)MULPD(Y?)rr",
"(V?)PMULHUW(Y?)rr",
"(V?)PMULHW(Y?)rr",
"(V?)PMULLW(Y?)rr",
- "(V?)PMULUDQ(Y?)rr",
- "(V?)SUBPD(Y?)rr",
- "(V?)SUBPS(Y?)rr",
- "(V?)SUBSDrr",
- "(V?)SUBSSrr")>;
+ "(V?)PMULUDQ(Y?)rr")>;
def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm",
- "(V?)ADDPSrm",
- "(V?)ADDSUBPDrm",
- "(V?)ADDSUBPSrm",
- "(V?)CVTDQ2PSrm",
+def: InstRW<[SKLWriteResGroup134], (instregex "(V?)CVTDQ2PSrm",
"(V?)CVTPH2PSYrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSS2SDrm",
"(V?)PMULHUWrm",
"(V?)PMULHWrm",
"(V?)PMULLWrm",
- "(V?)PMULUDQrm",
- "(V?)SUBPDrm",
- "(V?)SUBPSrm")>;
+ "(V?)PMULUDQrm")>;
def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 10;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup147], (instregex "VADDPDYrm",
- "VADDPSYrm",
- "VADDSUBPDYrm",
- "VADDSUBPSYrm",
- "VCVTDQ2PSYrm",
+def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm",
"VCVTPS2DQYrm",
"VCVTPS2PDYrm",
"VCVTTPS2DQYrm",
"VPMULHUWYrm",
"VPMULHWYrm",
"VPMULLWYrm",
- "VPMULUDQYrm",
- "VSUBPDYrm",
- "VSUBPSYrm")>;
+ "VPMULUDQYrm")>;
def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 11;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup137], (instregex "ADDSDrm",
- "ADDSSrm",
- "CMPSDrm",
- "CMPSSrm",
- "CVTPS2PDrm",
- "MAX(C?)SDrm",
- "MAX(C?)SSrm",
- "MIN(C?)SDrm",
- "MIN(C?)SSrm",
- "MMX_CVTPS2PIirm",
+def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVTPS2PIirm",
"MMX_CVTTPS2PIirm",
- "MULSDrm",
- "MULSSrm",
- "SUBSDrm",
- "SUBSSrm",
- "VADDSDrm",
- "VADDSSrm",
- "VCMPSDrm",
- "VCMPSSrm",
+ "(V?)ADDSDrm",
+ "(V?)ADDSSrm",
+ "(V?)CMPSDrm",
+ "(V?)CMPSSrm",
"VCVTPH2PSrm",
- "VCVTPS2PDrm",
- "VMAX(C?)SDrm",
- "VMAX(C?)SSrm",
- "VMIN(C?)SDrm",
- "VMIN(C?)SSrm",
- "VMULSDrm",
- "VMULSSrm",
- "VSUBSDrm",
- "VSUBSSrm")>;
+ "(V?)CVTPS2PDrm",
+ "(V?)MAX(C?)SDrm",
+ "(V?)MAX(C?)SSrm",
+ "(V?)MIN(C?)SDrm",
+ "(V?)MIN(C?)SSrm",
+ "(V?)MULSDrm",
+ "(V?)MULSSrm",
+ "(V?)SUBSDrm",
+ "(V?)SUBSSrm")>;
def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> {
let Latency = 9;
; CHECK-LABEL: test_pfacc:
; CHECK: # %bb.0:
; CHECK-NEXT: pfacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfadd:
; CHECK: # %bb.0:
; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfcmpeq:
; CHECK: # %bb.0:
; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfcmpge:
; CHECK: # %bb.0:
; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfcmpgt:
; CHECK: # %bb.0:
; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfmax:
; CHECK: # %bb.0:
; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfmin:
; CHECK: # %bb.0:
; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfmul:
; CHECK: # %bb.0:
; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfnacc:
; CHECK: # %bb.0:
; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfpnacc:
; CHECK: # %bb.0:
; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1)
define i64 @test_pfrcp(x86_mmx* %a0) optsize {
; CHECK-LABEL: test_pfrcp:
; CHECK: # %bb.0:
-; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
; CHECK-LABEL: test_pfrcpit1:
; CHECK: # %bb.0:
; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfrcpit2:
; CHECK: # %bb.0:
; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfrsqit1:
; CHECK: # %bb.0:
; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1)
define i64 @test_pfrsqrt(x86_mmx* %a0) optsize {
; CHECK-LABEL: test_pfrsqrt:
; CHECK: # %bb.0:
-; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
; CHECK-LABEL: test_pfsub:
; CHECK: # %bb.0:
; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1)
; CHECK-LABEL: test_pfsubr:
; CHECK: # %bb.0:
; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [8:1.00]
+; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [9:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1)
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [8:1.00]
+; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [8:1.00]
+; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00]
+; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [8:1.00]
+; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;