From e93fd5f1e4188d89ef6515c0941faf5d4b45e946 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 2 May 2018 09:18:49 +0000 Subject: [PATCH] [X86] Cleanup WriteFAdd/WriteFCmp scheduler classes with more common default values Intel models were targeting x87 instead of packed sse. Also fixes XOP's VFRCZ to use WriteFAdd/WriteFAddY. llvm-svn: 331340 --- llvm/lib/Target/X86/X86InstrXOP.td | 4 +-- llvm/lib/Target/X86/X86SchedBroadwell.td | 19 +++---------- llvm/lib/Target/X86/X86SchedHaswell.td | 18 ++++++------ llvm/lib/Target/X86/X86SchedSandyBridge.td | 23 ++-------------- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 41 +++++++--------------------- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 41 ++++++++++------------------ llvm/test/CodeGen/X86/3dnow-schedule.ll | 34 +++++++++++------------ llvm/test/CodeGen/X86/xop-schedule.ll | 12 ++++---- 8 files changed, 64 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index 5055c7e..ef9230d 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -66,11 +66,11 @@ multiclass xop2op256 opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def Yrr : IXOP, XOP, VEX_L, Sched<[WriteFAdd]>; + [(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[WriteFAddY]>; def Yrm : IXOP, XOP, VEX_L, - Sched<[WriteFAddLd, ReadAfterLd]>; + Sched<[WriteFAddYLd, ReadAfterLd]>; } let ExeDomain = SSEPackedSingle in { diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index f900d79..0531ef5 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -11,6 +11,7 @@ // scheduling and other instruction cost heuristics. // //===----------------------------------------------------------------------===// + def BroadwellModel : SchedMachineModel { // All x86 instructions are modeled as a single micro-op, and BW can decode 4 // instructions per cycle. @@ -155,9 +156,9 @@ def : WriteRes; def : WriteRes; defm : BWWriteResPair; // Floating point add/sub. -defm : BWWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : BWWriteResPair; // Floating point add/sub (YMM/ZMM). defm : BWWriteResPair; // Floating point compare. -defm : BWWriteResPair; // Floating point compare (YMM/ZMM). +defm : BWWriteResPair; // Floating point compare (YMM/ZMM). defm : BWWriteResPair; // Floating point compare to flags. defm : BWWriteResPair; // Floating point multiplication. defm : BWWriteResPair; // Floating point multiplication (YMM/ZMM). @@ -1368,20 +1369,8 @@ def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> { } def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m", - "VADDPDYrm", - "VADDPSYrm", - "VADDSUBPDYrm", - "VADDSUBPSYrm", - "VCMPPDYrmi", - "VCMPPSYrmi", "VCVTPS2DQYrm", - "VCVTTPS2DQYrm", - "VMAX(C?)PDYrm", - "VMAX(C?)PSYrm", - "VMIN(C?)PDYrm", - "VMIN(C?)PSYrm", - "VSUBPDYrm", - "VSUBPSYrm")>; + "VCVTTPS2DQYrm")>; def BWWriteResGroup102 : SchedWriteRes<[BWPort5,BWPort23]> { let Latency = 9; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 611ed77..1d68d47 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -148,7 +148,7 @@ def : WriteRes; def : WriteRes { let Latency = 5; } def : WriteRes; -defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -844,12 +844,16 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm", "PDEP(32|64)rm", "PEXT(32|64)rm", + "(V?)ADDSDrm", + "(V?)ADDSSrm", "(V?)CMPSDrm", "(V?)CMPSSrm", "(V?)MAX(C?)SDrm", "(V?)MAX(C?)SSrm", "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm")>; + "(V?)MIN(C?)SSrm", + "(V?)SUBSDrm", + "(V?)SUBSSrm")>; def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> { let Latency = 8; @@ -1392,14 +1396,8 @@ def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup52], (instregex "(V?)ADDPDrm", - "(V?)ADDPSrm", - "(V?)ADDSUBPDrm", - "(V?)ADDSUBPSrm", - "(V?)CVTPS2DQrm", - "(V?)CVTTPS2DQrm", - "(V?)SUBPDrm", - "(V?)SUBPSrm")>; +def: InstRW<[HWWriteResGroup52], (instregex "(V?)CVTPS2DQrm", + "(V?)CVTTPS2DQrm")>; def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> { let Latency = 10; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index c4daa36..b44ee05 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -138,7 +138,7 @@ def : WriteRes; def : WriteRes { let Latency = 6; } def : WriteRes; -defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -648,14 +648,7 @@ def SBWriteResGroup25_2 : SchedWriteRes<[SBPort5,SBPort05]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVBE_F", - "CMOVB_F", - "CMOVE_F", - "CMOVNBE_F", - "CMOVNB_F", - "CMOVNE_F", - "CMOVNP_F", - "CMOVP_F")>; +def: InstRW<[SBWriteResGroup25_2], (instregex "CMOV(N?)(B|BE|E|P)_F")>; def SBWriteResGroup26 : SchedWriteRes<[SBPort05,SBPort015]> { let Latency = 3; @@ -1300,22 +1293,12 @@ def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm", - "(V?)ADDPDrm", - "(V?)ADDPSrm", - "(V?)ADDSDrm", - "(V?)ADDSSrm", - "(V?)ADDSUBPDrm", - "(V?)ADDSUBPSrm", "(V?)CVTPS2DQrm", "(V?)CVTTPS2DQrm", "(V?)ROUNDPDm", "(V?)ROUNDPSm", "(V?)ROUNDSDm", - "(V?)ROUNDSSm", - "(V?)SUBPDrm", - "(V?)SUBPSrm", - "(V?)SUBSDrm", - "(V?)SUBSSrm")>; + "(V?)ROUNDSSm")>; def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> { let Latency = 9; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index bb6c79e..601a14ab 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -151,8 +151,8 @@ def : WriteRes { let Latency = 6; } def : WriteRes; def : WriteRes; -defm : SKLWriteResPair; // Floating point add/sub. -defm : SKLWriteResPair; // Floating point add/sub (YMM/ZMM). +defm : SKLWriteResPair; // Floating point add/sub. +defm : SKLWriteResPair; // Floating point add/sub (YMM/ZMM). defm : SKLWriteResPair; // Floating point compare. defm : SKLWriteResPair; // Floating point compare (YMM/ZMM). defm : SKLWriteResPair; // Floating point compare to flags. @@ -726,7 +726,8 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr", +def: InstRW<[SKLWriteResGroup29], (instregex "CMOV(N?)(B|BE|E|P)_F", + "PDEP(32|64)rr", "PEXT(32|64)rr", "SHLD(16|32|64)rri8", "SHRD(16|32|64)rri8")>; @@ -910,13 +911,7 @@ def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr", - "(V?)ADDPS(Y?)rr", - "(V?)ADDSDrr", - "(V?)ADDSSrr", - "(V?)ADDSUBPD(Y?)rr", - "(V?)ADDSUBPS(Y?)rr", - "(V?)CVTDQ2PS(Y?)rr", +def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", "(V?)MULPD(Y?)rr", @@ -930,11 +925,7 @@ def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr", "(V?)PMULHUW(Y?)rr", "(V?)PMULHW(Y?)rr", "(V?)PMULLW(Y?)rr", - "(V?)PMULUDQ(Y?)rr", - "(V?)SUBPD(Y?)rr", - "(V?)SUBPS(Y?)rr", - "(V?)SUBSDrr", - "(V?)SUBSSrr")>; + "(V?)PMULUDQ(Y?)rr")>; def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> { let Latency = 4; @@ -1822,11 +1813,7 @@ def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm", - "(V?)ADDPSrm", - "(V?)ADDSUBPDrm", - "(V?)ADDSUBPSrm", - "(V?)CVTDQ2PSrm", +def: InstRW<[SKLWriteResGroup134], (instregex "(V?)CVTDQ2PSrm", "(V?)CVTPH2PSYrm", "(V?)CVTPS2DQrm", "(V?)CVTSS2SDrm", @@ -1840,9 +1827,7 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm", "(V?)PMULHUWrm", "(V?)PMULHWrm", "(V?)PMULLWrm", - "(V?)PMULUDQrm", - "(V?)SUBPDrm", - "(V?)SUBPSrm")>; + "(V?)PMULUDQrm")>; def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 10; @@ -1927,11 +1912,7 @@ def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup147], (instregex "VADDPDYrm", - "VADDPSYrm", - "VADDSUBPDYrm", - "VADDSUBPSYrm", - "VCVTDQ2PSYrm", +def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm", "VCVTPS2DQYrm", "VCVTPS2PDYrm", "VCVTTPS2DQYrm", @@ -1944,9 +1925,7 @@ def: InstRW<[SKLWriteResGroup147], (instregex "VADDPDYrm", "VPMULHUWYrm", "VPMULHWYrm", "VPMULLWYrm", - "VPMULUDQYrm", - "VSUBPDYrm", - "VSUBPSYrm")>; + "VPMULUDQYrm")>; def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 11; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 16a6c24..00c38b3 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -3235,35 +3235,22 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup137], (instregex "ADDSDrm", - "ADDSSrm", - "CMPSDrm", - "CMPSSrm", - "CVTPS2PDrm", - "MAX(C?)SDrm", - "MAX(C?)SSrm", - "MIN(C?)SDrm", - "MIN(C?)SSrm", - "MMX_CVTPS2PIirm", +def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm", - "MULSDrm", - "MULSSrm", - "SUBSDrm", - "SUBSSrm", - "VADDSDrm", - "VADDSSrm", - "VCMPSDrm", - "VCMPSSrm", + "(V?)ADDSDrm", + "(V?)ADDSSrm", + "(V?)CMPSDrm", + "(V?)CMPSSrm", "VCVTPH2PSrm", - "VCVTPS2PDrm", - "VMAX(C?)SDrm", - "VMAX(C?)SSrm", - "VMIN(C?)SDrm", - "VMIN(C?)SSrm", - "VMULSDrm", - "VMULSSrm", - "VSUBSDrm", - "VSUBSSrm")>; + "(V?)CVTPS2PDrm", + "(V?)MAX(C?)SDrm", + "(V?)MAX(C?)SSrm", + "(V?)MIN(C?)SDrm", + "(V?)MIN(C?)SSrm", + "(V?)MULSDrm", + "(V?)MULSSrm", + "(V?)SUBSDrm", + "(V?)SUBSSrm")>; def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> { let Latency = 9; diff --git a/llvm/test/CodeGen/X86/3dnow-schedule.ll b/llvm/test/CodeGen/X86/3dnow-schedule.ll index 9369713..c5abfad 100644 --- a/llvm/test/CodeGen/X86/3dnow-schedule.ll +++ b/llvm/test/CodeGen/X86/3dnow-schedule.ll @@ -60,7 +60,7 @@ define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfacc: ; CHECK: # %bb.0: ; CHECK-NEXT: pfacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1) @@ -75,7 +75,7 @@ define i64 @test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfadd: ; CHECK: # %bb.0: ; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1) @@ -90,7 +90,7 @@ define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpeq: ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1) @@ -105,7 +105,7 @@ define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpge: ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1) @@ -120,7 +120,7 @@ define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfcmpgt: ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1) @@ -135,7 +135,7 @@ define i64 @test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmax: ; CHECK: # %bb.0: ; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1) @@ -150,7 +150,7 @@ define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmin: ; CHECK: # %bb.0: ; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1) @@ -165,7 +165,7 @@ define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfmul: ; CHECK: # %bb.0: ; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1) @@ -180,7 +180,7 @@ define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfnacc: ; CHECK: # %bb.0: ; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1) @@ -195,7 +195,7 @@ define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfpnacc: ; CHECK: # %bb.0: ; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1) @@ -209,7 +209,7 @@ declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrcp(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pfrcp: ; CHECK: # %bb.0: -; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -225,7 +225,7 @@ define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrcpit1: ; CHECK: # %bb.0: ; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1) @@ -240,7 +240,7 @@ define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrcpit2: ; CHECK: # %bb.0: ; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1) @@ -255,7 +255,7 @@ define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfrsqit1: ; CHECK: # %bb.0: ; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1) @@ -269,7 +269,7 @@ declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone define i64 @test_pfrsqrt(x86_mmx* %a0) optsize { ; CHECK-LABEL: test_pfrsqrt: ; CHECK: # %bb.0: -; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] @@ -285,7 +285,7 @@ define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfsub: ; CHECK: # %bb.0: ; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1) @@ -300,7 +300,7 @@ define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK-LABEL: test_pfsubr: ; CHECK: # %bb.0: ; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [8:1.00] +; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1) diff --git a/llvm/test/CodeGen/X86/xop-schedule.ll b/llvm/test/CodeGen/X86/xop-schedule.ll index 04abb901..9d9f2c5 100644 --- a/llvm/test/CodeGen/X86/xop-schedule.ll +++ b/llvm/test/CodeGen/X86/xop-schedule.ll @@ -11,8 +11,8 @@ define void @test_vfrczpd(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [8:1.00] +; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -37,8 +37,8 @@ define void @test_vfrczps(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, < ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [8:1.00] +; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -62,7 +62,7 @@ define void @test_vfrczsd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -82,7 +82,7 @@ define void @test_vfrczss(<4 x float> %a0, <4 x double> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [9:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; -- 2.7.4