From: Craig Topper Date: Thu, 22 Mar 2018 19:22:51 +0000 (+0000) Subject: [X86] Correct the scheduling data for some of the 32 and 64 bit multiplies to as... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4a3be6e5786f6a0ad96fc2e98f10ef801c5bdb93;p=platform%2Fupstream%2Fllvm.git [X86] Correct the scheduling data for some of the 32 and 64 bit multiplies to as best as I understand how they are implemented. llvm-svn: 328231 --- diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 2b2bec0..76199f0 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -1445,8 +1445,7 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup42], (instrs IMUL32r, IMUL64r, MUL32r, MUL64r)>; -def: InstRW<[BWWriteResGroup42], (instrs MULX64rr)>; +def: InstRW<[BWWriteResGroup42], (instrs IMUL64r, MUL64r, MULX64rr)>; def: InstRW<[BWWriteResGroup42], (instregex "CVTDQ2PDrr", "CVTPD2DQrr", "CVTPD2PSrr", @@ -1662,11 +1661,11 @@ def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> { def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>; def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { - let Latency = 5; + let Latency = 4; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup52], (instrs MULX32rr)>; +def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>; def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> { let Latency = 5; @@ -2433,7 +2432,6 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup91], (instrs IMUL64m, MUL64m)>; def: InstRW<[BWWriteResGroup91], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi8, IMUL64rmi32)>; def: InstRW<[BWWriteResGroup91], (instrs IMUL8m, MUL8m)>; def: 
InstRW<[BWWriteResGroup91], (instregex "ADDPDrm", @@ -2518,13 +2516,6 @@ def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { } def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>; -def BWWriteResGroup91_32 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup91_32], (instrs IMUL32m, MUL32m)>; - def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> { let Latency = 8; let NumMicroOps = 2; @@ -2733,7 +2724,7 @@ def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup107], (instrs MULX64rm)>; +def: InstRW<[BWWriteResGroup107], (instrs IMUL64m, MUL64m, MULX64rm)>; def: InstRW<[BWWriteResGroup107], (instregex "CVTDQ2PDrm", "CVTPD2DQrm", "CVTPD2PSrm", @@ -2900,11 +2891,11 @@ def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>; def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> { - let Latency = 10; + let Latency = 9; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[BWWriteResGroup121], (instrs MULX32rm)>; +def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>; def BWWriteResGroup122 : SchedWriteRes<[BWPort0]> { let Latency = 11; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b47efe9..30bd423 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1235,8 +1235,8 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup12], (instrs MUL8m, MUL16m, MUL32m, MUL64m, - IMUL8m, IMUL16m, IMUL32m, IMUL64m, +def: InstRW<[HWWriteResGroup12], (instrs MUL8m, MUL16m, + IMUL8m, IMUL16m, IMUL16rm, IMUL16rmi, IMUL16rmi8, 
IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; def: InstRW<[HWWriteResGroup12], (instregex "BSF(16|32|64)rm", "BSR(16|32|64)rm", @@ -2412,12 +2412,6 @@ def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort0156]> { } def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>; -def HWWriteResGroup74_32 : SchedWriteRes<[HWPort1,HWPort0156]> { - let Latency = 4; - let NumMicroOps = 3; -} -def: InstRW<[HWWriteResGroup74_32], (instrs IMUL32r, MUL32r)>; - def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> { let Latency = 11; let NumMicroOps = 3; @@ -2481,7 +2475,7 @@ def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup79], (instrs MULX64rm)>; +def: InstRW<[HWWriteResGroup79], (instrs IMUL64m, MUL64m, MULX64rm)>; def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { let Latency = 9; @@ -2710,11 +2704,11 @@ def HWWriteResGroup94 : SchedWriteRes<[HWPort1,HWPort6,HWPort06]> { def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>; def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { - let Latency = 5; + let Latency = 4; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup95], (instrs MULX32rr)>; +def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>; def HWWriteResGroup96 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let Latency = 11; @@ -2744,11 +2738,11 @@ def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>; def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> { - let Latency = 10; + let Latency = 9; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[HWWriteResGroup98], (instrs MULX32rm)>; +def: InstRW<[HWWriteResGroup98], (instrs IMUL32m, MUL32m, MULX32rm)>; def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> { let 
Latency = 5; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 1ce56cb..87076b8 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -858,6 +858,7 @@ def: InstRW<[SBWriteResGroup26_2], (instregex "COM_FIPr", "UCOM_FIPr", "UCOM_FIr")>; +// FIXME: this is probably incorrect. def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> { let Latency = 4; let NumMicroOps = 2; @@ -1746,6 +1747,7 @@ def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm", "CVTTSD2SIrm", "CVTTSS2SI64rm", "CVTTSS2SIrm")>; +// FIXME: this is probably incorrect. def: InstRW<[SBWriteResGroup93], (instrs MUL16m, MUL32m, MUL64m)>; def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 4852d53..2fe60f2 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -1687,12 +1687,11 @@ def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> { def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>; def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { - let Latency = 5; + let Latency = 4; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r)>; -def: InstRW<[SKLWriteResGroup62], (instrs MULX32rr)>; +def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r, MULX32rr)>; def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 5; @@ -2438,8 +2437,7 @@ def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup107], (instrs IMUL64m, MUL64m)>; def: InstRW<[SKLWriteResGroup107], (instrs IMUL32rm, IMUL32rmi, 
IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; def: InstRW<[SKLWriteResGroup107], (instrs IMUL8m, MUL8m)>; def: InstRW<[SKLWriteResGroup107], (instregex "BSF(16|32|64)rm", "BSR(16|32|64)rm", @@ -2462,13 +2460,6 @@ def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> } def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>; -def SKLWriteResGroup107_32 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup107_32], (instrs IMUL32m, MUL32m)>; - def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 8; let NumMicroOps = 2; @@ -2786,7 +2777,7 @@ def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup127], (instrs MULX64rm)>; +def: InstRW<[SKLWriteResGroup127], (instrs IMUL64m, MUL64m, MULX64rm)>; def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { let Latency = 9; @@ -2968,11 +2959,11 @@ def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDDYrm", "VPHSUBWYrm")>; def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> { - let Latency = 10; + let Latency = 9; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup142], (instrs MULX32rm)>; +def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>; def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 10; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index cf125db..22044e2 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -2575,15 +2575,12 @@ def: InstRW<[SKXWriteResGroup51], (instregex "MPSADBWrri", "VPMOVWBZ256rr(b?)(k?)(z?)", "VPMOVWBZrr(b?)(k?)(z?)")>; -// FIXME: IMUL32r/MUL32r 
should be uops lik SkylakeClient. def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup52], (instrs IMUL32r, IMUL64r)>; -def: InstRW<[SKXWriteResGroup52], (instrs MUL32r, MUL64r)>; -def: InstRW<[SKXWriteResGroup52], (instrs MULX64rr)>; +def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>; def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { let Latency = 4; @@ -2775,11 +2772,11 @@ def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> { def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>; def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { - let Latency = 5; + let Latency = 4; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup64], (instrs MULX32rr)>; +def: InstRW<[SKXWriteResGroup64], (instrs IMUL32r, MUL32r, MULX32rr)>; def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> { let Latency = 5; @@ -3907,7 +3904,6 @@ def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup118], (instrs IMUL64m, MUL64m)>; def: InstRW<[SKXWriteResGroup118], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; def: InstRW<[SKXWriteResGroup118], (instrs IMUL8m, MUL8m)>; def: InstRW<[SKXWriteResGroup118], (instregex "BSF(16|32|64)rm", @@ -3931,13 +3927,6 @@ def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> } def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>; -def SKXWriteResGroup118_32 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup118_32], (instrs IMUL32m, MUL32m)>; - def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 8; let NumMicroOps = 2; @@ 
-4660,7 +4649,7 @@ def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup142], (instrs MULX64rm)>; +def: InstRW<[SKXWriteResGroup142], (instrs IMUL64m, MUL64m, MULX64rm)>; def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { let Latency = 9; @@ -5059,11 +5048,11 @@ def: InstRW<[SKXWriteResGroup155], (instregex "VPHADDDYrm", "VPHSUBWYrm")>; def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> { - let Latency = 10; + let Latency = 9; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKXWriteResGroup156], (instrs MULX32rm)>; +def: InstRW<[SKXWriteResGroup156], (instrs IMUL32m, MUL32m, MULX32rm)>; def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { let Latency = 10; diff --git a/llvm/test/CodeGen/X86/bmi2-schedule.ll b/llvm/test/CodeGen/X86/bmi2-schedule.ll index 7effa1e..1ccd7c3 100644 --- a/llvm/test/CodeGen/X86/bmi2-schedule.ll +++ b/llvm/test/CodeGen/X86/bmi2-schedule.ll @@ -118,32 +118,32 @@ define void @test_mulx_i32(i32 %a0, i32 %a1, i32* %a2) optsize { ; HASWELL-LABEL: test_mulx_i32: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00] -; HASWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00] +; HASWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] +; HASWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_mulx_i32: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00] -; BROADWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00] +; BROADWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] +; BROADWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulx_i32: ; 
SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00] -; SKYLAKE-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00] +; SKYLAKE-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] +; SKYLAKE-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_mulx_i32: ; KNL: # %bb.0: ; KNL-NEXT: #APP -; KNL-NEXT: mulxl %esi, %esi, %edi # sched: [5:1.00] -; KNL-NEXT: mulxl (%rdx), %esi, %edi # sched: [10:1.00] +; KNL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] +; KNL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] ; KNL-NEXT: #NO_APP ; KNL-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index 77f047a..a3e0a49 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -5852,7 +5852,7 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP ; HASWELL-NEXT: imull %edi # sched: [4:1.00] -; HASWELL-NEXT: imull (%rsi) # sched: [8:1.00] +; HASWELL-NEXT: imull (%rsi) # sched: [9:1.00] ; HASWELL-NEXT: imull %edi, %edi # sched: [3:1.00] ; HASWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; HASWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 @@ -5868,7 +5868,7 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP ; BROADWELL-NEXT: imull %edi # sched: [4:1.00] -; BROADWELL-NEXT: imull (%rsi) # sched: [8:1.00] +; BROADWELL-NEXT: imull (%rsi) # sched: [9:1.00] ; BROADWELL-NEXT: imull %edi, %edi # sched: [3:1.00] ; BROADWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; BROADWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 @@ -5883,10 +5883,10 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; SKYLAKE-LABEL: test_imul_32: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: imull %edi # sched: [5:1.00] +; SKYLAKE-NEXT: imull %edi # sched: 
[4:1.00] -; SKYLAKE-NEXT: imull (%rsi) # sched: [8:1.00] +; SKYLAKE-NEXT: imull (%rsi) # sched: [9:1.00] ; SKYLAKE-NEXT: imull %edi, %edi # sched: [3:1.00] ; SKYLAKE-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 ; SKYLAKE-NEXT: # sched: [3:1.00] ; SKYLAKE-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 @@ -5900,7 +5900,7 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; SKX: # %bb.0: ; SKX-NEXT: #APP ; SKX-NEXT: imull %edi # sched: [4:1.00] -; SKX-NEXT: imull (%rsi) # sched: [8:1.00] +; SKX-NEXT: imull (%rsi) # sched: [9:1.00] ; SKX-NEXT: imull %edi, %edi # sched: [3:1.00] ; SKX-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; SKX-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 @@ -6015,7 +6015,7 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP ; HASWELL-NEXT: imulq %rdi # sched: [4:1.00] -; HASWELL-NEXT: imulq (%rsi) # sched: [8:1.00] +; HASWELL-NEXT: imulq (%rsi) # sched: [9:1.00] ; HASWELL-NEXT: imulq %rdi, %rdi # sched: [3:1.00] ; HASWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; HASWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 @@ -6031,7 +6031,7 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP ; BROADWELL-NEXT: imulq %rdi # sched: [4:1.00] -; BROADWELL-NEXT: imulq (%rsi) # sched: [8:1.00] +; BROADWELL-NEXT: imulq (%rsi) # sched: [9:1.00] ; BROADWELL-NEXT: imulq %rdi, %rdi # sched: [3:1.00] ; BROADWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; BROADWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 @@ -6047,7 +6047,7 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP ; SKYLAKE-NEXT: imulq %rdi # sched: [4:1.00] -; SKYLAKE-NEXT: imulq (%rsi) # sched: [8:1.00] +; SKYLAKE-NEXT: imulq (%rsi) # sched: [9:1.00] ; SKYLAKE-NEXT: imulq %rdi, %rdi # sched: [3:1.00] ; SKYLAKE-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; SKYLAKE-NEXT: imulq 
$665536, %rdi, %rdi # imm = 0xA27C0 @@ -6063,7 +6063,7 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; SKX: # %bb.0: ; SKX-NEXT: #APP ; SKX-NEXT: imulq %rdi # sched: [4:1.00] -; SKX-NEXT: imulq (%rsi) # sched: [8:1.00] +; SKX-NEXT: imulq (%rsi) # sched: [9:1.00] ; SKX-NEXT: imulq %rdi, %rdi # sched: [3:1.00] ; SKX-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; SKX-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 @@ -8028,9 +8028,9 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; HASWELL-NEXT: mulw %si # sched: [4:1.00] ; HASWELL-NEXT: mulw (%r9) # sched: [8:1.00] ; HASWELL-NEXT: mull %edx # sched: [4:1.00] -; HASWELL-NEXT: mull (%rax) # sched: [8:1.00] +; HASWELL-NEXT: mull (%rax) # sched: [9:1.00] ; HASWELL-NEXT: mulq %rcx # sched: [4:1.00] -; HASWELL-NEXT: mulq (%r10) # sched: [8:1.00] +; HASWELL-NEXT: mulq (%r10) # sched: [9:1.00] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retq # sched: [7:1.00] ; @@ -8044,9 +8044,9 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; BROADWELL-NEXT: mulw %si # sched: [4:1.00] ; BROADWELL-NEXT: mulw (%r9) # sched: [8:1.00] ; BROADWELL-NEXT: mull %edx # sched: [4:1.00] -; BROADWELL-NEXT: mull (%rax) # sched: [8:1.00] +; BROADWELL-NEXT: mull (%rax) # sched: [9:1.00] ; BROADWELL-NEXT: mulq %rcx # sched: [4:1.00] -; BROADWELL-NEXT: mulq (%r10) # sched: [8:1.00] +; BROADWELL-NEXT: mulq (%r10) # sched: [9:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -8059,10 +8059,10 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; SKYLAKE-NEXT: mulb (%r8) # sched: [8:1.00] ; SKYLAKE-NEXT: mulw %si # sched: [4:1.00] ; SKYLAKE-NEXT: mulw (%r9) # sched: [8:1.00] -; SKYLAKE-NEXT: mull %edx # sched: [5:1.00] -; SKYLAKE-NEXT: mull (%rax) # sched: [8:1.00] +; SKYLAKE-NEXT: mull %edx # sched: [4:1.00] +; SKYLAKE-NEXT: mull (%rax) # sched: [9:1.00] ; SKYLAKE-NEXT: mulq %rcx # sched: [4:1.00] -; SKYLAKE-NEXT: mulq 
(%r10) # sched: [8:1.00] +; SKYLAKE-NEXT: mulq (%r10) # sched: [9:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; @@ -8076,9 +8076,9 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; SKX-NEXT: mulw %si # sched: [4:1.00] ; SKX-NEXT: mulw (%r9) # sched: [8:1.00] ; SKX-NEXT: mull %edx # sched: [4:1.00] -; SKX-NEXT: mull (%rax) # sched: [8:1.00] +; SKX-NEXT: mull (%rax) # sched: [9:1.00] ; SKX-NEXT: mulq %rcx # sched: [4:1.00] -; SKX-NEXT: mulq (%r10) # sched: [8:1.00] +; SKX-NEXT: mulq (%r10) # sched: [9:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retq # sched: [7:1.00] ;