From 23578e7d3cb0bff9d4b29d3bab1c75a03b101cfd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 16 Mar 2018 14:01:01 +0000
Subject: [PATCH] [X86][Btver2] Add correct mul/imul schedule costs

Integer multiply is performed on the JMul function unit and i64 requires double pumping

llvm-svn: 327707
---
 llvm/lib/Target/X86/X86ScheduleBtVer2.td | 15 ++++++++++++++-
 llvm/test/CodeGen/X86/schedule-x86_64.ll |  8 ++++----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 88a8560..58c67e6 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -117,7 +117,7 @@ def : WriteRes;
 ////////////////////////////////////////////////////////////////////////////////
 
 defm : JWriteResIntPair;
-defm : JWriteResIntPair;
+defm : JWriteResIntPair; // i8/i16/i32 multiplication
 defm : JWriteResIntPair; // Worst case (i64 division)
 
 def : WriteRes {
@@ -152,6 +152,19 @@ def JWriteTZCNTLd : SchedWriteRes<[JLAGU, JALU01]> {
 def : InstRW<[JWriteTZCNT], (instrs TZCNT16rr, TZCNT32rr, TZCNT64rr)>;
 def : InstRW<[JWriteTZCNTLd], (instrs TZCNT16rm, TZCNT32rm, TZCNT64rm)>;
 
+def JWriteIMul64 : SchedWriteRes<[JALU1, JMul]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 4];
+  let NumMicroOps = 2;
+}
+def JWriteIMul64Ld : SchedWriteRes<[JLAGU, JALU1, JMul]> {
+  let Latency = 9;
+  let ResourceCycles = [1, 1, 4];
+  let NumMicroOps = 2;
+}
+def : InstRW<[JWriteIMul64], (instrs MUL64r, IMUL64r)>;
+def : InstRW<[JWriteIMul64Ld], (instrs MUL64m, IMUL64m)>;
+
 def JWriteIDiv8 : SchedWriteRes<[JALU1, JDiv]> {
   let Latency = 12;
   let ResourceCycles = [1, 12];
diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll
index 17cb286..cd056a1 100644
--- a/llvm/test/CodeGen/X86/schedule-x86_64.ll
+++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll
@@ -6078,8 +6078,8 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize {
 ; BTVER2-LABEL: test_imul_64:
 ; BTVER2: # %bb.0:
 ; BTVER2-NEXT: #APP
-; BTVER2-NEXT: imulq %rdi # sched: [3:1.00]
-; BTVER2-NEXT: imulq (%rsi) # sched: [6:1.00]
+; BTVER2-NEXT: imulq %rdi # sched: [6:4.00]
+; BTVER2-NEXT: imulq (%rsi) # sched: [9:4.00]
 ; BTVER2-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
 ; BTVER2-NEXT: imulq (%rsi), %rdi # sched: [6:1.00]
 ; BTVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
@@ -8093,8 +8093,8 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32
 ; BTVER2-NEXT: mulw (%r9) # sched: [6:1.00]
 ; BTVER2-NEXT: mull %edx # sched: [3:1.00]
 ; BTVER2-NEXT: mull (%rax) # sched: [6:1.00]
-; BTVER2-NEXT: mulq %rcx # sched: [3:1.00]
-; BTVER2-NEXT: mulq (%r10) # sched: [6:1.00]
+; BTVER2-NEXT: mulq %rcx # sched: [6:4.00]
+; BTVER2-NEXT: mulq (%r10) # sched: [9:4.00]
 ; BTVER2-NEXT: #NO_APP
 ; BTVER2-NEXT: retq # sched: [4:1.00]
 ;
-- 
2.7.4