From f4fbe4a51be5f30c77245f4b546bc62eef2e6ffd Mon Sep 17 00:00:00 2001
From: "Andrew V. Tischenko"
Date: Tue, 24 Oct 2017 13:38:30 +0000
Subject: [PATCH] Update f16c instruction scheduling on btver2.

Differential Revision: https://reviews.llvm.org/D39051

llvm-svn: 316435
---

Notes (review):
    What the patch does
    * X86ScheduleBtVer2.td gains an "F16C instructions" section with seven
      SchedWriteRes classes, each attached through InstRW/instregex:
        WriteCVT3         VCVTPS2PHrr, VCVTPH2PSrr   Latency 3
        WriteCVT3St       VCVTPS2PHmr                Latency 3
        WriteCVT3Ld       VCVTPH2PSrm                Latency 8
        WriteCVTPS2PHY    VCVTPS2PHYrr               Latency 6,  3 micro-ops
        WriteCVTPS2PHYSt  VCVTPS2PHYmr               Latency 11, 3 micro-ops
        WriteCVTPH2PSY    VCVTPH2PSYrr               Latency 3,  2 micro-ops
        WriteCVTPH2PSYLd  VCVTPH2PSYrm               Latency 8,  2 micro-ops
    * f16c-schedule.ll: every `ret{{[l|q]}}` CHECK line becomes a literal
      `retq`.  Apart from that, only BTVER2 "sched:" annotations change:
        vcvtph2ps (%rdi), %ymm1      [8:1.00] -> [8:2.00]
        vcvtph2ps %xmm0, %ymm0       [3:1.00] -> [3:2.00]
        vcvtps2ph $0, %xmm1, (%rdi)  [8:1.00] -> [3:1.00]
        vcvtps2ph $0, %ymm0, %xmm0   [3:1.00] -> [6:2.00]
        vcvtps2ph $0, %ymm1, (%rdi)  [8:1.00] -> [11:2.00]

    Open questions
    * NOTE(review): WriteCVT3St keeps Latency = 3, the same as the register
      form, whereas WriteCVT3Ld, WriteCVTPH2PSYLd and WriteCVTPS2PHYSt are
      each 5 higher than their register forms.  Confirm this is intended.
    * NOTE(review): the store forms (mr) reserve JLAGU, the same resource
      the load forms (rm) use.  Presumably a store-side AGU resource exists
      in the full .td file; TODO confirm which one was meant.
    * NOTE(review): the resource lists are ordered [JFPU1, JLAGU] in
      WriteCVT3Ld but [JLAGU, JFPU1] in WriteCVTPH2PSYLd.  ResourceCycles is
      written to match in both, so this is cosmetic only.
    * NOTE(review): the From: header carries no e-mail address; it looks
      stripped in transit.  Restore it from the original commit if needed.
    * NOTE(review): the mask operand of the shufflevector context line in
      the third test hunk was missing.  It is written here as
      <i32 0, i32 1, i32 2, i32 3>; the element count follows from the
      <4 x i32> mask type, the indices are assumed.  Verify against the tree.

 llvm/lib/Target/X86/X86ScheduleBtVer2.td | 50 ++++++++++++++++++++++++
 llvm/test/CodeGen/X86/f16c-schedule.ll   | 66 ++++++++++++++++----------------
 2 files changed, 83 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 2fe0396..38657d4 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -419,6 +419,56 @@ def WriteINSERTQ: SchedWriteRes<[JFPU01]> {
 def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>;
 
 ////////////////////////////////////////////////////////////////////////////////
+// F16C instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def WriteCVT3: SchedWriteRes<[JFPU1]> {
+  let Latency = 3;
+}
+def : InstRW<[WriteCVT3], (instregex "VCVTPS2PHrr")>;
+def : InstRW<[WriteCVT3], (instregex "VCVTPH2PSrr")>;
+
+def WriteCVT3St: SchedWriteRes<[JFPU1, JLAGU]> {
+  let Latency = 3;
+  let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVT3St], (instregex "VCVTPS2PHmr")>;
+
+def WriteCVT3Ld: SchedWriteRes<[JFPU1, JLAGU]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVT3Ld], (instregex "VCVTPH2PSrm")>;
+
+def WriteCVTPS2PHY: SchedWriteRes<[JFPU1, JFPU01]> {
+  let Latency = 6;
+  let ResourceCycles = [2,2];
+  let NumMicroOps = 3;
+}
+def : InstRW<[WriteCVTPS2PHY], (instregex "VCVTPS2PHYrr")>;
+
+def WriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JFPU01, JLAGU]> {
+  let Latency = 11;
+  let ResourceCycles = [2,2,1];
+  let NumMicroOps = 3;
+}
+def : InstRW<[WriteCVTPS2PHYSt], (instregex "VCVTPS2PHYmr")>;
+
+def WriteCVTPH2PSY: SchedWriteRes<[JFPU1]> {
+  let Latency = 3;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+def : InstRW<[WriteCVTPH2PSY], (instregex "VCVTPH2PSYrr")>;
+
+def WriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 8;
+  let ResourceCycles = [1,2];
+  let NumMicroOps = 2;
+}
+def : InstRW<[WriteCVTPH2PSYLd], (instregex "VCVTPH2PSYrm")>;
+
+////////////////////////////////////////////////////////////////////////////////
 // AVX instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
diff --git a/llvm/test/CodeGen/X86/f16c-schedule.ll b/llvm/test/CodeGen/X86/f16c-schedule.ll
index 654fef4..88ec56e 100644
--- a/llvm/test/CodeGen/X86/f16c-schedule.ll
+++ b/llvm/test/CodeGen/X86/f16c-schedule.ll
@@ -13,49 +13,49 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
 ; GENERIC-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtph2ps_128:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
 ; IVY-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; IVY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; IVY-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtph2ps_128:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BROADWELL-LABEL: test_vcvtph2ps_128:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_vcvtph2ps_128:
 ; SKYLAKE:       # BB#0:
 ; SKYLAKE-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    ret{{[l|q]}} # sched: [7:1.00]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtph2ps_128:
 ; BTVER2:       # BB#0:
 ; BTVER2-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    ret{{[l|q]}} # sched: [4:1.00]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_vcvtph2ps_128:
 ; ZNVER1:       # BB#0:
 ; ZNVER1-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [100:?]
 ; ZNVER1-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [100:?]
 ; ZNVER1-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    ret{{[l|q]}} # sched: [1:0.50]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = load <8 x i16>, <8 x i16> *%a1
   %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
   %3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
@@ -70,49 +70,49 @@ define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
 ; GENERIC-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtph2ps_256:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
 ; IVY-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
 ; IVY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; IVY-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtph2ps_256:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00]
 ; HASWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BROADWELL-LABEL: test_vcvtph2ps_256:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00]
 ; BROADWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_vcvtph2ps_256:
 ; SKYLAKE:       # BB#0:
 ; SKYLAKE-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50]
 ; SKYLAKE-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT:    ret{{[l|q]}} # sched: [7:1.00]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtph2ps_256:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
-; BTVER2-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
+; BTVER2-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT:    ret{{[l|q]}} # sched: [4:1.00]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_vcvtph2ps_256:
 ; ZNVER1:       # BB#0:
 ; ZNVER1-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [100:?]
 ; ZNVER1-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [100:?]
 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    ret{{[l|q]}} # sched: [1:0.50]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = load <8 x i16>, <8 x i16> *%a1
   %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
   %3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
@@ -126,43 +126,43 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16>
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
-; GENERIC-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtps2ph_128:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
 ; IVY-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
-; IVY-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtps2ph_128:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
 ; HASWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
-; HASWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BROADWELL-LABEL: test_vcvtps2ph_128:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
 ; BROADWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
-; BROADWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_vcvtps2ph_128:
 ; SKYLAKE:       # BB#0:
 ; SKYLAKE-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
 ; SKYLAKE-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
-; SKYLAKE-NEXT:    ret{{[l|q]}} # sched: [7:1.00]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtps2ph_128:
 ; BTVER2:       # BB#0:
 ; BTVER2-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
-; BTVER2-NEXT:    ret{{[l|q]}} # sched: [4:1.00]
+; BTVER2-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [3:1.00]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_vcvtps2ph_128:
 ; ZNVER1:       # BB#0:
 ; ZNVER1-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [100:?]
 ; ZNVER1-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [100:?]
-; ZNVER1-NEXT:    ret{{[l|q]}} # sched: [1:0.50]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
   %2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
   %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -177,48 +177,48 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16>
 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
 ; GENERIC-NEXT:    vzeroupper
-; GENERIC-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; IVY-LABEL: test_vcvtps2ph_256:
 ; IVY:       # BB#0:
 ; IVY-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
 ; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
 ; IVY-NEXT:    vzeroupper
-; IVY-NEXT:    ret{{[l|q]}} # sched: [1:1.00]
+; IVY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_vcvtps2ph_256:
 ; HASWELL:       # BB#0:
 ; HASWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00]
 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
-; HASWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BROADWELL-LABEL: test_vcvtps2ph_256:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00]
 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
-; BROADWELL-NEXT:    ret{{[l|q]}} # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_vcvtps2ph_256:
 ; SKYLAKE:       # BB#0:
 ; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00]
 ; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
-; SKYLAKE-NEXT:    ret{{[l|q]}} # sched: [7:1.00]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_vcvtps2ph_256:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
-; BTVER2-NEXT:    ret{{[l|q]}} # sched: [4:1.00]
+; BTVER2-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]
+; BTVER2-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [11:2.00]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_vcvtps2ph_256:
 ; ZNVER1:       # BB#0:
 ; ZNVER1-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [100:?]
 ; ZNVER1-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [100:?]
 ; ZNVER1-NEXT:    vzeroupper # sched: [100:?]
-; ZNVER1-NEXT:    ret{{[l|q]}} # sched: [1:0.50]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
   %2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
   store <8 x i16> %2, <8 x i16> *%a2
-- 
2.7.4