From f8c75b8794e71cca4e6a9b3bd81e694a4fb868e1 Mon Sep 17 00:00:00 2001 From: "Andrew V. Tischenko" Date: Thu, 9 Nov 2017 14:19:59 +0000 Subject: [PATCH] Sched model improving on btver2: JFPU01 resource, vtestp* for xmm. Differential Revision: https://reviews.llvm.org/D39802 llvm-svn: 317785 --- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 37 ++++++++++++++++++++++---------- llvm/test/CodeGen/X86/avx-schedule.ll | 16 +++++++------- llvm/test/CodeGen/X86/sse41-schedule.ll | 16 +++++++------- 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index f29ccd3..7fb3bcf 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -199,11 +199,13 @@ defm : JWriteResFpuPair; // Float -> Float size conve def : WriteRes { let Latency = 2; - let ResourceCycles = [2]; + let ResourceCycles = [4]; + let NumMicroOps = 3; } def : WriteRes { let Latency = 7; - let ResourceCycles = [1, 2]; + let ResourceCycles = [1, 4]; + let NumMicroOps = 3; } // Vector integer operations. @@ -217,21 +219,20 @@ defm : JWriteResFpuPair; def : WriteRes { let Latency = 2; - let ResourceCycles = [2]; + let ResourceCycles = [4]; + let NumMicroOps = 3; } def : WriteRes { let Latency = 7; - let ResourceCycles = [1, 2]; + let ResourceCycles = [1, 4]; + let NumMicroOps = 3; } // FIXME: why do we need to define AVX2 resource on CPU that doesn't have AVX2? -def : WriteRes { - let Latency = 1; - let ResourceCycles = [1]; -} +def : WriteRes {} def : WriteRes { let Latency = 6; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1, 2]; } def : WriteRes { @@ -654,18 +655,32 @@ def : InstRW<[WriteVMOVMSK], (instregex "VMOVMSKP(D|S)(Y)?rr")>; // and ALU0 in the integer unit is occupied instead. def WriteVTESTY: SchedWriteRes<[JFPU01, JFPU0]> { let Latency = 4; - let ResourceCycles = [4, 2]; + let ResourceCycles = [2, 2]; + let NumMicroOps = 3; } def : InstRW<[WriteVTESTY], (instregex "VTESTP(S|D)Yrr")>; def : InstRW<[WriteVTESTY], (instregex "VPTESTYrr")>; def WriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPU0]> { let Latency = 9; - let ResourceCycles = [1, 4, 2]; + let ResourceCycles = [1, 2, 2]; + let NumMicroOps = 3; } def : InstRW<[WriteVTESTYLd], (instregex "VTESTP(S|D)Yrm")>; def : InstRW<[WriteVTESTYLd], (instregex "VPTESTYrm")>; +def WriteVTEST: SchedWriteRes<[JFPU0]> { + let Latency = 3; +} +def : InstRW<[WriteVTEST], (instregex "VTESTP(S|D)rr")>; +def : InstRW<[WriteVTEST], (instregex "VPTESTrr")>; + +def WriteVTESTLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; +} +def : InstRW<[WriteVTESTLd], (instregex "VTESTP(S|D)rm")>; +def : InstRW<[WriteVTESTLd], (instregex "VPTESTrm")>; + def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { let Latency = 54; let ResourceCycles = [54]; diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 858a27b..b5fad3e 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -4611,9 +4611,9 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; BTVER2-LABEL: test_testpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4697,9 +4697,9 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a ; BTVER2-LABEL: test_testpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:3.00] +; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:2.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:3.00] +; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:2.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4778,9 +4778,9 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BTVER2-LABEL: test_testps: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4864,9 +4864,9 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) ; BTVER2-LABEL: test_testps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:3.00] +; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:2.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:3.00] +; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:2.00] ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index bdcefe8..76242c8 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -192,8 +192,8 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_blendvpd: @@ -259,8 +259,8 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_blendvps: @@ -745,8 +745,8 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; ; BTVER2-LABEL: test_pblendvb: ; BTVER2: # BB#0: -; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pblendvb: @@ -2935,9 +2935,9 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; BTVER2-LABEL: test_ptest: ; BTVER2: # BB#0: -; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: setb %cl # sched: [1:0.50] ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] ; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50] -- 2.7.4