From: Andrew V. Tischenko Date: Wed, 1 Nov 2017 16:10:20 +0000 (+0000) Subject: Update VCVTx, VMOVNTPx and VROUNDYPx instructions scheduling on btver2. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3d971e39f8a41b10e0a55232ce83be5c00a29d28;p=platform%2Fupstream%2Fllvm.git Update VCVTx, VMOVNTPx and VROUNDYPx instructions scheduling on btver2. Differential Revision: https://reviews.llvm.org/D39059 llvm-svn: 317101 --- diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index e30bc3b..f3e114a 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -534,6 +534,45 @@ def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { } def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>; +def WriteVCVTY: SchedWriteRes<[JSTC]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVCVTY], (instregex "VCVTDQ2P(S|D)Yrr")>; +def : InstRW<[WriteVCVTY], (instregex "VROUNDYP(S|D)r")>; +def : InstRW<[WriteVCVTY], (instregex "VCVTPS2DQYrr")>; +def : InstRW<[WriteVCVTY], (instregex "VCVTTPS2DQYrr")>; + +def WriteVCVTYLd: SchedWriteRes<[JLAGU, JSTC]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTDQ2P(S|D)Yrm")>; +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VROUNDYP(S|D)m")>; +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>; +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTTPS2DQYrm")>; + +def WriteVMONTPSt: SchedWriteRes<[JSTC, JLAGU]> { + let Latency = 3; + let ResourceCycles = [2,1]; +} +def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTP(S|D)Ymr")>; +def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTDQYmr")>; + +def WriteVCVTPDY: SchedWriteRes<[JSTC, JFPU01]> { + let Latency = 6; + let ResourceCycles = [2, 4]; +} +def : InstRW<[WriteVCVTPDY], (instregex "VCVTPD2(DQ|PS)Yrr")>; +def : InstRW<[WriteVCVTPDY], (instregex "VCVTTPD2DQYrr")>; + +def WriteVCVTPDYLd: SchedWriteRes<[JLAGU, JSTC, JFPU01]> { + let Latency = 11; + let ResourceCycles = [1, 2, 4]; +} +def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>; +def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTTPD2DQYrm")>; + def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { let Latency = 54; let ResourceCycles = [54]; diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 055f481..6dc0dd5 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -1090,8 +1090,8 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1155,8 +1155,8 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1218,8 +1218,8 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00] +; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00] ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1282,8 +1282,8 @@ define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00] +; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00] ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1345,8 +1345,8 @@ define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00] +; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00] ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1408,8 +1408,8 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -1472,8 +1472,8 @@ define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # BB#0: -; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -2847,7 +2847,7 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntpd: @@ -2900,7 +2900,7 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movntps: @@ -4053,8 +4053,8 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -4117,8 +4117,8 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00] +; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ;