From 8cbc1d232bd581539cd4d6030725e36fe113625d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Mar 2018 18:20:35 +0000 Subject: [PATCH] [X86][BTVER2] Fix throughput of YMM bitwise instructions These instructions are double-pumped, split into 2 128-bit ops and then passing through either FPU pipe. Found while testing llvm-mca (D43951) llvm-svn: 326597 --- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 20 ++++++++++++++++++++ llvm/test/CodeGen/X86/avx-schedule.ll | 24 ++++++++++++------------ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index beb0fcd..006662f 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -508,6 +508,26 @@ def : InstRW<[WriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>; // AVX instructions. //////////////////////////////////////////////////////////////////////////////// +def WriteLogicY: SchedWriteRes<[JFPU01]> { + let Latency = 1; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} +def : InstRW<[WriteLogicY], (instrs VORPDYrr, VORPSYrr, + VXORPDYrr, VXORPSYrr, + VANDPDYrr, VANDPSYrr, + VANDNPDYrr, VANDNPSYrr)>; + +def WriteLogicYLd: SchedWriteRes<[JLAGU, JFPU01]> { + let Latency = 6; + let ResourceCycles = [1, 2]; + let NumMicroOps = 3; +} +def : InstRW<[WriteLogicYLd], (instrs VORPDYrm, VORPSYrm, + VXORPDYrm, VXORPSYrm, + VANDPDYrm, VANDPSYrm, + VANDNPDYrm, VANDNPSYrm)>; + def WriteVDPPSY: SchedWriteRes<[JFPU1, JFPU0]> { let Latency = 12; let ResourceCycles = [6, 6]; diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 9aaca64..164f97f 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -272,7 +272,7 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -341,7 +341,7 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -410,7 +410,7 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -477,7 +477,7 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; BTVER2-LABEL: test_andps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -965,7 +965,7 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00] ; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cmppd: @@ -1031,7 +1031,7 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00] ; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cmpps: @@ -1415,7 +1415,7 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00] ; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cvtps2dq: @@ -1479,7 +1479,7 @@ define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) { ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00] ; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cvttps2dq: @@ -3392,7 +3392,7 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) ; ; BTVER2-LABEL: orpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -3459,7 +3459,7 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; ; BTVER2-LABEL: test_orps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -5247,7 +5247,7 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -5314,7 +5314,7 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; BTVER2-LABEL: test_xorps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] -- 2.7.4