From e3547af7bedf5cf01cbfd38f18e7a37dda430e7f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 25 Mar 2018 10:21:19 +0000 Subject: [PATCH] [X86] Add the ability to override memory folding latency to schedules and add 1uop for memory folds for Intel models The Intel models need an extra 1uop for memory folded instructions, plus a lot of instructions take a non-default memory latency which should allow us to use the multiclass a lot more to tidy things up. Differential Revision: https://reviews.llvm.org/D44840 llvm-svn: 328446 --- llvm/lib/Target/X86/X86SchedBroadwell.td | 11 ++++++----- llvm/lib/Target/X86/X86SchedHaswell.td | 11 ++++++----- llvm/lib/Target/X86/X86SchedSandyBridge.td | 11 ++++++----- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 11 ++++++----- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 11 ++++++----- llvm/lib/Target/X86/X86ScheduleSLM.td | 9 +++++---- 6 files changed, 35 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index b575d98..0aa0700 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -80,7 +80,8 @@ def : ReadAdvance; // folded loads. multiclass BWWriteResPair ExePorts, - int Lat, list Res = [1], int UOps = 1> { + int Lat, list Res = [1], int UOps = 1, + int LoadLat = 5> { // Register variant is using a single cycle on ExePort. def : WriteRes { let Latency = Lat; @@ -88,12 +89,12 @@ multiclass BWWriteResPair { - let Latency = !add(Lat, 5); + let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); - let NumMicroOps = UOps; + let NumMicroOps = !add(UOps, 1); } } diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index c15498d..8ae1fbf 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -81,7 +81,8 @@ def : ReadAdvance; // folded loads. multiclass HWWriteResPair ExePorts, - int Lat, list Res = [1], int UOps = 1> { + int Lat, list Res = [1], int UOps = 1, + int LoadLat = 5> { // Register variant is using a single cycle on ExePort. def : WriteRes { let Latency = Lat; @@ -89,12 +90,12 @@ multiclass HWWriteResPair { - let Latency = !add(Lat, 5); + let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); - let NumMicroOps = UOps; + let NumMicroOps = !add(UOps, 1); } } diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index a23c732..86cfce1 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -72,7 +72,8 @@ def : ReadAdvance; // folded loads. multiclass SBWriteResPair ExePorts, - int Lat, list Res = [1], int UOps = 1> { + int Lat, list Res = [1], int UOps = 1, + int LoadLat = 4> { // Register variant is using a single cycle on ExePort. def : WriteRes { let Latency = Lat; @@ -80,12 +81,12 @@ multiclass SBWriteResPair { - let Latency = !add(Lat, 4); + let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); - let NumMicroOps = UOps; + let NumMicroOps = !add(UOps, 1); } } diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 8ee7046..8909cce 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -78,7 +78,8 @@ def : ReadAdvance; // folded loads. multiclass SKLWriteResPair ExePorts, - int Lat, list Res = [1], int UOps = 1> { + int Lat, list Res = [1], int UOps = 1, + int LoadLat = 5> { // Register variant is using a single cycle on ExePort. def : WriteRes { let Latency = Lat; @@ -86,12 +87,12 @@ multiclass SKLWriteResPair { - let Latency = !add(Lat, 5); + let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); - let NumMicroOps = UOps; + let NumMicroOps = !add(UOps, 1); } } diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index a5b8d68..5ecfd3e 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -78,7 +78,8 @@ def : ReadAdvance; // folded loads. multiclass SKXWriteResPair ExePorts, - int Lat, list Res = [1], int UOps = 1> { + int Lat, list Res = [1], int UOps = 1, + int LoadLat = 5> { // Register variant is using a single cycle on ExePort. def : WriteRes { let Latency = Lat; @@ -86,12 +87,12 @@ multiclass SKXWriteResPair { - let Latency = !add(Lat, 5); + let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); - let NumMicroOps = UOps; + let NumMicroOps = !add(UOps, 1); } } diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 6aa4523..851d925 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -57,7 +57,8 @@ def : ReadAdvance; // folded loads. multiclass SLMWriteResPair ExePorts, - int Lat, list Res = [1], int UOps = 1> { + int Lat, list Res = [1], int UOps = 1, + int LoadLat = 3> { // Register variant is using a single cycle on ExePort. def : WriteRes { let Latency = Lat; @@ -65,10 +66,10 @@ multiclass SLMWriteResPair { - let Latency = !add(Lat, 3); + let Latency = !add(Lat, LoadLat); let ResourceCycles = !listconcat([1], Res); let NumMicroOps = UOps; } -- 2.7.4