[X86] Add the ability to override memory folding latency to schedules and add 1uop...

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 25 Mar 2018 10:21:19 +0000 (10:21 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 25 Mar 2018 10:21:19 +0000 (10:21 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 25 Mar 2018 10:21:19 +0000 (10:21 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 25 Mar 2018 10:21:19 +0000 (10:21 +0000)
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td

index b575d98..0aa0700 100755 (executable)
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -80,7 +80,8 @@ def : ReadAdvance<ReadAfterLd, 5>;
  // folded loads.
  multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
-                          int Lat, list<int> Res = [1], int UOps = 1> {
+                          int Lat, list<int> Res = [1], int UOps = 1,
+                          int LoadLat = 5> {
    // Register variant is using a single cycle on ExePort.
    def : WriteRes<SchedRW, ExePorts> {
      let Latency = Lat;
@@ -88,12 +89,12 @@ multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
      let NumMicroOps = UOps;
    }
  
-  // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
-  // latency.
+  // Memory variant also uses a cycle on port 2/3, adds LoadLat cycles to the
+  // latency (default = 5), and adds one micro-op (NumMicroOps = UOps + 1).
    def : WriteRes<SchedRW.Folded, !listconcat([BWPort23], ExePorts)> {
-    let Latency = !add(Lat, 5);
+    let Latency = !add(Lat, LoadLat);
      let ResourceCycles = !listconcat([1], Res);
-    let NumMicroOps = UOps;
+    let NumMicroOps = !add(UOps, 1);
    }
  }
  
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td

index c15498d..8ae1fbf 100644 (file)
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -81,7 +81,8 @@ def : ReadAdvance<ReadAfterLd, 5>;
  // folded loads.
  multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
-                          int Lat, list<int> Res = [1], int UOps = 1> {
+                          int Lat, list<int> Res = [1], int UOps = 1,
+                          int LoadLat = 5> {
    // Register variant is using a single cycle on ExePort.
    def : WriteRes<SchedRW, ExePorts> {
      let Latency = Lat;
@@ -89,12 +90,12 @@ multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
      let NumMicroOps = UOps;
    }
  
-  // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
-  // latency.
+  // Memory variant also uses a cycle on port 2/3, adds LoadLat cycles to the
+  // latency (default = 5), and adds one micro-op (NumMicroOps = UOps + 1).
    def : WriteRes<SchedRW.Folded, !listconcat([HWPort23], ExePorts)> {
-    let Latency = !add(Lat, 5);
+    let Latency = !add(Lat, LoadLat);
      let ResourceCycles = !listconcat([1], Res);
-    let NumMicroOps = UOps;
+    let NumMicroOps = !add(UOps, 1);
    }
  }
  
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td

index a23c732..86cfce1 100644 (file)
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -72,7 +72,8 @@ def : ReadAdvance<ReadAfterLd, 4>;
  // folded loads.
  multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
-                          int Lat, list<int> Res = [1], int UOps = 1> {
+                          int Lat, list<int> Res = [1], int UOps = 1,
+                          int LoadLat = 4> {
    // Register variant is using a single cycle on ExePort.
    def : WriteRes<SchedRW, ExePorts> {
      let Latency = Lat;
@@ -80,12 +81,12 @@ multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
      let NumMicroOps = UOps;
    }
  
-  // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
-  // latency.
+  // Memory variant also uses a cycle on port 2/3, adds LoadLat cycles to the
+  // latency (default = 4), and adds one micro-op (NumMicroOps = UOps + 1).
    def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
-    let Latency = !add(Lat, 4);
+    let Latency = !add(Lat, LoadLat);
      let ResourceCycles = !listconcat([1], Res);
-    let NumMicroOps = UOps;
+    let NumMicroOps = !add(UOps, 1);
    }
  }
  
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td

index 8ee7046..8909cce 100644 (file)
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -78,7 +78,8 @@ def : ReadAdvance<ReadAfterLd, 5>;
  // folded loads.
  multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
-                          int Lat, list<int> Res = [1], int UOps = 1> {
+                          int Lat, list<int> Res = [1], int UOps = 1,
+                          int LoadLat = 5> {
    // Register variant is using a single cycle on ExePort.
    def : WriteRes<SchedRW, ExePorts> {
      let Latency = Lat;
@@ -86,12 +87,12 @@ multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
      let NumMicroOps = UOps;
    }
  
-  // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
-  // latency.
+  // Memory variant also uses a cycle on port 2/3, adds LoadLat cycles to the
+  // latency (default = 5), and adds one micro-op (NumMicroOps = UOps + 1).
    def : WriteRes<SchedRW.Folded, !listconcat([SKLPort23], ExePorts)> {
-    let Latency = !add(Lat, 5);
+    let Latency = !add(Lat, LoadLat);
      let ResourceCycles = !listconcat([1], Res);
-    let NumMicroOps = UOps;
+    let NumMicroOps = !add(UOps, 1);
    }
  }
  
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td

index a5b8d68..5ecfd3e 100755 (executable)
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -78,7 +78,8 @@ def : ReadAdvance<ReadAfterLd, 5>;
  // folded loads.
  multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
-                          int Lat, list<int> Res = [1], int UOps = 1> {
+                          int Lat, list<int> Res = [1], int UOps = 1,
+                          int LoadLat = 5> {
    // Register variant is using a single cycle on ExePort.
    def : WriteRes<SchedRW, ExePorts> {
      let Latency = Lat;
@@ -86,12 +87,12 @@ multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
      let NumMicroOps = UOps;
    }
  
-  // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
-  // latency.
+  // Memory variant also uses a cycle on port 2/3, adds LoadLat cycles to the
+  // latency (default = 5), and adds one micro-op (NumMicroOps = UOps + 1).
    def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
-    let Latency = !add(Lat, 5);
+    let Latency = !add(Lat, LoadLat);
      let ResourceCycles = !listconcat([1], Res);
-    let NumMicroOps = UOps;
+    let NumMicroOps = !add(UOps, 1);
    }
  }
  
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td

index 6aa4523..851d925 100644 (file)
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -57,7 +57,8 @@ def : ReadAdvance<ReadAfterLd, 3>;
  // folded loads.
  multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts,
-                           int Lat, list<int> Res = [1], int UOps = 1> {
+                           int Lat, list<int> Res = [1], int UOps = 1,
+                           int LoadLat = 3> {
    // Register variant is using a single cycle on ExePort.
    def : WriteRes<SchedRW, ExePorts> {
      let Latency = Lat;
@@ -65,10 +66,10 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
      let NumMicroOps = UOps;
    }
  
-  // Memory variant also uses a cycle on MEC_RSV and adds 3 cycles to the
-  // latency.
+  // Memory variant also uses a cycle on MEC_RSV and adds LoadLat cycles to
+  // the latency (default = 3).
    def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
-    let Latency = !add(Lat, 3);
+    let Latency = !add(Lat, LoadLat);
      let ResourceCycles = !listconcat([1], Res);
      let NumMicroOps = UOps;
    }
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 25 Mar 2018 10:21:19 +0000 (10:21 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 25 Mar 2018 10:21:19 +0000 (10:21 +0000)
llvm/lib/Target/X86/X86SchedBroadwell.td		patch \| blob \| history
llvm/lib/Target/X86/X86SchedHaswell.td		patch \| blob \| history
llvm/lib/Target/X86/X86SchedSandyBridge.td		patch \| blob \| history
llvm/lib/Target/X86/X86SchedSkylakeClient.td		patch \| blob \| history
llvm/lib/Target/X86/X86SchedSkylakeServer.td		patch \| blob \| history
llvm/lib/Target/X86/X86ScheduleSLM.td		patch \| blob \| history