From: Matthias Braun <matze@braunis.de>
Date: Tue, 1 Mar 2016 21:20:31 +0000 (+0000)
Subject: AArch64: Add missing schedinfo, check completeness for cyclone
X-Git-Tag: llvmorg-3.9.0-rc1~12767
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a6cfb6f682ec819bfb1a272ff10431b2b8e7980c;p=platform%2Fupstream%2Fllvm.git

AArch64: Add missing schedinfo, check completeness for cyclone

This adds some missing generic schedule info definitions, enables
completeness checking for Cyclone, and fixes a typo uncovered by that check
(a duplicated LD3Rv2d_POST entry that should have been LD3Rv1d_POST).

Differential Revision: http://reviews.llvm.org/D17748

llvm-svn: 262393
---

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6ac2175..caade48 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -9377,7 +9377,8 @@ class BaseCASEncoding<dag oops, dag iops, string asm, string operands,
 class BaseCAS<string order, string size, RegisterClass RC>
       : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
                         "cas" # order # size, "\t$Rs, $Rt, [$Rn]",
-                        "$out = $Rs",[]> {
+                        "$out = $Rs",[]>,
+        Sched<[WriteAtomic]> {
   let NP = 1;
 }
 
@@ -9391,7 +9392,8 @@ multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> {
 class BaseCASP<string order, string size, RegisterOperand RC>
       : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
                         "casp" # order # size, "\t$Rs, $Rt, [$Rn]",
-                        "$out = $Rs",[]> {
+                        "$out = $Rs",[]>,
+        Sched<[WriteAtomic]> {
   let NP = 0;
 }
 
@@ -9405,7 +9407,8 @@ multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> {
 let Predicates = [HasV8_1a] in
 class BaseSWP<string order, string size, RegisterClass RC>
       : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size,
-          "\t$Rs, $Rt, [$Rn]","",[]> {
+          "\t$Rs, $Rt, [$Rn]","",[]>,
+        Sched<[WriteAtomic]> {
   bits<2> Sz;
   bit Acq;
   bit Rel;
@@ -9436,7 +9439,8 @@ multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
 let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
 class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
       : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size,
-          "\t$Rs, $Rt, [$Rn]","",[]> {
+          "\t$Rs, $Rt, [$Rn]","",[]>,
+        Sched<[WriteAtomic]> {
   bits<2> Sz;
   bit Acq;
   bit Rel;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c98db1c..f2e7650 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -312,10 +312,13 @@ include "AArch64InstrFormats.td"
 //===----------------------------------------------------------------------===//
 
 let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
+// Sched is set to an empty list because we expect these instructions to
+// simply get removed in most cases.
 def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
-                              [(AArch64callseq_start timm:$amt)]>;
+                              [(AArch64callseq_start timm:$amt)]>, Sched<[]>;
 def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
-                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
+                            Sched<[]>;
 } // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
 
 let isReMaterializable = 1, isCodeGenOnly = 1 in {
@@ -1206,7 +1209,8 @@ def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
 // Create a separate pseudo-instruction for codegen to use so that we don't
 // flag lr as used in every function. It'll be restored before the RET by the
 // epilogue if it's legitimately used.
-def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
+def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
+                   Sched<[WriteBrReg]> {
   let isTerminator = 1;
   let isBarrier = 1;
   let isReturn = 1;
@@ -1216,7 +1220,7 @@ def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
 // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
 // (which in the usual case is a BLR).
 let hasSideEffects = 1 in
-def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
+def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
   let AsmString = ".tlsdesccall $sym";
 }
 
@@ -1226,7 +1230,8 @@ let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
     isCodeGenOnly = 1 in
 def TLSDESC_CALLSEQ
     : Pseudo<(outs), (ins i64imm:$sym),
-             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
+      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
 def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
           (TLSDESC_CALLSEQ texternalsym:$sym)>;
 
@@ -2536,9 +2541,11 @@ defm FMOV : UnscaledConversion<"fmov">;
 let isReMaterializable = 1, isCodeGenOnly = 1 in {
 def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
     PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+    Sched<[WriteF]>,
     Requires<[NoZCZ]>;
 def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
     PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+    Sched<[WriteF]>,
     Requires<[NoZCZ]>;
 }
 
@@ -2665,6 +2672,7 @@ def F128CSEL : Pseudo<(outs FPR128:$Rd),
                                        (i32 imm:$cond), NZCV))]> {
   let Uses = [NZCV];
   let usesCustomInserter = 1;
+  let hasNoSchedulingInfo = 1;
 }
 
 
@@ -6038,8 +6046,10 @@ def : Pat<(nontemporalstore GPR64:$Rt,
 // Tail call return handling. These are all compiler pseudo-instructions,
 // so no encoding information or anything like that.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
-  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff),[]>;
-  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>;
+  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
+                   Sched<[WriteBrReg]>;
+  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
+                   Sched<[WriteBrReg]>;
 }
 
 def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td
index ad5505b..f34cfc6 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -110,6 +110,8 @@ def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
 def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
                                                   let ResourceCycles = [3]; }
 
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 // Branch
 def : WriteRes<WriteBr, [A53UnitB]>;
 def : WriteRes<WriteBrReg, [A53UnitB]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 4b3a9b0..874e445 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -97,6 +97,8 @@ def : SchedAlias<WriteV,     A57Write_3cyc_1V>;
 def : SchedAlias<WriteVLD,   A57Write_5cyc_1L>;
 def : SchedAlias<WriteVST,   A57Write_1cyc_1S>;
 
+def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
+
 def : WriteRes<WriteSys,     []> { let Latency = 1; }
 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
 def : WriteRes<WriteHint,    []> { let Latency = 1; }
diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index 21e2bc2..3457f2f 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -17,7 +17,7 @@ def CycloneModel : SchedMachineModel {
   let MicroOpBufferSize = 192; // Based on the reorder buffer.
   let LoadLatency = 4; // Optimistic load latency.
   let MispredictPenalty = 16; // 14-19 cycles are typical.
-  let CompleteModel = 0;
+  let CompleteModel = 1;
 }
 
 //===----------------------------------------------------------------------===//
@@ -727,7 +727,7 @@ def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
              (instrs LD3Rv1d,LD3Rv2d)>;
 def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
-             (instrs LD3Rv2d_POST,LD3Rv2d_POST)>;
+             (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
 
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
              (instregex "LD4Fourv(8b|4h|2s)$")>;
@@ -852,6 +852,9 @@ def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;
 def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],          (instrs ST4i64)>;
 def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
 
+// Atomic operations are not supported.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 //---
 // Unused SchedRead types
 //---
diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
index dc01199..429829e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
@@ -100,6 +100,8 @@ def : WriteRes<WriteHint,    []> { let Latency = 1; }
 
 def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 // No forwarding logic is modelled yet.
 def : ReadAdvance<ReadI,       0>;
 def : ReadAdvance<ReadISReg,   0>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedM1.td b/llvm/lib/Target/AArch64/AArch64SchedM1.td
index 6525628..636168e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedM1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedM1.td
@@ -356,4 +356,7 @@ def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
 // CRC instructions.
 def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
 
+// Atomic memory operations are not supported.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
 } // SchedModel = ExynosM1Model
diff --git a/llvm/lib/Target/AArch64/AArch64Schedule.td b/llvm/lib/Target/AArch64/AArch64Schedule.td
index eaa9110..c157781 100644
--- a/llvm/lib/Target/AArch64/AArch64Schedule.td
+++ b/llvm/lib/Target/AArch64/AArch64Schedule.td
@@ -92,6 +92,8 @@ def WriteV   : SchedWrite; // Vector ops.
 def WriteVLD : SchedWrite; // Vector loads.
 def WriteVST : SchedWrite; // Vector stores.
 
+def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
+
 // Read the unwritten lanes of the VLD's destination registers.
 def ReadVLD : SchedRead;