From: Matthias Braun Date: Tue, 1 Mar 2016 21:20:31 +0000 (+0000) Subject: AArch64: Add missing schedinfo, check completeness for cyclone X-Git-Tag: llvmorg-3.9.0-rc1~12767 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a6cfb6f682ec819bfb1a272ff10431b2b8e7980c;p=platform%2Fupstream%2Fllvm.git AArch64: Add missing schedinfo, check completeness for cyclone This adds some missing generic schedule info definitions, enables completeness checking for cyclone and fixes a typo uncovered by that. Differential Revision: http://reviews.llvm.org/D17748 llvm-svn: 262393 --- diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 6ac2175..caade48 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -9377,7 +9377,8 @@ class BaseCASEncoding : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), "cas" # order # size, "\t$Rs, $Rt, [$Rn]", - "$out = $Rs",[]> { + "$out = $Rs",[]>, + Sched<[WriteAtomic]> { let NP = 1; } @@ -9391,7 +9392,8 @@ multiclass CompareAndSwap Acq, bits<1> Rel, string order> { class BaseCASP : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), "casp" # order # size, "\t$Rs, $Rt, [$Rn]", - "$out = $Rs",[]> { + "$out = $Rs",[]>, + Sched<[WriteAtomic]> { let NP = 0; } @@ -9405,7 +9407,8 @@ multiclass CompareAndSwapPair Acq, bits<1> Rel, string order> { let Predicates = [HasV8_1a] in class BaseSWP : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, - "\t$Rs, $Rt, [$Rn]","",[]> { + "\t$Rs, $Rt, [$Rn]","",[]>, + Sched<[WriteAtomic]> { bits<2> Sz; bit Acq; bit Rel; @@ -9436,7 +9439,8 @@ multiclass Swap Acq, bits<1> Rel, string order> { let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in class BaseLDOPregister : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, - "\t$Rs, $Rt, [$Rn]","",[]> { + "\t$Rs, $Rt, [$Rn]","",[]>, + Sched<[WriteAtomic]> { 
bits<2> Sz; bit Acq; bit Rel; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c98db1c..f2e7650 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -312,10 +312,13 @@ include "AArch64InstrFormats.td" //===----------------------------------------------------------------------===// let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { +// We set Sched to empty list because we expect these instructions to simply get +// removed in most cases. def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - [(AArch64callseq_start timm:$amt)]>; + [(AArch64callseq_start timm:$amt)]>, Sched<[]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, + Sched<[]>; } // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 let isReMaterializable = 1, isCodeGenOnly = 1 in { @@ -1206,7 +1209,8 @@ def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; // Create a separate pseudo-instruction for codegen to use so that we don't // flag lr as used in every function. It'll be restored before the RET by the // epilogue if it's legitimately used. -def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> { +def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>, + Sched<[WriteBrReg]> { let isTerminator = 1; let isBarrier = 1; let isReturn = 1; @@ -1216,7 +1220,7 @@ def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> { // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction // (which in the usual case is a BLR). 
let hasSideEffects = 1 in -def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { +def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { let AsmString = ".tlsdesccall $sym"; } @@ -1226,7 +1230,8 @@ let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, isCodeGenOnly = 1 in def TLSDESC_CALLSEQ : Pseudo<(outs), (ins i64imm:$sym), - [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>; + [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>, + Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>; def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), (TLSDESC_CALLSEQ texternalsym:$sym)>; @@ -2536,9 +2541,11 @@ defm FMOV : UnscaledConversion<"fmov">; let isReMaterializable = 1, isCodeGenOnly = 1 in { def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>, + Sched<[WriteF]>, Requires<[NoZCZ]>; def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>, + Sched<[WriteF]>, Requires<[NoZCZ]>; } @@ -2665,6 +2672,7 @@ def F128CSEL : Pseudo<(outs FPR128:$Rd), (i32 imm:$cond), NZCV))]> { let Uses = [NZCV]; let usesCustomInserter = 1; + let hasNoSchedulingInfo = 1; } @@ -6038,8 +6046,10 @@ def : Pat<(nontemporalstore GPR64:$Rt, // Tail call return handling. These are all compiler pseudo-instructions, // so no encoding information or anything like that. 
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff),[]>; - def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>; + def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>, + Sched<[WriteBrReg]>; + def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>, + Sched<[WriteBrReg]>; } def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td index ad5505b..f34cfc6 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -110,6 +110,8 @@ def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; let ResourceCycles = [3]; } +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + // Branch def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td index 4b3a9b0..874e445 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -97,6 +97,8 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td index 21e2bc2..3457f2f 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td @@ -17,7 +17,7 @@ def CycloneModel : SchedMachineModel { let MicroOpBufferSize = 192; // Based on the reorder buffer. let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 16; // 14-19 cycles are typical. 
- let CompleteModel = 0; + let CompleteModel = 1; } //===----------------------------------------------------------------------===// @@ -727,7 +727,7 @@ def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], (instrs LD3Rv1d,LD3Rv2d)>; def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], - (instrs LD3Rv2d_POST,LD3Rv2d_POST)>; + (instrs LD3Rv1d_POST,LD3Rv2d_POST)>; def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], (instregex "LD4Fourv(8b|4h|2s)$")>; @@ -852,6 +852,9 @@ def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>; def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; +// Atomic operations are not supported. +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + //--- // Unused SchedRead types //--- diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td index dc01199..429829e 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td +++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td @@ -100,6 +100,8 @@ def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 4; } +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + // No forwarding logic is modelled yet. def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/AArch64/AArch64SchedM1.td b/llvm/lib/Target/AArch64/AArch64SchedM1.td index 6525628..636168e 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedM1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedM1.td @@ -356,4 +356,7 @@ def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>; // CRC instructions. def : InstRW<[M1WriteC2], (instregex "^CRC32")>; +// atomic memory operations. 
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + } // SchedModel = ExynosM1Model diff --git a/llvm/lib/Target/AArch64/AArch64Schedule.td b/llvm/lib/Target/AArch64/AArch64Schedule.td index eaa9110..c157781 100644 --- a/llvm/lib/Target/AArch64/AArch64Schedule.td +++ b/llvm/lib/Target/AArch64/AArch64Schedule.td @@ -92,6 +92,8 @@ def WriteV : SchedWrite; // Vector ops. def WriteVLD : SchedWrite; // Vector loads. def WriteVST : SchedWrite; // Vector stores. +def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP) + // Read the unwritten lanes of the VLD's destination registers. def ReadVLD : SchedRead;