--- /dev/null
+# RUN: llc -mcpu=cortex-a57 -mtriple=thumb -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s 2>&1 | FileCheck %s
+
+# CHECK-LABEL: ********** MI Scheduling **********
+# CHECK: %[[RES:[0-9]+]]:rgpr = t2MLA
+# CHECK-NEXT: # preds left
+# CHECK-NEXT: # succs left
+# CHECK-NEXT: # rdefs left
+# CHECK-NEXT: Latency : 3
+# CHECK-NEXT: Depth
+# CHECK-NEXT: Height
+# CHECK-NEXT: Predecessors:
+# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
+# CHECK-NEXT: SU({{.*}}): Out Latency=
+# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
+# CHECK-NEXT: Successors:
+# CHECK-NEXT: SU([[SMLA_SU:[0-9]+]]): Data Latency=1 Reg=%[[RES]]
+# CHECK-NEXT: Pressure Diff
+# CHECK-NEXT: Single Issue : false;
+# CHECK-NEXT: SU([[SMLA_SU]]): {{.*}} = t2SMLAL %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr(tied-def 0), %[[RES]]:rgpr(tied-def 1), 14, $noreg
+
+name: test_smlal_forwarding
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r1, $r3, $r4, $r5, $r6
+ %1:rgpr = COPY $r1
+ %3:rgpr = COPY $r3
+ %4:rgpr = COPY $r4
+ %5:rgpr = COPY $r5
+ %6:rgpr = COPY $r6
+ %3:rgpr = t2MLA %4:rgpr, %1:rgpr, %4:rgpr, 14, $noreg
+ %6:rgpr, %5:rgpr = t2SMLAL %5:rgpr, %6:rgpr, %4:rgpr, %3:rgpr, 14, $noreg
+ $r0 = COPY %6:rgpr
+ BX_RET 14, $noreg, implicit $r0
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
bool substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
- bool IsRead, bool IsForAnyCPU,
- unsigned StartIdx);
+ bool IsRead, unsigned StartIdx);
bool substituteVariants(const PredTransition &Trans);
return false;
}
-static bool hasAliasedVariants(const CodeGenSchedRW &RW,
- CodeGenSchedModels &SchedModels) {
- if (RW.HasVariants)
- return true;
-
- for (Record *Alias : RW.Aliases) {
- const CodeGenSchedRW &AliasRW =
- SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW"));
- if (AliasRW.HasVariants)
- return true;
- if (AliasRW.IsSequence) {
- IdxVec ExpandedRWs;
- SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
- for (unsigned SI : ExpandedRWs) {
- if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
- SchedModels))
- return true;
- }
- }
- }
- return false;
-}
-
static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants,
ArrayRef<unsigned> ProcIndices) {
std::vector<Record *> Preds;
// starts. RWSeq must be applied to all transitions between StartIdx and the end
// of TransVec.
bool PredTransitions::substituteVariantOperand(
- const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
- unsigned StartIdx) {
-
- auto CollectAndAddVariants = [&](unsigned TransIdx,
- const CodeGenSchedRW &SchedRW) {
- // Distribute this partial PredTransition across intersecting variants.
- // This will push a copies of TransVec[TransIdx] on the back of TransVec.
- std::vector<TransVariant> IntersectingVariants;
- getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
- // Now expand each variant on top of its copy of the transition.
- for (const TransVariant &IV : IntersectingVariants)
- pushVariant(IV, IsRead);
- return !IntersectingVariants.empty();
- };
-
+ const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
bool Subst = false;
// Visit each original RW within the current sequence.
for (SmallVectorImpl<unsigned>::const_iterator
// Push this RW on all partial PredTransitions or distribute variants.
// New PredTransitions may be pushed within this loop which should not be
// revisited (TransEnd must be loop invariant).
- bool HasAliases = false, WasPushed = false;
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
TransIdx != TransEnd; ++TransIdx) {
- // In the common case, push RW onto the current operand's sequence.
- if (!hasAliasedVariants(SchedRW, SchedModels)) {
+ // Distribute this partial PredTransition across intersecting variants.
+      // This will push copies of TransVec[TransIdx] on the back of TransVec.
+ std::vector<TransVariant> IntersectingVariants;
+ getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
+ // Now expand each variant on top of its copy of the transition.
+ for (const TransVariant &IV : IntersectingVariants)
+ pushVariant(IV, IsRead);
+ if (IntersectingVariants.empty()) {
if (IsRead)
TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
else
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
continue;
+ } else {
+ Subst = true;
}
- HasAliases = true;
- WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
- Subst |= WasPushed;
- }
- if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
- // If we're here this means that in some sched class:
- // a) We have read variant for CPU A
- // b) We have write variant for CPU B
- // b) We don't have write variant for CPU A
- // d) We must expand all read/write variants (IsForAnyCPU is true)
- // e) We couldn't expand SchedRW because TransVec doesn't have
- // any transition with compatible CPU ID.
- // In such case we create new empty transition with zero (AnyCPU)
- // index.
- TransVec.reserve(TransVec.size() + 1);
- TransVec.emplace_back(TransVec[StartIdx].PredTerm);
- TransVec.back().ReadSequences.emplace_back();
- Subst |= CollectAndAddVariants(TransVec.size() - 1, SchedRW);
}
}
return Subst;
bool Subst = false;
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
- bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
+ assert(!llvm::count(Trans.ProcIndices, 0));
// Visit each original write sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->WriteSequences.emplace_back();
}
- Subst |=
- substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
+ Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
}
// Visit each original read sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->ReadSequences.emplace_back();
}
- Subst |=
- substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
+ Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
}
return Subst;
}
// requires creating a new SchedClass.
for (ArrayRef<PredTransition>::iterator
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
+ // Variant expansion (substituteVariants) may create unconditional
+ // transitions. We don't need to build sched classes for them.
+ if (I->PredTerm.empty())
+ continue;
IdxVec OperWritesVariant, OperReadsVariant;
addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false);
addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true);
}
}
+std::vector<unsigned> CodeGenSchedModels::getAllProcIndices() const {
+ std::vector<unsigned> ProcIdVec;
+ for (const auto &PM : ProcModelMap)
+ if (PM.second != 0)
+ ProcIdVec.push_back(PM.second);
+ return ProcIdVec;
+}
+
+static std::vector<PredTransition>
+makePerProcessorTransitions(const PredTransition &Trans,
+ ArrayRef<unsigned> ProcIndices) {
+ std::vector<PredTransition> PerCpuTransVec;
+ for (unsigned ProcId : ProcIndices) {
+ assert(ProcId != 0);
+ PerCpuTransVec.push_back(Trans);
+ PerCpuTransVec.back().ProcIndices.assign(1, ProcId);
+ }
+ return PerCpuTransVec;
+}
+
// Create new SchedClasses for the given ReadWrite list. If any of the
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
// of the ReadWrite list, following Aliases if necessary.
}
LLVM_DEBUG(dbgs() << '\n');
+ LastTransitions = makePerProcessorTransitions(
+ LastTransitions[0], llvm::count(ProcIndices, 0)
+ ? ArrayRef<unsigned>(getAllProcIndices())
+ : ProcIndices);
// Collect all PredTransitions for individual operands.
// Iterate until no variant writes remain.
bool SubstitutedAny;
LLVM_DEBUG(Transitions.dump());
LastTransitions.swap(Transitions.TransVec);
} while (SubstitutedAny);
- // If the first transition has no variants, nothing to do.
- if (LastTransitions[0].PredTerm.empty())
- return;
// WARNING: We are about to mutate the SchedClasses vector. Do not refer to
// OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.