--- /dev/null
+# RUN: llc -mcpu=cortex-a57 -mtriple=thumb -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s 2>&1 | FileCheck %s
+
+# CHECK-LABEL: ********** MI Scheduling **********
+# CHECK: %[[RES:[0-9]+]]:rgpr = t2MLA
+# CHECK-NEXT: # preds left
+# CHECK-NEXT: # succs left
+# CHECK-NEXT: # rdefs left
+# CHECK-NEXT: Latency : 3
+# CHECK-NEXT: Depth
+# CHECK-NEXT: Height
+# CHECK-NEXT: Predecessors:
+# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
+# CHECK-NEXT: SU({{.*}}): Out Latency=
+# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
+# CHECK-NEXT: Successors:
+# CHECK-NEXT: SU([[SMLA_SU:[0-9]+]]): Data Latency=1 Reg=%[[RES]]
+# CHECK-NEXT: Pressure Diff
+# CHECK-NEXT: Single Issue : false;
+# CHECK-NEXT: SU([[SMLA_SU]]): {{.*}} = t2SMLAL %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr(tied-def 0), %[[RES]]:rgpr(tied-def 1), 14, $noreg
+
+name: test_smlal_forwarding
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r1, $r3, $r4, $r5, $r6
+ %1:rgpr = COPY $r1
+ %3:rgpr = COPY $r3
+ %4:rgpr = COPY $r4
+ %5:rgpr = COPY $r5
+ %6:rgpr = COPY $r6
+ %3:rgpr = t2MLA %4:rgpr, %1:rgpr, %4:rgpr, 14, $noreg
+ %6:rgpr, %5:rgpr = t2SMLAL %5:rgpr, %6:rgpr, %4:rgpr, %3:rgpr, 14, $noreg
+ $r0 = COPY %6:rgpr
+ BX_RET 14, $noreg, implicit $r0
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
bool substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
- bool IsRead, bool IsForAnyCPU,
- unsigned StartIdx);
+ bool IsRead, unsigned StartIdx);
bool substituteVariants(const PredTransition &Trans);
return false;
}
-static bool hasAliasedVariants(const CodeGenSchedRW &RW,
- CodeGenSchedModels &SchedModels) {
- if (RW.HasVariants)
- return true;
-
- for (Record *Alias : RW.Aliases) {
- const CodeGenSchedRW &AliasRW =
- SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW"));
- if (AliasRW.HasVariants)
- return true;
- if (AliasRW.IsSequence) {
- IdxVec ExpandedRWs;
- SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
- for (unsigned SI : ExpandedRWs) {
- if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
- SchedModels))
- return true;
- }
- }
- }
- return false;
-}
-
static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants,
ArrayRef<unsigned> ProcIndices) {
std::vector<Record *> Preds;
// starts. RWSeq must be applied to all transitions between StartIdx and the end
// of TransVec.
bool PredTransitions::substituteVariantOperand(
- const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
- unsigned StartIdx) {
-
- auto CollectAndAddVariants = [&](unsigned TransIdx,
- const CodeGenSchedRW &SchedRW) {
- // Distribute this partial PredTransition across intersecting variants.
- // This will push a copies of TransVec[TransIdx] on the back of TransVec.
- std::vector<TransVariant> IntersectingVariants;
- getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
- // Now expand each variant on top of its copy of the transition.
- for (const TransVariant &IV : IntersectingVariants)
- pushVariant(IV, IsRead);
- return !IntersectingVariants.empty();
- };
-
+ const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
bool Subst = false;
// Visit each original RW within the current sequence.
for (SmallVectorImpl<unsigned>::const_iterator
// Push this RW on all partial PredTransitions or distribute variants.
// New PredTransitions may be pushed within this loop which should not be
// revisited (TransEnd must be loop invariant).
- bool HasAliases = false, WasPushed = false;
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
TransIdx != TransEnd; ++TransIdx) {
- // In the common case, push RW onto the current operand's sequence.
- if (!hasAliasedVariants(SchedRW, SchedModels)) {
+ // Distribute this partial PredTransition across intersecting variants.
+      // This will push copies of TransVec[TransIdx] on the back of TransVec.
+ std::vector<TransVariant> IntersectingVariants;
+ getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
+ // Now expand each variant on top of its copy of the transition.
+ for (const TransVariant &IV : IntersectingVariants)
+ pushVariant(IV, IsRead);
+ if (IntersectingVariants.empty()) {
if (IsRead)
TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
else
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
continue;
+ } else {
+ Subst = true;
}
- HasAliases = true;
- WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
- Subst |= WasPushed;
- }
- if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
- // If we're here this means that in some sched class:
- // a) We have read variant for CPU A
- // b) We have write variant for CPU B
- // b) We don't have write variant for CPU A
- // d) We must expand all read/write variants (IsForAnyCPU is true)
- // e) We couldn't expand SchedRW because TransVec doesn't have
- // any transition with compatible CPU ID.
- // In such case we create new empty transition with zero (AnyCPU)
- // index.
- TransVec.reserve(TransVec.size() + 1);
- TransVec.emplace_back(TransVec[StartIdx].PredTerm);
- TransVec.back().ReadSequences.emplace_back();
- Subst |= CollectAndAddVariants(TransVec.size() - 1, SchedRW);
}
}
return Subst;
bool Subst = false;
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
- bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
+ assert(!llvm::count(Trans.ProcIndices, 0));
// Visit each original write sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->WriteSequences.emplace_back();
}
- Subst |=
- substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
+ Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
}
// Visit each original read sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->ReadSequences.emplace_back();
}
- Subst |=
- substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
+ Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
}
return Subst;
}
// requires creating a new SchedClass.
for (ArrayRef<PredTransition>::iterator
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
+ // Variant expansion (substituteVariants) may create unconditional
+ // transitions. We don't need to build sched classes for them.
+ if (I->PredTerm.empty())
+ continue;
IdxVec OperWritesVariant, OperReadsVariant;
addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false);
addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true);
}
}
+std::vector<unsigned> CodeGenSchedModels::getAllProcIndices() const {
+ std::vector<unsigned> ProcIdVec;
+ for (const auto &PM : ProcModelMap)
+ if (PM.second != 0)
+ ProcIdVec.push_back(PM.second);
+ return ProcIdVec;
+}
+
+static std::vector<PredTransition>
+makePerProcessorTransitions(const PredTransition &Trans,
+ ArrayRef<unsigned> ProcIndices) {
+ std::vector<PredTransition> PerCpuTransVec;
+ for (unsigned ProcId : ProcIndices) {
+ assert(ProcId != 0);
+ PerCpuTransVec.push_back(Trans);
+ PerCpuTransVec.back().ProcIndices.assign(1, ProcId);
+ }
+ return PerCpuTransVec;
+}
+
// Create new SchedClasses for the given ReadWrite list. If any of the
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
// of the ReadWrite list, following Aliases if necessary.
}
LLVM_DEBUG(dbgs() << '\n');
+ LastTransitions = makePerProcessorTransitions(
+ LastTransitions[0], llvm::count(ProcIndices, 0)
+ ? ArrayRef<unsigned>(getAllProcIndices())
+ : ProcIndices);
// Collect all PredTransitions for individual operands.
// Iterate until no variant writes remain.
bool SubstitutedAny;
LLVM_DEBUG(Transitions.dump());
LastTransitions.swap(Transitions.TransVec);
} while (SubstitutedAny);
- // If the first transition has no variants, nothing to do.
- if (LastTransitions[0].PredTerm.empty())
- return;
// WARNING: We are about to mutate the SchedClasses vector. Do not refer to
// OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.