if (!MM->getValue())
return;
GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
+ for (Value *V : Objs) {
+ if (!isIdentifiedObject(V)) {
+ Objs.clear();
+ return;
+ }
+ Objs.push_back(V);
+ }
}
/// Add a chain edge between a load and store if the store can be an
/// but that code doesn't create loop carried dependences.
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ Value *UnknownValue =
+ UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
for (auto &SU : SUnits) {
MachineInstr &MI = *SU.getInstr();
if (isDependenceBarrier(MI, AA))
else if (MI.mayLoad()) {
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
for (auto V : Objs) {
SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
SUs.push_back(&SU);
} else if (MI.mayStore()) {
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
for (auto V : Objs) {
MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
PendingLoads.find(V);
// offset, then mark the dependence as loop carried potentially.
unsigned BaseReg1, BaseReg2;
int64_t Offset1, Offset2;
- if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
- !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
- "What happened to the chain edge?");
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
+ if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
+ TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
+ if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+ "What happened to the chain edge?");
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
}
// Second, the more expensive check that uses alias analysis on the
// base registers. If they alias, and the load offset is less than
--- /dev/null
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that the pipeliner schedules a store before the load in which there is a
+; loop carried dependence. Previously, the loop carried dependence wasn't added
+; and the load from iteration n was scheduled prior to the store from iteration
+; n-1.
+
+; CHECK: loop0(.LBB0_[[LOOP:.]],
+; CHECK: .LBB0_[[LOOP]]:
+; CHECK: memh({{.*}}) =
+; CHECK: = memuh({{.*}})
+; CHECK: endloop0
+
+%s.0 = type { i16, i16 }
+
+; Function Attrs: nounwind
+define void @f0() local_unnamed_addr #0 {
+b0:
+ br label %b1
+
+b1: ; preds = %b1, %b0
+ %v0 = phi i32 [ 0, %b0 ], [ %v22, %b1 ]
+ %v1 = load %s.0*, %s.0** undef, align 4
+ %v2 = getelementptr inbounds %s.0, %s.0* %v1, i32 0, i32 0
+ %v3 = load i16, i16* %v2, align 2
+ %v4 = add i16 0, %v3
+ %v5 = add i16 %v4, 0
+ %v6 = add i16 %v5, 0
+ %v7 = add i16 %v6, 0
+ %v8 = add i16 %v7, 0
+ %v9 = add i16 %v8, 0
+ %v10 = add i16 %v9, 0
+ %v11 = add i16 %v10, 0
+ %v12 = add i16 %v11, 0
+ %v13 = add i16 %v12, 0
+ %v14 = add i16 %v13, 0
+ %v15 = add i16 %v14, 0
+ %v16 = add i16 %v15, 0
+ %v17 = add i16 %v16, 0
+ %v18 = add i16 %v17, 0
+ %v19 = add i16 %v18, 0
+ %v20 = load %s.0*, %s.0** undef, align 4
+ store i16 %v19, i16* undef, align 2
+ %v21 = getelementptr inbounds %s.0, %s.0* %v20, i32 0, i32 1
+ store i16 0, i16* %v21, align 2
+ %v22 = add nuw nsw i32 %v0, 1
+ %v23 = icmp eq i32 %v22, 6
+ br i1 %v23, label %b2, label %b1
+
+b2: ; preds = %b1
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }