From 9f041b1830bd270c5dbfa8a94395e6300836e0dd Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 26 Mar 2018 16:50:11 +0000 Subject: [PATCH] [Pipeliner] Add missing loop carried dependences The pipeliner is not adding a dependence edge for a loop carried dependence, and ends up scheduling a load from iteration n prior to an aliased store in iteration n-1. The code that adds the loop carried dependences in the pipeliner doesn't check if the memory objects for loads and stores are "identified" (i.e., distinct) objects. If they are not, then the code that adds the dependences needs to be conservative. The objects can be used to check dependences only when they are distinct objects. The code that checks for loop carried dependences has been updated to classify loads and stores that are not identified as "unknown" values. A store with an "unknown" value can potentially create a loop carried dependence with any pending load. Patch by Brendon Cahoon. llvm-svn: 328547 --- llvm/lib/CodeGen/MachinePipeliner.cpp | 37 +++++++++------ .../CodeGen/Hexagon/swp-loop-carried-unknown.ll | 54 ++++++++++++++++++++++ 2 files changed, 77 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index a3c6003..8015eda 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1051,6 +1051,13 @@ static void getUnderlyingObjects(MachineInstr *MI, if (!MM->getValue()) return; GetUnderlyingObjects(const_cast(MM->getValue()), Objs, DL); + for (Value *V : Objs) { + if (!isIdentifiedObject(V)) { + Objs.clear(); + return; + } + Objs.push_back(V); + } } /// Add a chain edge between a load and store if the store can be an @@ -1059,6 +1066,8 @@ static void getUnderlyingObjects(MachineInstr *MI, /// but that code doesn't create loop carried dependences. void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { MapVector> PendingLoads; + Value *UnknownValue = + UndefValue::get(Type::getVoidTy(MF.getFunction().getContext())); for (auto &SU : SUnits) { MachineInstr &MI = *SU.getInstr(); if (isDependenceBarrier(MI, AA)) @@ -1066,6 +1075,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { else if (MI.mayLoad()) { SmallVector Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + if (Objs.empty()) + Objs.push_back(UnknownValue); for (auto V : Objs) { SmallVector &SUs = PendingLoads[V]; SUs.push_back(&SU); @@ -1073,6 +1084,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { } else if (MI.mayStore()) { SmallVector Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + if (Objs.empty()) + Objs.push_back(UnknownValue); for (auto V : Objs) { MapVector>::iterator I = PendingLoads.find(V); @@ -1087,20 +1100,16 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { // offset, then mark the dependence as loop carried potentially. unsigned BaseReg1, BaseReg2; int64_t Offset1, Offset2; - if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) || - !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { - SDep Dep(Load, SDep::Barrier); - Dep.setLatency(1); - SU.addPred(Dep); - continue; - } - if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { - assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && - "What happened to the chain edge?"); - SDep Dep(Load, SDep::Barrier); - Dep.setLatency(1); - SU.addPred(Dep); - continue; + if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) && + TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { + if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + "What happened to the chain edge?"); + SDep Dep(Load, SDep::Barrier); + Dep.setLatency(1); + SU.addPred(Dep); + continue; + } } // Second, the more expensive check that uses alias analysis on the // base registers. If they alias, and the load offset is less than diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll new file mode 100644 index 0000000..3f8abf0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Test that the pipeliner schedules a store before the load in which there is a +; loop carried dependence. Previously, the loop carried dependence wasn't added +; and the load from iteration n was scheduled prior to the store from iteration +; n-1. + +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: memh({{.*}}) = +; CHECK: = memuh({{.*}}) +; CHECK: endloop0 + +%s.0 = type { i16, i16 } + +; Function Attrs: nounwind +define void @f0() local_unnamed_addr #0 { +b0: + br label %b1 + +b1: ; preds = %b1, %b0 + %v0 = phi i32 [ 0, %b0 ], [ %v22, %b1 ] + %v1 = load %s.0*, %s.0** undef, align 4 + %v2 = getelementptr inbounds %s.0, %s.0* %v1, i32 0, i32 0 + %v3 = load i16, i16* %v2, align 2 + %v4 = add i16 0, %v3 + %v5 = add i16 %v4, 0 + %v6 = add i16 %v5, 0 + %v7 = add i16 %v6, 0 + %v8 = add i16 %v7, 0 + %v9 = add i16 %v8, 0 + %v10 = add i16 %v9, 0 + %v11 = add i16 %v10, 0 + %v12 = add i16 %v11, 0 + %v13 = add i16 %v12, 0 + %v14 = add i16 %v13, 0 + %v15 = add i16 %v14, 0 + %v16 = add i16 %v15, 0 + %v17 = add i16 %v16, 0 + %v18 = add i16 %v17, 0 + %v19 = add i16 %v18, 0 + %v20 = load %s.0*, %s.0** undef, align 4 + store i16 %v19, i16* undef, align 2 + %v21 = getelementptr inbounds %s.0, %s.0* %v20, i32 0, i32 1 + store i16 0, i16* %v21, align 2 + %v22 = add nuw nsw i32 %v0, 1 + %v23 = icmp eq i32 %v22, 6 + br i1 %v23, label %b2, label %b1 + +b2: ; preds = %b1 + ret void +} + +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" } -- 2.7.4