From ec2564818c28182a5ca00620a87deeb382949af2 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 18 Dec 2012 20:53:01 +0000 Subject: [PATCH] MISched: add dependence to ExitSU to model live-out latency. llvm-svn: 170454 --- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 17 ++++++++- llvm/test/CodeGen/ARM/misched-inorder-latency.ll | 48 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/ARM/misched-inorder-latency.ll diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index ef33b12..ebb80a7 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -765,6 +765,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(SU && "No SUnit mapped to this MI"); // Add register-based dependencies (data, anti, and output). + bool HasVRegDef = false; for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { const MachineOperand &MO = MI->getOperand(j); if (!MO.isReg()) continue; @@ -775,12 +776,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addPhysRegDeps(SU, j); else { assert(!IsPostRA && "Virtual register encountered!"); - if (MO.isDef()) + if (MO.isDef()) { + HasVRegDef = true; addVRegDefDeps(SU, j); + } else if (MO.readsReg()) // ignore undef operands addVRegUseDeps(SU, j); } } + // If we haven't seen any uses in this scheduling region, create a + // dependence edge to ExitSU to model the live-out latency. This is required + // for vreg defs with no in-region use, and prefetches with no vreg def. + // + // FIXME: NumDataSuccs would be more precise than NumSuccs here. This + // check currently relies on being called before adding chain deps. + if (SU->NumSuccs == 0 && SU->Latency > 1 + && (HasVRegDef || MI->mayLoad())) { + SDep Dep(SU, SDep::Artificial); + Dep.setLatency(SU->Latency - 1); + ExitSU.addPred(Dep); + } // Add chain dependencies. 
// Chain dependencies used to enforce memory order should have diff --git a/llvm/test/CodeGen/ARM/misched-inorder-latency.ll b/llvm/test/CodeGen/ARM/misched-inorder-latency.ll new file mode 100644 index 0000000..8c06b4c --- /dev/null +++ b/llvm/test/CodeGen/ARM/misched-inorder-latency.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -enable-misched -march=thumb -mcpu=swift \ +; RUN: -pre-RA-sched=source -scheditins=false -ilp-window=0 \ +; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s +; +; For these tests, we set -ilp-window=0 to simulate an in-order processor. + +; %val1 is a 3-cycle load live out of %entry. It should be hoisted +; above the add. +; CHECK: @testload +; CHECK: %entry +; CHECK: ldr +; CHECK: adds +; CHECK: bne +; CHECK: %true +define i32 @testload(i32 *%ptr, i32 %sumin) { +entry: + %sum1 = add i32 %sumin, 1 + %val1 = load i32* %ptr + %p = icmp eq i32 %sumin, 0 + br i1 %p, label %true, label %end +true: + %sum2 = add i32 %sum1, 1 + %ptr2 = getelementptr i32* %ptr, i32 1 + %val = load i32* %ptr2 + %val2 = add i32 %val1, %val + br label %end +end: + %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ] + %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ] + %sumout = add i32 %valmerge, %summerge + ret i32 %sumout +} + +; The prefetch gets a default latency of 3 cycles and should be hoisted +; above the add. +; +; CHECK: @testprefetch +; CHECK: %entry +; CHECK: pld +; CHECK: adds +; CHECK: bx +define i32 @testprefetch(i8 *%ptr, i32 %i) { +entry: + %tmp = add i32 %i, 1 + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) + ret i32 %tmp +} +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind -- 2.7.4