}
/// Add @p Access to this statement's list of accesses.
- void addAccess(MemoryAccess *Access);
+ ///
+ /// @param Access The access to add.
+ /// @param Prepend If true, will add @p Access before all other instructions
+ /// (instead of appending it).
+ void addAccess(MemoryAccess *Access, bool Preprend = false);
/// Remove a MemoryAccess from this statement.
///
isl::union_map convertZoneToTimepoints(isl::union_map Zone, isl::dim Dim,
bool InclStart, bool InclEnd);
+/// Overload of convertZoneToTimepoints(isl::map,InclStart,InclEnd) to process
+/// only a single map.
+isl::map convertZoneToTimepoints(isl::map Zone, isl::dim Dim, bool InclStart,
+ bool InclEnd);
+
/// Distribute the domain to the tuples of a wrapped range map.
///
/// @param Map { Domain[] -> [Range1[] -> Range2[]] }
/// { DomainRead[] -> Element[] }
isl::union_map AllReads;
+ /// The loaded values (llvm::LoadInst) of all reads.
+ /// { [Element[] -> DomainRead[]] -> ValInst[] }
+ isl::union_map AllReadValInst;
+
/// Combined access relations of all MemoryKind::Array, MAY_WRITE accesses.
/// { DomainMayWrite[] -> Element[] }
isl::union_map AllMayWrites;
/// { DomainMustWrite[] -> Element[] }
isl::union_map AllMustWrites;
+ /// Combined access relations of all MK_Array write accesses (union of
+ /// AllMayWrites and AllMustWrites).
+ /// { DomainWrite[] -> Element[] }
+ isl::union_map AllWrites;
+
/// The value instances written to array elements of all write accesses.
/// { [Element[] -> DomainWrite[]] -> ValInst[] }
isl::union_map AllWriteValInst;
public:
/// Return the SCoP this object is analyzing.
Scop *getScop() const { return S; }
+
+ /// A reaching definition zone is known to have the definition's written value
+ /// if the definition is a MUST_WRITE.
+ ///
+ /// @return { [Element[] -> Zone[]] -> ValInst[] }
+ isl::union_map computeKnownFromMustWrites() const;
+
+ /// A reaching definition zone is known to be the same value as any load that
+ /// reads from that array element in that period.
+ ///
+ /// @return { [Element[] -> Zone[]] -> ValInst[] }
+ isl::union_map computeKnownFromLoad() const;
+
+ /// Compute which value an array element stores at every instant.
+ ///
+ /// @param FromWrite Use stores as source of information.
+ /// @param FromRead Use loads as source of information.
+ ///
+ /// @return { [Element[] -> Zone[]] -> ValInst[] }
+ isl::union_map computeKnown(bool FromWrite, bool FromRead) const;
};
/// Create a domain-to-unknown value mapping.
}
}
-void ScopStmt::addAccess(MemoryAccess *Access) {
+void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
Instruction *AccessInst = Access->getAccessInstruction();
if (Access->isArrayKind()) {
PHIReads[PHI] = Access;
}
+ if (Prepend) {
+ MemAccs.insert(MemAccs.begin(), Access);
+ return;
+ }
MemAccs.push_back(Access);
}
return give(isl_union_map_union(Zone.take(), ShiftedZone.take()));
}
+isl::map polly::convertZoneToTimepoints(isl::map Zone, isl::dim Dim,
+ bool InclStart, bool InclEnd) {
+ if (!InclStart && InclEnd)
+ return Zone;
+
+ auto ShiftedZone = shiftDim(Zone, Dim, -1, -1);
+ if (InclStart && !InclEnd)
+ return ShiftedZone;
+ else if (!InclStart && !InclEnd)
+ return give(isl_map_intersect(Zone.take(), ShiftedZone.take()));
+
+ assert(InclStart && InclEnd);
+ return give(isl_map_union(Zone.take(), ShiftedZone.take()));
+}
+
isl::map polly::distributeDomain(isl::map Map) {
// Note that we cannot take Map apart into { Domain[] -> Range1[] } and {
// Domain[] -> Range2[] } and combine again. We would loose any relation
isl_map_wrap(isl_map_apply_domain(Lifetime.copy(), DefTarget.copy())));
simplify(EltZone);
+ // When known knowledge is disabled, just return the unknown value. It will
+ // either get filtered out or conflict with itself.
// { DomainDef[] -> ValInst[] }
- auto ValInst = makeValInst(V, DefMA->getStatement(),
- LI->getLoopFor(DefInst->getParent()));
+ isl::map ValInst;
+ if (DelicmComputeKnown)
+ ValInst = makeValInst(V, DefMA->getStatement(),
+ LI->getLoopFor(DefInst->getParent()));
+ else
+ ValInst = makeUnknownForDomain(DefMA->getStatement());
// { DomainDef[] -> [Element[] -> Zone[]] }
auto EltKnownTranslator =
return Result;
}
- /// Compute which value an array element stores at every instant.
- ///
- /// @return { [Element[] -> Zone[]] -> ValInst[] }
- isl::union_map computeKnown() const {
- // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] }
- auto EltReachdDef =
- distributeDomain(give(isl_union_map_curry(WriteReachDefZone.copy())));
-
- // { [Element[] -> DomainWrite[]] -> ValInst[] }
- auto AllKnownWriteValInst = filterKnownValInst(AllWriteValInst);
-
- // { [Element[] -> Zone[]] -> ValInst[] }
- return EltReachdDef.apply_range(AllKnownWriteValInst);
- }
-
/// Determine when an array element is written to, and which value instance is
/// written.
///
computeCommon();
EltUnused = computeLifetime();
- EltKnown = computeKnown();
+ EltKnown = computeKnown(true, false);
EltWritten = computeWritten();
}
DeLICMAnalyzed++;
#include "polly/ForwardOpTree.h"
+#include "polly/Options.h"
+#include "polly/RegisterPasses.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopInfo.h"
#include "polly/ScopPass.h"
#include "polly/Support/GICHelper.h"
+#include "polly/Support/ISLOStream.h"
+#include "polly/Support/ISLTools.h"
#include "polly/Support/VirtualInstruction.h"
+#include "polly/ZoneAlgo.h"
#include "llvm/Analysis/ValueTracking.h"
#define DEBUG_TYPE "polly-delicm"
using namespace polly;
using namespace llvm;
+static cl::opt<bool>
+ AnalyzeKnown("polly-optree-analyze-known",
+ cl::desc("Analyze array contents for load forwarding"),
+ cl::cat(PollyCategory), cl::init(true), cl::Hidden);
+
+static cl::opt<unsigned long>
+ MaxOps("polly-optree-max-ops",
+ cl::desc("Maximum number of ISL operations to invest for known "
+ "analysis; 0=no limit"),
+ cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
+
+STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
+STATISTIC(KnownOutOfQuota,
+ "Analyses aborted because max_operations was reached");
+STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis");
+
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
+STATISTIC(TotalKnownLoadsForwarded,
+ "Number of forwarded loads because their value was known");
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
STATISTIC(TotalModifiedStmts,
/// the MemoryAccess is removed and the all the operand tree instructions are
/// moved into the statement. All original instructions are left in the source
/// statements. The simplification pass can clean these up.
-class ForwardOpTreeImpl {
+class ForwardOpTreeImpl : ZoneAlgorithm {
private:
- /// The SCoP we are currently processing.
- Scop *S;
-
- /// LoopInfo is required for VirtualUse.
- LoopInfo *LI;
-
/// How many instructions have been copied to other statements.
int NumInstructionsCopied = 0;
+ /// Number of loads forwarded because their value was known.
+ int NumKnownLoadsForwarded = 0;
+
/// How many read-only accesses have been copied.
int NumReadOnlyCopied = 0;
/// Whether we carried out at least one change to the SCoP.
bool Modified = false;
+ /// Contains the zones where array elements are known to contain a specific
+ /// value.
+ /// { [Element[] -> Zone[]] -> ValInst[] }
+ /// @see computeKnown()
+ isl::union_map Known;
+
+ /// Translator for newly introduced ValInsts to already existing ValInsts such
+ /// that new introduced load instructions can reuse the Known analysis of its
+ /// original load. { ValInst[] -> ValInst[] }
+ isl::union_map Translator;
+
+ /// Get list of array elements that do contain the same ValInst[] at Domain[].
+ ///
+ /// @param ValInst { Domain[] -> ValInst[] }
+ /// The values for which we search for alternative locations,
+ /// per statement instance.
+ ///
+ /// @return { Domain[] -> Element[] }
+ /// For each statement instance, the array elements that contain the
+ /// same ValInst.
+ isl::union_map findSameContentElements(isl::union_map ValInst) {
+ assert(ValInst.is_single_valued().is_true());
+
+ // { Domain[] }
+ isl::union_set Domain = ValInst.domain();
+
+ // { Domain[] -> Scatter[] }
+ isl::union_map Schedule = getScatterFor(Domain);
+
+ // { Element[] -> [Scatter[] -> ValInst[]] }
+ isl::union_map MustKnownCurried =
+ convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
+
+ // { [Domain[] -> ValInst[]] -> Scatter[] }
+ isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
+
+ // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
+ isl::union_map SchedValDomVal =
+ DomValSched.range_product(ValInst.range_map()).reverse();
+
+ // { Element[] -> [Domain[] -> ValInst[]] }
+ isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
+
+ // { Domain[] -> Element[] }
+ isl::union_map MustKnownMap =
+ MustKnownInst.uncurry().domain().unwrap().reverse();
+ simplify(MustKnownMap);
+
+ return MustKnownMap;
+ }
+
+ /// Find a single array element for each statement instance, within a single
+ /// array.
+ ///
+ /// @param MustKnown { Domain[] -> Element[] }
+ /// Set of candidate array elements.
+ /// @param Domain { Domain[] }
+ /// The statement instance for which we need elements for.
+ ///
+ /// @return { Domain[] -> Element[] }
+ /// For each statement instance, an array element out of @p MustKnown.
+ /// All array elements must be in the same array (Polly does not yet
+ /// support reading from different accesses using the same
+ /// MemoryAccess). If no mapping for all of @p Domain exists, returns
+ /// null.
+ isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
+ // { Domain[] -> Element[] }
+ isl::map Result;
+
+ // MemoryAccesses can read only elements from a single array
+ // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
+ // Look through all spaces until we find one that contains at least the
+ // wanted statement instance.s
+ MustKnown.foreach_map([&, this](isl::map Map) -> isl::stat {
+ // Get the array this is accessing.
+ isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
+ ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
+
+ // No support for generation of indirect array accesses.
+ if (SAI->getBasePtrOriginSAI())
+ return isl::stat::ok; // continue
+
+ // Determine whether this map contains all wanted values.
+ isl::set MapDom = Map.domain();
+ if (!Domain.is_subset(MapDom).is_true())
+ return isl::stat::ok; // continue
+
+ // There might be multiple array elements that contain the same value, but
+ // choose only one of them. lexmin is used because it returns a one-value
+ // mapping, we do not care about which one.
+ // TODO: Get the simplest access function.
+ Result = Map.lexmin();
+ return isl::stat::error; // break
+ });
+
+ return Result;
+ }
+
+public:
+ /// Compute the zones of known array element contents.
+ ///
+ /// @return True if the computed #Known is usable.
+ bool computeKnownValues() {
+ isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
+
+ // Check that nothing strange occurs.
+ if (!isCompatibleScop()) {
+ KnownIncompatible++;
+ return false;
+ }
+
+ isl_ctx_reset_error(IslCtx.get());
+ {
+ IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), MaxOps);
+
+ computeCommon();
+ Known = computeKnown(true, true);
+ simplify(Known);
+
+ // Preexisting ValInsts use the known content analysis of themselves.
+ Translator = makeIdentityMap(Known.range(), false);
+ }
+
+ if (!Known || !Translator) {
+ assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
+ KnownOutOfQuota++;
+ Known = nullptr;
+ Translator = nullptr;
+ DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
+ return false;
+ }
+
+ KnownAnalyzed++;
+ DEBUG(dbgs() << "All known: " << Known << "\n");
+
+ return true;
+ }
+
void printStatistics(raw_ostream &OS, int Indent = 0) {
OS.indent(Indent) << "Statistics {\n";
OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
<< '\n';
+ OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
+ << '\n';
OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
<< '\n';
OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
OS.indent(Indent) << "}\n";
}
+ /// Create a new MemoryAccess of type read and MemoryKind::Array.
+ ///
+ /// @param Stmt The statement in which the access occurs.
+ /// @param LI The instruction that does the access.
+ /// @param AccessRelation The array element that each statement instance
+ /// accesses.
+ ///
+ /// @param The newly created access.
+ MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
+ isl::map AccessRelation) {
+ isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
+ ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
+
+ // Create a dummy SCEV access, to be replaced anyway.
+ SmallVector<const SCEV *, 4> Sizes;
+ Sizes.reserve(SAI->getNumberOfDimensions());
+ SmallVector<const SCEV *, 4> Subscripts;
+ Subscripts.reserve(SAI->getNumberOfDimensions());
+ for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
+ Sizes.push_back(SAI->getDimensionSize(i));
+ Subscripts.push_back(nullptr);
+ }
+
+ MemoryAccess *Access =
+ new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
+ LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
+ S->addAccessFunction(Access);
+ Stmt->addAccess(Access, true);
+
+ Access->setNewAccessRelation(AccessRelation);
+
+ return Access;
+ }
+
+ /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
+ /// use in every instance of @p UseStmt.
+ ///
+ /// @param UseStmt Statement a scalar is used in.
+ /// @param DefStmt Statement a scalar is defined in.
+ ///
+ /// @return { DomainUse[] -> DomainDef[] }
+ isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
+ // { DomainUse[] -> Scatter[] }
+ isl::map UseScatter = getScatterFor(UseStmt);
+
+ // { Zone[] -> DomainDef[] }
+ isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
+
+ // { Scatter[] -> DomainDef[] }
+ isl::map ReachDefTimepoints =
+ convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
+
+ // { DomainUse[] -> DomainDef[] }
+ return UseScatter.apply_range(ReachDefTimepoints);
+ }
+
+ /// Forward a load by reading from an array element that contains the same
+ /// value. Typically the location it was loaded from.
+ ///
+ /// @param TargetStmt The statement the operand tree will be copied to.
+ /// @param Inst The (possibly speculatable) instruction to forward.
+ /// @param UseStmt The statement that uses @p Inst.
+ /// @param UseLoop The loop @p Inst is used in.
+ /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
+ /// A mapping from the statement instance @p Inst is used
+ /// to the statement instance it is forwarded to.
+ /// @param DefStmt The statement @p Inst is defined in.
+ /// @param DefLoop The loop which contains @p Inst.
+ /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
+ /// A mapping from the statement instance @p Inst is
+ /// defined to the statement instance it is forwarded to.
+ /// @param DoIt If false, only determine whether an operand tree can be
+ /// forwarded. If true, carry out the forwarding. Do not
+ /// use DoIt==true if an operand tree is not known to be
+ /// forwardable.
+ ///
+ /// @return FD_NotApplicable if @p Inst is not a LoadInst.
+ /// FD_CannotForward if no array element to load from was found.
+ /// FD_CanForwardLeaf if the load is already in the target statement
+ /// instance.
+ /// FD_CanForwardTree if @p Inst is forwardable.
+ /// FD_DidForward if @p DoIt was true.
+ ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
+ ScopStmt *UseStmt, Loop *UseLoop,
+ isl::map UseToTarget, ScopStmt *DefStmt,
+ Loop *DefLoop, isl::map DefToTarget,
+ bool DoIt) {
+ // Cannot do anything without successful known analysis.
+ if (Known.is_null())
+ return FD_NotApplicable;
+
+ LoadInst *LI = dyn_cast<LoadInst>(Inst);
+ if (!LI)
+ return FD_NotApplicable;
+
+ // If the load is already in the statement, not forwarding is necessary.
+ // However, it might happen that the LoadInst is already present in the
+ // statement's instruction list. In that case we do as follows:
+ // - For the evaluation (DoIt==false), we can trivially forward it as it is
+ // benefit of forwarding an already present instruction.
+ // - For the execution (DoIt==false), prepend the instruction (to make it
+ // available to all instructions following in the instruction list), but
+ // do not add another MemoryAccess.
+ MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
+ if (Access && !DoIt)
+ return FD_CanForwardLeaf;
+
+ if (DoIt)
+ TargetStmt->prependInstruction(LI);
+
+ ForwardingDecision OpDecision =
+ forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
+ DefToTarget, DoIt);
+ switch (OpDecision) {
+ case FD_CannotForward:
+ assert(!DoIt);
+ return OpDecision;
+
+ case FD_CanForwardLeaf:
+ case FD_CanForwardTree:
+ assert(!DoIt);
+ break;
+
+ case FD_DidForward:
+ assert(DoIt);
+ break;
+
+ default:
+ llvm_unreachable("Shouldn't return this");
+ }
+
+ // { DomainDef[] -> ValInst[] }
+ isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
+
+ // { DomainTarget[] -> ValInst[] }
+ isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
+ isl::union_map TranslatedExpectedVal =
+ isl::union_map(TargetExpectedVal).apply_range(Translator);
+
+ // { DomainTarget[] -> Element[] }
+ isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
+
+ isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
+ if (!SameVal)
+ return FD_CannotForward;
+
+ if (!DoIt)
+ return FD_CanForwardTree;
+
+ if (Access) {
+ DEBUG(dbgs() << " forwarded known load with preexisting MemoryAccess"
+ << Access << "\n");
+ } else {
+ Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
+ DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access
+ << "\n");
+
+ // { ValInst[] }
+ isl::space ValInstSpace = ExpectedVal.get_space().range();
+
+ // After adding a new load to the SCoP, also update the Known content
+ // about it. The new load will have a known ValInst of
+ // { [DomainTarget[] -> Value[]] }
+ // but which -- because it is a copy of it -- has same value as the
+ // { [DomainDef[] -> Value[]] }
+ // that it replicates. Instead of cloning the known content of
+ // [DomainDef[] -> Value[]]
+ // for DomainTarget[], we add a 'translator' that maps
+ // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
+ // before comparing to the known content.
+ // TODO: 'Translator' could also be used to map PHINodes to their incoming
+ // ValInsts.
+ if (ValInstSpace.is_wrapping()) {
+ // { DefDomain[] -> Value[] }
+ isl::map ValInsts = ExpectedVal.range().unwrap();
+
+ // { DefDomain[] }
+ isl::set DefDomain = ValInsts.domain();
+
+ // { Value[] }
+ isl::space ValSpace = ValInstSpace.unwrap().range();
+
+ // { Value[] -> Value[] }
+ isl::map ValToVal =
+ isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
+
+ // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
+ isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
+
+ Translator = Translator.add_map(LocalTranslator);
+ DEBUG(dbgs() << " local translator is " << LocalTranslator
+ << "\n");
+ }
+ }
+ DEBUG(dbgs() << " expected values where " << TargetExpectedVal
+ << "\n");
+ DEBUG(dbgs() << " candidate elements where " << Candidates << "\n");
+ assert(Access);
+
+ NumKnownLoadsForwarded++;
+ TotalKnownLoadsForwarded++;
+ return FD_DidForward;
+ }
+
/// Forwards a speculatively executable instruction.
///
- /// If the instruction itself cannot be executed speculatively, returns
- /// FD_NotApplicable.
+ /// @param TargetStmt The statement the operand tree will be copied to.
+ /// @param UseInst The (possibly speculatable) instruction to forward.
+ /// @param DefStmt The statement @p UseInst is defined in.
+ /// @param DefLoop The loop which contains @p UseInst.
+ /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
+ /// A mapping from the statement instance @p UseInst is
+ /// defined to the statement instance it is forwarded to.
+ /// @param DoIt If false, only determine whether an operand tree can be
+ /// forwarded. If true, carry out the forwarding. Do not
+ /// use DoIt==true if an operand tree is not known to be
+ /// forwardable.
///
- /// The parameters the same as for
- /// @see forwardTree()
+ /// @return FD_NotApplicable if @p UseInst is not speculatable.
+ /// FD_CannotForward if one of @p UseInst's operands is not
+ /// forwardable.
+ /// FD_CanForwardTree if @p UseInst is forwardable.
+ /// FD_DidForward if @p DoIt was true.
ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
Instruction *UseInst,
- ScopStmt *UseStmt, Loop *UseLoop,
- bool DoIt) {
+ ScopStmt *DefStmt, Loop *DefLoop,
+ isl::map DefToTarget, bool DoIt) {
// PHIs, unless synthesizable, are not yet supported.
if (isa<PHINode>(UseInst))
return FD_NotApplicable;
if (mayBeMemoryDependent(*UseInst))
return FD_NotApplicable;
- Loop *DefLoop = LI->getLoopFor(UseInst->getParent());
- ScopStmt *DefStmt = S->getStmtFor(UseInst);
- assert(DefStmt && "Value must be defined somewhere");
-
if (DoIt) {
// To ensure the right order, prepend this instruction before its
// operands. This ensures that its operands are inserted before the
for (Value *OpVal : UseInst->operand_values()) {
ForwardingDecision OpDecision =
- forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
+ forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
switch (OpDecision) {
case FD_CannotForward:
assert(!DoIt);
/// Determines whether an operand tree can be forwarded or carries out a
/// forwarding, depending on the @p DoIt flag.
///
- /// @param TargetStmt The statement the operand tree will be copied to.
- /// @param UseVal The value (usually an instruction) which is root of an
- /// operand tree.
- /// @param UseStmt The statement that uses @p UseVal.
- /// @param UseLoop The loop @p UseVal is used in.
- /// @param DoIt If false, only determine whether an operand tree can be
- /// forwarded. If true, carry out the forwarding. Do not use
- /// DoIt==true if an operand tree is not known to be
- /// forwardable.
+ /// @param TargetStmt The statement the operand tree will be copied to.
+ /// @param UseVal The value (usually an instruction) which is root of an
+ /// operand tree.
+ /// @param UseStmt The statement that uses @p UseVal.
+ /// @param UseLoop The loop @p UseVal is used in.
+ /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
+ /// A mapping from the statement instance @p UseVal is used
+ /// to the statement instance it is forwarded to.
+ /// @param DoIt If false, only determine whether an operand tree can be
+ /// forwarded. If true, carry out the forwarding. Do not
+ /// use DoIt==true if an operand tree is not known to be
+ /// forwardable.
///
/// @return If DoIt==false, return whether the operand tree can be forwarded.
/// If DoIt==true, return FD_DidForward.
- ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
- ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
+ ForwardingDecision forwardTree(ScopStmt *TargetStmt, llvm::Value *UseVal,
+ ScopStmt *UseStmt, llvm::Loop *UseLoop,
+ isl::map UseToTarget, bool DoIt) {
+ ScopStmt *DefStmt = nullptr;
+ Loop *DefLoop = nullptr;
+
+ // { DefDomain[] -> TargetDomain[] }
+ isl::map DefToTarget;
+
VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
switch (VUse.getKind()) {
case VirtualUse::Constant:
return FD_DidForward;
case VirtualUse::Intra:
+ // Knowing that UseStmt and DefStmt are the same statement instance, just
+ // reuse the information about UseStmt for DefStmt
+ DefStmt = UseStmt;
+ DefToTarget = UseToTarget;
+
+ LLVM_FALLTHROUGH;
case VirtualUse::Inter:
- auto Inst = cast<Instruction>(UseVal);
+ Instruction *Inst = cast<Instruction>(UseVal);
+
+ if (!DefStmt)
+ DefStmt = S->getStmtFor(Inst);
+ assert(DefStmt && "Value must be defined somewhere");
+ DefLoop = LI->getLoopFor(Inst->getParent());
+
+ if (DefToTarget.is_null() && !Known.is_null()) {
+ // { UseDomain[] -> DefDomain[] }
+ isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
+
+ // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
+ // { DefDomain[] -> TargetDomain[] }
+ DefToTarget = UseToTarget.apply_domain(UseToDef);
+ simplify(DefToTarget);
+ }
- ForwardingDecision SpeculativeResult =
- forwardSpeculatable(TargetStmt, Inst, UseStmt, UseLoop, DoIt);
+ ForwardingDecision SpeculativeResult = forwardSpeculatable(
+ TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
if (SpeculativeResult != FD_NotApplicable)
return SpeculativeResult;
+ ForwardingDecision KnownResult =
+ forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
+ DefStmt, DefLoop, DefToTarget, DoIt);
+ if (KnownResult != FD_NotApplicable)
+ return KnownResult;
+
// When no method is found to forward the operand tree, we effectively
// cannot handle it.
DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n");
ScopStmt *Stmt = RA->getStatement();
Loop *InLoop = Stmt->getSurroundingLoop();
- ForwardingDecision Assessment =
- forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
+ isl::map TargetToUse;
+ if (!Known.is_null()) {
+ isl::space DomSpace = Stmt->getDomainSpace();
+ TargetToUse =
+ isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
+ }
+
+ ForwardingDecision Assessment = forwardTree(
+ Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
assert(Assessment != FD_DidForward);
if (Assessment != FD_CanForwardTree)
return false;
- ForwardingDecision Execution =
- forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
+ ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
+ InLoop, TargetToUse, true);
assert(Execution == FD_DidForward &&
"A previous positive assessment must also be executable");
(void)Execution;
}
public:
- ForwardOpTreeImpl(Scop *S, LoopInfo *LI) : S(S), LI(LI) {}
+ ForwardOpTreeImpl(Scop *S, LoopInfo *LI)
+ : ZoneAlgorithm("polly-optree", S, LI) {}
/// Return which SCoP this instance is processing.
Scop *getScop() const { return S; }
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
Impl = make_unique<ForwardOpTreeImpl>(&S, &LI);
+ if (AnalyzeKnown) {
+ DEBUG(dbgs() << "Prepare forwarders...\n");
+ Impl->computeKnownValues();
+ }
+
DEBUG(dbgs() << "Forwarding operand trees...\n");
Impl->forwardOperandTrees();
return give(isl_map_from_domain(Domain.take()));
}
+/// Return whether @p Map maps to an unknown value.
+///
+/// @param { [] -> ValInst[] }
+static bool isMapToUnknown(const isl::map &Map) {
+ isl::space Space = Map.get_space().range();
+ return Space.has_tuple_id(isl::dim::set).is_false() &&
+ Space.is_wrapping().is_false() && Space.dim(isl::dim::set) == 0;
+}
+
+/// Return only the mappings that map to known values.
+///
+/// @param UMap { [] -> ValInst[] }
+///
+/// @return { [] -> ValInst[] }
+static isl::union_map filterKnownValInst(const isl::union_map &UMap) {
+ isl::union_map Result = isl::union_map::empty(UMap.get_space());
+ UMap.foreach_map([=, &Result](isl::map Map) -> isl::stat {
+ if (!isMapToUnknown(Map))
+ Result = Result.add_map(Map);
+ return isl::stat::ok;
+ });
+ return Result;
+}
+
static std::string printInstruction(Instruction *Instr,
bool IsForDebug = false) {
std::string Result;
void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) {
assert(MA->isLatestArrayKind());
assert(MA->isRead());
+ ScopStmt *Stmt = MA->getStatement();
// { DomainRead[] -> Element[] }
auto AccRel = getAccessRelationFor(MA);
AllReads = give(isl_union_map_add_map(AllReads.take(), AccRel.copy()));
+
+ if (LoadInst *Load = dyn_cast_or_null<LoadInst>(MA->getAccessInstruction())) {
+ // { DomainRead[] -> ValInst[] }
+ isl::map LoadValInst = makeValInst(
+ Load, Stmt, LI->getLoopFor(Load->getParent()), Stmt->isBlockStmt());
+
+ // { DomainRead[] -> [Element[] -> DomainRead[]] }
+ isl::map IncludeElement =
+ give(isl_map_curry(isl_map_domain_map(AccRel.take())));
+
+ // { [Element[] -> DomainRead[]] -> ValInst[] }
+ isl::map EltLoadValInst =
+ give(isl_map_apply_domain(LoadValInst.take(), IncludeElement.take()));
+
+ AllReadValInst = give(
+ isl_union_map_add_map(AllReadValInst.take(), EltLoadValInst.take()));
+ }
}
void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
AllMayWrites = makeEmptyUnionMap();
AllMustWrites = makeEmptyUnionMap();
AllWriteValInst = makeEmptyUnionMap();
+ AllReadValInst = makeEmptyUnionMap();
for (auto &Stmt : *S) {
for (auto *MA : Stmt) {
}
// { DomainWrite[] -> Element[] }
- auto AllWrites =
+ AllWrites =
give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy()));
// { [Element[] -> Zone[]] -> DomainWrite[] }
}
OS.indent(Indent) << "}\n";
}
+
+isl::union_map ZoneAlgorithm::computeKnownFromMustWrites() const {
+ // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] }
+ isl::union_map EltReachdDef = distributeDomain(WriteReachDefZone.curry());
+
+ // { [Element[] -> DomainWrite[]] -> ValInst[] }
+ isl::union_map AllKnownWriteValInst = filterKnownValInst(AllWriteValInst);
+
+ // { [Element[] -> Zone[]] -> ValInst[] }
+ return EltReachdDef.apply_range(AllKnownWriteValInst);
+}
+
+isl::union_map ZoneAlgorithm::computeKnownFromLoad() const {
+ // { Element[] }
+ isl::union_set AllAccessedElts = AllReads.range().unite(AllWrites.range());
+
+ // { Element[] -> Scatter[] }
+ isl::union_map EltZoneUniverse = isl::union_map::from_domain_and_range(
+ AllAccessedElts, isl::set::universe(ScatterSpace));
+
+ // This assumes there are no "holes" in
+ // isl_union_map_domain(WriteReachDefZone); alternatively, compute the zone
+ // before the first write or that are not written at all.
+ // { Element[] -> Scatter[] }
+ isl::union_set NonReachDef =
+ EltZoneUniverse.wrap().subtract(WriteReachDefZone.domain());
+
+ // { [Element[] -> Zone[]] -> ReachDefId[] }
+ isl::union_map DefZone =
+ WriteReachDefZone.unite(isl::union_map::from_domain(NonReachDef));
+
+ // { [Element[] -> Scatter[]] -> Element[] }
+ isl::union_map EltZoneElt = EltZoneUniverse.domain_map();
+
+ // { [Element[] -> Zone[]] -> [Element[] -> ReachDefId[]] }
+ isl::union_map DefZoneEltDefId = EltZoneElt.range_product(DefZone);
+
+ // { Element[] -> [Zone[] -> ReachDefId[]] }
+ isl::union_map EltDefZone = DefZone.curry();
+
+ // { [Element[] -> Zone[] -> [Element[] -> ReachDefId[]] }
+ isl::union_map EltZoneEltDefid = distributeDomain(EltDefZone);
+
+ // { [Element[] -> Scatter[]] -> DomainRead[] }
+ isl::union_map Reads = AllReads.range_product(Schedule).reverse();
+
+ // { [Element[] -> Scatter[]] -> [Element[] -> DomainRead[]] }
+ isl::union_map ReadsElt = EltZoneElt.range_product(Reads);
+
+ // { [Element[] -> Scatter[]] -> ValInst[] }
+ isl::union_map ScatterKnown = ReadsElt.apply_range(AllReadValInst);
+
+ // { [Element[] -> ReachDefId[]] -> ValInst[] }
+ isl::union_map DefidKnown =
+ DefZoneEltDefId.apply_domain(ScatterKnown).reverse();
+
+ // { [Element[] -> Zone[]] -> ValInst[] }
+ return DefZoneEltDefId.apply_range(DefidKnown);
+}
+
+isl::union_map ZoneAlgorithm::computeKnown(bool FromWrite,
+ bool FromRead) const {
+ isl::union_map Result = makeEmptyUnionMap();
+
+ if (FromWrite)
+ Result = Result.unite(computeKnownFromMustWrites());
+
+ if (FromRead)
+ Result = Result.unite(computeKnownFromLoad());
+
+ simplify(Result);
+ return Result;
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Rematerialize a load.
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ %val = load double, double* %B_idx
+ br label %bodyB
+
+ bodyB:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %val, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Known loads forwarded: 1
+; CHECK: Operand trees forwarded: 1
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: Stmt_bodyB
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: null;
+; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: store double %val, double* %A_idx
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ %val = load double, double* %B_idx
+ br label %bodyB
+
+ bodyB:
+ store double 0.0, double* %B_idx
+ %C_idx = getelementptr inbounds double, double* %C, i32 %j
+ store double %val, double* %C_idx
+ br label %bodyC
+
+ bodyC:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %val, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Known loads forwarded: 2
+; CHECK: Operand trees forwarded: 2
+; CHECK: Statements with forwarded operand trees: 2
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: null;
+; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: store double 0.000000e+00, double* %B_idx
+; CHECK-NEXT: store double %val, double* %C_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyC
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: null;
+; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_C[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: store double %val, double* %A_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp sle i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %i = phi i32 [0, %for], [%i.inc, %bodyA]
+ %B_idx = getelementptr inbounds double, double* %B, i32 %i
+ %val = load double, double* %B_idx
+ %i.inc = add nuw nsw i32 %i, 1
+ %i.cmp = icmp slt i32 %i, %n
+ br i1 %i.cmp, label %bodyA, label %bodyB
+
+ bodyB:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %val, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Known loads forwarded: 1
+; CHECK: Operand trees forwarded: 1
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0, i1] -> MemRef_B[i1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0, i1] -> MemRef_val[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: %i.cmp = icmp slt i32 %i, %n
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: null;
+; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[n] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: store double %val, double* %A_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ %val = load double, double* %B_idx
+ %add = fadd double %val, 42.0
+ br label %bodyB
+
+ bodyB:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %add, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Instructions copied: 1
+; CHECK: Known loads forwarded: 1
+; CHECK: Operand trees forwarded: 1
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_add[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: %add = fadd double %val, 4.200000e+01
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: null;
+; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: %add = fadd double %val, 4.200000e+01
+; CHECK-NEXT: store double %add, double* %A_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree -polly-codegen -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func1(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ %val1 = load double, double* %A_idx
+ %val2 = fadd double %val1, %val1
+ br label %bodyB
+
+ bodyB:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ store double %val1, double* %B_idx
+ %C_idx = getelementptr inbounds double, double* %C, i32 %j
+ store double %val2, double* %C_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Instructions copied: 1
+; CHECK: Known loads forwarded: 3
+; CHECK: Operand trees forwarded: 2
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val1[] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val2[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val1 = load double, double* %A_idx
+; CHECK-NEXT: %val2 = fadd double %val1, %val1
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: null;
+; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val1 = load double, double* %A_idx
+; CHECK-NEXT: %val1 = load double, double* %A_idx
+; CHECK-NEXT: %val2 = fadd double %val1, %val1
+; CHECK-NEXT: %val1 = load double, double* %A_idx
+; CHECK-NEXT: store double %val1, double* %B_idx
+; CHECK-NEXT: store double %val2, double* %C_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: }
+
+
+define void @func2(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ %val1 = load double, double* %A_idx
+ %val2 = fadd double %val1, %val1
+ br label %bodyB
+
+ bodyB:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ store double %val2, double* %B_idx
+ %C_idx = getelementptr inbounds double, double* %C, i32 %j
+ store double %val1, double* %C_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Instructions copied: 1
+; CHECK: Known loads forwarded: 2
+; CHECK: Operand trees forwarded: 1
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val2[] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val1[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val1 = load double, double* %A_idx
+; CHECK-NEXT: %val2 = fadd double %val1, %val1
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: null;
+; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_val1[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val1 = load double, double* %A_idx
+; CHECK-NEXT: %val1 = load double, double* %A_idx
+; CHECK-NEXT: %val2 = fadd double %val1, %val1
+; CHECK-NEXT: store double %val2, double* %B_idx
+; CHECK-NEXT: store double %val1, double* %C_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ %val = load double, double* %B_idx
+ %cond = icmp slt i32 %j, 1
+ br i1 %cond, label %bodyA_true, label %bodyB
+
+ bodyA_true:
+ store double 0.0, double* %B_idx
+ br label %bodyB
+
+ bodyB:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %val, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: ForwardOpTree executed, but did not modify anything
--- /dev/null
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Cannot rematerialize %val from B[0] at bodyC because B[0] has been
+; overwritten in bodyB.
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ %val = load double, double* %B_idx
+ br label %bodyB
+
+ bodyB:
+ store double 0.0, double* %B_idx
+ br label %bodyC
+
+ bodyC:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %val, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: ForwardOpTree executed, but did not modify anything