#define POLLY_ZONEALGO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "isl/isl-noexceptions.h"
#include <memory>
class Value;
class LoopInfo;
class Loop;
+class PHINode;
} // namespace llvm
namespace polly {
class Scop;
class ScopStmt;
class MemoryAccess;
+class ScopArrayInfo;
/// Return only the mappings that map to known values.
///
/// { Element[] }
isl::union_set CompatibleElts;
+ /// List of PHIs that may transitively refer to themselves.
+ ///
+ /// Computing them would require a polyhedral transitive closure operation,
+ /// for which isl may only return an approximation. For correctness, we always
+ /// require an exact result. Hence, we exclude such PHIs.
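+  ///
+  /// A schematic example of such a PHI (compare isRecursivePHI() in the
+  /// implementation), which feeds itself through the loop's back edge:
+  ///
+  ///   loop:
+  ///     %phi = phi double [0.0, %preheader], [%phi, %loop]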
+ llvm::SmallPtrSet<llvm::PHINode *, 4> RecursivePHIs;
+
+ /// PHIs that have been computed.
+ ///
+ /// Computed PHIs are replaced by their incoming values using #NormalizeMap.
+ llvm::DenseSet<llvm::PHINode *> ComputedPHIs;
+
+ /// For computed PHIs, contains the ValInst they stand for.
+ ///
+ /// To show an example, assume the following PHINode:
+ ///
+ /// Stmt:
+ /// %phi = phi double [%val1, %bb1], [%val2, %bb2]
+ ///
+  /// Its ValInst is:
+ ///
+ /// { [Stmt[i] -> phi[]] }
+ ///
+  /// The value %phi will be either %val1 or %val2, depending on whether %bb1
+  /// or %bb2 was executed immediately before iteration i. In SCoPs, this can
+  /// be determined at compile-time, and the result is stored in
+  /// #NormalizeMap. For the previous example, it could be:
+  ///
+  ///    { [Stmt[i] -> phi[]] -> [Stmt[0] -> val1[]] : i = 0;
+  ///      [Stmt[i] -> phi[]] -> [Stmt[i] -> val2[]] : i > 0 }
+ ///
+  /// Only the ValInsts of PHIs in #ComputedPHIs are present in this map; all
+  /// other values are assumed to represent themselves. This avoids adding
+  /// lots of identity entries to this map.
+ ///
+ /// { PHIValInst[] -> IncomingValInst[] }
+ isl::union_map NormalizeMap;
+
+ /// Cache for computePerPHI(const ScopArrayInfo *)
+ llvm::SmallDenseMap<llvm::PHINode *, isl::union_map> PerPHIMaps;
+
/// Prepare the object before computing the zones of @p S.
///
/// @param PassName Name of the pass using this analysis.
/// have.
///
/// @return { ValInst[] }
- isl::map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
+ isl::union_map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
void addArrayWriteAccess(MemoryAccess *MA);
isl::union_map makeEmptyUnionMap() const;
+ /// For each 'execution' of a PHINode, get the incoming block that was
+ /// executed before.
+ ///
+ /// For each PHI instance we can directly determine which was the incoming
+ /// block, and hence derive which value the PHI has.
+ ///
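+  /// As a schematic illustration (the tuple names are invented): for a PHI in
+  /// a loop header that is written once on the entry edge and once per
+  /// iteration on the latch edge, the result could be
+  ///
+  ///   { PHIRead[i] -> EntryWrite[] : i = 0;
+  ///     PHIRead[i] -> LatchWrite[i - 1] : i > 0 }
+  ///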
+ /// @param SAI The ScopArrayInfo representing the PHI's storage.
+ ///
+ /// @return { DomainPHIRead[] -> DomainPHIWrite[] }
+ isl::union_map computePerPHI(const polly::ScopArrayInfo *SAI);
+
/// Find the array elements that can be used for zone analysis.
void collectCompatibleElts();
isl::map makeValInst(llvm::Value *Val, ScopStmt *UserStmt, llvm::Loop *Scope,
bool IsCertain = true);
+ /// Create and normalize a ValInst.
+ ///
+ /// @see makeValInst
+ /// @see normalizeValInst
+  /// @see #NormalizeMap
+ isl::union_map makeNormalizedValInst(llvm::Value *Val, ScopStmt *UserStmt,
+ llvm::Loop *Scope,
+ bool IsCertain = true);
+
/// Return whether @p MA can be used for transformations (e.g. OpTree load
/// forwarding, DeLICM mapping).
bool isCompatibleAccess(MemoryAccess *MA);
/// Compute the different zones.
void computeCommon();
+ /// Compute the normalization map that replaces PHIs by their incoming
+ /// values.
+ ///
+ /// @see #NormalizeMap
+ void computeNormalizedPHIs();
+
/// Print the current state of all MemoryAccesses to @p.
void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const;
+ /// Is @p MA a PHI READ access that can be normalized?
+ ///
+ /// @see #NormalizeMap
+ bool isNormalizable(MemoryAccess *MA);
+
+ /// @{
+ /// Determine whether the argument does not map to any computed PHI. Those
+ /// should have been replaced by their incoming values.
+ ///
+  /// @see #NormalizeMap
+ bool isNormalized(isl::map Map);
+ bool isNormalized(isl::union_map Map);
+ /// @}
+
public:
/// Return the SCoP this object is analyzing.
Scop *getScop() const { return S; }
return std::make_pair(DefUses, Lifetime);
}
- /// For each 'execution' of a PHINode, get the incoming block that was
- /// executed before.
- ///
- /// For each PHI instance we can directly determine which was the incoming
- /// block, and hence derive which value the PHI has.
- ///
- /// @param SAI The ScopArrayInfo representing the PHI's storage.
- ///
- /// @return { DomainPHIRead[] -> DomainPHIWrite[] }
- isl::union_map computePerPHI(const ScopArrayInfo *SAI) {
- assert(SAI->isPHIKind());
-
- // { DomainPHIWrite[] -> Scatter[] }
- auto PHIWriteScatter = makeEmptyUnionMap();
-
- // Collect all incoming block timepoint.
- for (auto *MA : S->getPHIIncomings(SAI)) {
- auto Scatter = getScatterFor(MA);
- PHIWriteScatter =
- give(isl_union_map_add_map(PHIWriteScatter.take(), Scatter.take()));
- }
-
- // { DomainPHIRead[] -> Scatter[] }
- auto PHIReadScatter = getScatterFor(S->getPHIRead(SAI));
-
- // { DomainPHIRead[] -> Scatter[] }
- auto BeforeRead = beforeScatter(PHIReadScatter, true);
-
- // { Scatter[] }
- auto WriteTimes = singleton(
- give(isl_union_map_range(PHIWriteScatter.copy())), ScatterSpace);
-
- // { DomainPHIRead[] -> Scatter[] }
- auto PHIWriteTimes =
- give(isl_map_intersect_range(BeforeRead.take(), WriteTimes.take()));
- auto LastPerPHIWrites = give(isl_map_lexmax(PHIWriteTimes.take()));
-
- // { DomainPHIRead[] -> DomainPHIWrite[] }
- auto Result = give(isl_union_map_apply_range(
- isl_union_map_from_map(LastPerPHIWrites.take()),
- isl_union_map_reverse(PHIWriteScatter.take())));
- assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true);
- assert(isl_union_map_is_injective(Result.keep()) == isl_bool_true);
- return Result;
- }
-
/// Try to map a MemoryKind::Value to a given array element.
///
/// @param SAI Representation of the scalar's memory to map.
cl::desc("Analyze array contents for load forwarding"),
cl::cat(PollyCategory), cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ NormalizePHIs("polly-optree-normalize-phi",
+ cl::desc("Replace PHIs by their incoming values"),
+ cl::cat(PollyCategory), cl::init(false), cl::Hidden);
+
static cl::opt<unsigned>
MaxOps("polly-optree-max-ops",
cl::desc("Maximum number of ISL operations to invest for known "
IslQuotaScope QuotaScope = MaxOpGuard.enter();
computeCommon();
+ if (NormalizePHIs)
+ computeNormalizedPHIs();
Known = computeKnown(true, true);
// Preexisting ValInsts use the known content analysis of themselves.
Translator = makeIdentityMap(Known.range(), false);
}
- if (!Known || !Translator) {
+ if (!Known || !Translator || !NormalizeMap) {
assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
Known = nullptr;
Translator = nullptr;
+ NormalizeMap = nullptr;
DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
return false;
}
// { DomainDef[] -> ValInst[] }
isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
+ assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized");
// { DomainTarget[] -> ValInst[] }
isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
}
// { DomainDef[] -> ValInst[] }
- isl::union_map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
+ isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
// { DomainTarget[] -> ValInst[] }
isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
STATISTIC(NumIncompatibleArrays, "Number of not zone-analyzable arrays");
STATISTIC(NumCompatibleArrays, "Number of zone-analyzable arrays");
+STATISTIC(NumRecursivePHIs, "Number of recursive PHIs");
+STATISTIC(NumNormalizablePHIs, "Number of normalizable PHIs");
+STATISTIC(NumPHINormalizations, "Number of executed PHI normalizations");
using namespace polly;
using namespace llvm;
}
}
-isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) {
+isl::union_map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA,
+ isl::map AccRel) {
if (!MA->isMustWrite())
return {};
if (AccVal &&
AccVal->getType() == MA->getLatestScopArrayInfo()->getElementType() &&
AccRel.is_single_valued().is_true())
- return makeValInst(AccVal, Stmt, L);
+ return makeNormalizedValInst(AccVal, Stmt, L);
// memset(_, '0', ) is equivalent to writing the null value to all touched
// elements. isMustWrite() ensures that all of an element's bytes are
Type *Ty = MA->getLatestScopArrayInfo()->getElementType();
if (WrittenConstant && WrittenConstant->isZeroValue()) {
Constant *Zero = Constant::getNullValue(Ty);
- return makeValInst(Zero, Stmt, L);
+ return makeNormalizedValInst(Zero, Stmt, L);
}
}
AllMayWrites = AllMayWrites.add_map(AccRel);
// { Domain[] -> ValInst[] }
- isl::map WriteValInstance = getWrittenValue(MA, AccRel);
+ isl::union_map WriteValInstance = getWrittenValue(MA, AccRel);
if (!WriteValInstance)
WriteValInstance = makeUnknownForDomain(Stmt);
isl::map IncludeElement = AccRel.domain_map().curry();
// { [Element[] -> DomainWrite[]] -> ValInst[] }
- isl::map EltWriteValInst = WriteValInstance.apply_domain(IncludeElement);
+ isl::union_map EltWriteValInst =
+ WriteValInstance.apply_domain(IncludeElement);
- AllWriteValInst = AllWriteValInst.add_map(EltWriteValInst);
+ AllWriteValInst = AllWriteValInst.unite(EltWriteValInst);
+}
+
+/// Return whether @p PHI refers (also transitively through other PHIs) to
+/// itself.
+///
+/// loop:
+/// %phi1 = phi [0, %preheader], [%phi1, %loop]
+/// br i1 %c, label %loop, label %exit
+///
+/// exit:
+/// %phi2 = phi [%phi1, %bb]
+///
+/// In this example, %phi1 is recursive, but %phi2 is not.
+static bool isRecursivePHI(const PHINode *PHI) {
+ SmallVector<const PHINode *, 8> Worklist;
+ SmallPtrSet<const PHINode *, 8> Visited;
+ Worklist.push_back(PHI);
+
+ while (!Worklist.empty()) {
+ const PHINode *Cur = Worklist.pop_back_val();
+
+    if (!Visited.insert(Cur).second)
+      continue;
+
+ for (const Use &Incoming : Cur->incoming_values()) {
+ Value *IncomingVal = Incoming.get();
+ auto *IncomingPHI = dyn_cast<PHINode>(IncomingVal);
+ if (!IncomingPHI)
+ continue;
+
+ if (IncomingPHI == PHI)
+ return true;
+ Worklist.push_back(IncomingPHI);
+ }
+ }
+ return false;
+}
+
+isl::union_map ZoneAlgorithm::computePerPHI(const ScopArrayInfo *SAI) {
+ // TODO: If the PHI has an incoming block from before the SCoP, it is not
+ // represented in any ScopStmt.
+
+ auto *PHI = cast<PHINode>(SAI->getBasePtr());
+ auto It = PerPHIMaps.find(PHI);
+ if (It != PerPHIMaps.end())
+ return It->second;
+
+ assert(SAI->isPHIKind());
+
+ // { DomainPHIWrite[] -> Scatter[] }
+ isl::union_map PHIWriteScatter = makeEmptyUnionMap();
+
+ // Collect all incoming block timepoints.
+ for (MemoryAccess *MA : S->getPHIIncomings(SAI)) {
+ isl::map Scatter = getScatterFor(MA);
+ PHIWriteScatter = PHIWriteScatter.add_map(Scatter);
+ }
+
+ // { DomainPHIRead[] -> Scatter[] }
+ isl::map PHIReadScatter = getScatterFor(S->getPHIRead(SAI));
+
+ // { DomainPHIRead[] -> Scatter[] }
+ isl::map BeforeRead = beforeScatter(PHIReadScatter, true);
+
+ // { Scatter[] }
+ isl::set WriteTimes = singleton(PHIWriteScatter.range(), ScatterSpace);
+
+ // { DomainPHIRead[] -> Scatter[] }
+ isl::map PHIWriteTimes = BeforeRead.intersect_range(WriteTimes);
+ isl::map LastPerPHIWrites = PHIWriteTimes.lexmax();
+
+ // { DomainPHIRead[] -> DomainPHIWrite[] }
+ isl::union_map Result =
+ isl::union_map(LastPerPHIWrites).apply_range(PHIWriteScatter.reverse());
+ assert(!Result.is_single_valued().is_false());
+ assert(!Result.is_injective().is_false());
+
+ PerPHIMaps.insert({PHI, Result});
+ return Result;
}
isl::union_set ZoneAlgorithm::makeEmptyUnionSet() const {
llvm_unreachable("Unhandled use type");
}
+/// Remove all computed PHIs from @p Input and replace them by their incoming
+/// values.
+///
+/// @param Input { [] -> ValInst[] }
+/// @param ComputedPHIs Set of PHIs that are replaced. Their ValInsts must
+///                     appear on the LHS of @p NormalizeMap.
+/// @param NormalizeMap { ValInst[] -> ValInst[] }
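+///
+/// Schematic example (names are illustrative): with %phi contained in
+/// @p ComputedPHIs and
+///
+///   NormalizeMap = { [Stmt[i] -> phi[]] -> [Stmt[i] -> val[]] }
+///
+/// an input { Dom[i] -> [Stmt[i] -> phi[]] } is rewritten to
+/// { Dom[i] -> [Stmt[i] -> val[]] }.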
+static isl::union_map normalizeValInst(isl::union_map Input,
+ const DenseSet<PHINode *> &ComputedPHIs,
+ isl::union_map NormalizeMap) {
+ isl::union_map Result = isl::union_map::empty(Input.get_space());
+ Input.foreach_map(
+ [&Result, &ComputedPHIs, &NormalizeMap](isl::map Map) -> isl::stat {
+ isl::space Space = Map.get_space();
+ isl::space RangeSpace = Space.range();
+
+ // Instructions within the SCoP are always wrapped. Non-wrapped tuples
+ // are therefore invariant in the SCoP and don't need normalization.
+ if (!RangeSpace.is_wrapping()) {
+ Result = Result.add_map(Map);
+ return isl::stat::ok;
+ }
+
+ auto *PHI = dyn_cast<PHINode>(static_cast<Value *>(
+ RangeSpace.unwrap().get_tuple_id(isl::dim::out).get_user()));
+
+ // If no normalization is necessary, then the ValInst stands for itself.
+ if (!ComputedPHIs.count(PHI)) {
+ Result = Result.add_map(Map);
+ return isl::stat::ok;
+ }
+
+ // Otherwise, apply the normalization.
+ isl::union_map Mapped = isl::union_map(Map).apply_range(NormalizeMap);
+ Result = Result.unite(Mapped);
+      NumPHINormalizations++;
+ return isl::stat::ok;
+ });
+ return Result;
+}
+
+isl::union_map ZoneAlgorithm::makeNormalizedValInst(llvm::Value *Val,
+ ScopStmt *UserStmt,
+ llvm::Loop *Scope,
+ bool IsCertain) {
+ isl::map ValInst = makeValInst(Val, UserStmt, Scope, IsCertain);
+ isl::union_map Normalized =
+ normalizeValInst(ValInst, ComputedPHIs, NormalizeMap);
+ return Normalized;
+}
+
bool ZoneAlgorithm::isCompatibleAccess(MemoryAccess *MA) {
if (!MA)
return false;
return isa<StoreInst>(AccInst) || isa<LoadInst>(AccInst);
}
+bool ZoneAlgorithm::isNormalizable(MemoryAccess *MA) {
+ assert(MA->isRead());
+
+  // Exclude ExitPHIs; we assume that a normalizable PHI has a READ
+  // MemoryAccess.
+ if (!MA->isOriginalPHIKind())
+ return false;
+
+  // Exclude recursive PHIs; normalizing them would require a transitive
+  // closure.
+ auto *PHI = cast<PHINode>(MA->getAccessInstruction());
+ if (RecursivePHIs.count(PHI))
+ return false;
+
+  // Ensure that each incoming value can be represented by a ValInst[].
+  // Values from incoming statements that are associated with multiple
+  // incoming values would have to be represented by the PHI itself, a case
+  // that normalization (in particular isNormalized()) does not handle yet.
+ const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo();
+ auto Incomings = S->getPHIIncomings(SAI);
+ for (MemoryAccess *Incoming : Incomings) {
+ if (Incoming->getIncoming().size() != 1)
+ return false;
+ }
+
+ return true;
+}
+
+bool ZoneAlgorithm::isNormalized(isl::map Map) {
+ isl::space Space = Map.get_space();
+ isl::space RangeSpace = Space.range();
+
+ if (!RangeSpace.is_wrapping())
+ return true;
+
+ auto *PHI = dyn_cast<PHINode>(static_cast<Value *>(
+ RangeSpace.unwrap().get_tuple_id(isl::dim::out).get_user()));
+ if (!PHI)
+ return true;
+
+ auto *IncomingStmt = static_cast<ScopStmt *>(
+ RangeSpace.unwrap().get_tuple_id(isl::dim::in).get_user());
+ MemoryAccess *PHIRead = IncomingStmt->lookupPHIReadOf(PHI);
+ if (!isNormalizable(PHIRead))
+ return true;
+
+ return false;
+}
+
+bool ZoneAlgorithm::isNormalized(isl::union_map UMap) {
+ auto Result = UMap.foreach_map([this](isl::map Map) -> isl::stat {
+ if (isNormalized(Map))
+ return isl::stat::ok;
+ return isl::stat::error;
+ });
+ return Result == isl::stat::ok;
+}
+
void ZoneAlgorithm::computeCommon() {
AllReads = makeEmptyUnionMap();
AllMayWrites = makeEmptyUnionMap();
AllWriteValInst = makeEmptyUnionMap();
AllReadValInst = makeEmptyUnionMap();
+  // Default to empty, i.e., no normalization/replacement takes place. Call
+  // computeNormalizedPHIs() to initialize.
+ NormalizeMap = makeEmptyUnionMap();
+ ComputedPHIs.clear();
+
for (auto &Stmt : *S) {
for (auto *MA : Stmt) {
if (!MA->isLatestArrayKind())
simplify(WriteReachDefZone);
}
+void ZoneAlgorithm::computeNormalizedPHIs() {
+ // Determine which PHIs can reference themselves. They are excluded from
+ // normalization to avoid problems with transitive closures.
+ for (ScopStmt &Stmt : *S) {
+ for (MemoryAccess *MA : Stmt) {
+ if (!MA->isPHIKind())
+ continue;
+ if (!MA->isRead())
+ continue;
+
+ // TODO: Can be more efficient since isRecursivePHI can theoretically
+ // determine recursiveness for multiple values and/or cache results.
+ auto *PHI = cast<PHINode>(MA->getAccessInstruction());
+ if (isRecursivePHI(PHI)) {
+ NumRecursivePHIs++;
+ RecursivePHIs.insert(PHI);
+ }
+ }
+ }
+
+ // { PHIValInst[] -> IncomingValInst[] }
+ isl::union_map AllPHIMaps = makeEmptyUnionMap();
+
+ // Discover new PHIs and try to normalize them.
+ DenseSet<PHINode *> AllPHIs;
+ for (ScopStmt &Stmt : *S) {
+ for (MemoryAccess *MA : Stmt) {
+ if (!MA->isOriginalPHIKind())
+ continue;
+ if (!MA->isRead())
+ continue;
+ if (!isNormalizable(MA))
+ continue;
+
+ auto *PHI = cast<PHINode>(MA->getAccessInstruction());
+ const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo();
+
+ // { PHIDomain[] -> PHIValInst[] }
+ isl::map PHIValInst = makeValInst(PHI, &Stmt, Stmt.getSurroundingLoop());
+
+ // { IncomingDomain[] -> IncomingValInst[] }
+ isl::union_map IncomingValInsts = makeEmptyUnionMap();
+
+ // Get all incoming values.
+      for (MemoryAccess *IncomingMA : S->getPHIIncomings(SAI)) {
+        ScopStmt *IncomingStmt = IncomingMA->getStatement();
+
+        auto Incoming = IncomingMA->getIncoming();
+        assert(Incoming.size() == 1 && "The incoming value must be "
+                                       "representable by something other "
+                                       "than the PHI itself");
+ Value *IncomingVal = Incoming[0].second;
+
+ // { IncomingDomain[] -> IncomingValInst[] }
+ isl::map IncomingValInst = makeValInst(
+ IncomingVal, IncomingStmt, IncomingStmt->getSurroundingLoop());
+
+ IncomingValInsts = IncomingValInsts.add_map(IncomingValInst);
+ }
+
+ // Determine which instance of the PHI statement corresponds to which
+ // incoming value.
+ // { PHIDomain[] -> IncomingDomain[] }
+ isl::union_map PerPHI = computePerPHI(SAI);
+
+ // { PHIValInst[] -> IncomingValInst[] }
+ isl::union_map PHIMap =
+ PerPHI.apply_domain(PHIValInst).apply_range(IncomingValInsts);
+ assert(!PHIMap.is_single_valued().is_false());
+
+      // Resolve transitivity: the incoming value of the newly discovered PHI
+      // may reference a previously normalized PHI. At the same time,
+      // already-normalized PHIs might be normalized to the new PHI. In the
+      // end, no PHI may appear on the right-hand side of the normalization
+      // map.
+ PHIMap = normalizeValInst(PHIMap, AllPHIs, AllPHIMaps);
+ AllPHIs.insert(PHI);
+ AllPHIMaps = normalizeValInst(AllPHIMaps, AllPHIs, PHIMap);
+
+ AllPHIMaps = AllPHIMaps.unite(PHIMap);
+ NumNormalizablePHIs++;
+ }
+ }
+ simplify(AllPHIMaps);
+
+ // Apply the normalization.
+ ComputedPHIs = AllPHIs;
+ NormalizeMap = AllPHIMaps;
+
+ assert(!NormalizeMap || isNormalized(NormalizeMap));
+}
+
void ZoneAlgorithm::printAccesses(llvm::raw_ostream &OS, int Indent) const {
OS.indent(Indent) << "After accesses {\n";
for (auto &Stmt : *S) {
--- /dev/null
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
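+;
+; Approximate C source of the kernel below (reconstructed from the IR; the
+; loops keep tmp[i] in PHIs, which -polly-optree-normalize-phi replaces by
+; their incoming values):
+;
+; memset(y, 0, sizeof(double) * 2100);
+; for (int i = 0; i < 2; i += 1) {
+;   tmp[i] = 0.0;
+;   for (int j = 0; j < 2; j += 1)
+;     tmp[i] = tmp[i] + A[i][j] * x[j];
+;   for (int j = 0; j < 2; j += 1)
+;     y[j] = y[j] + A[i][j] * tmp[i];
+; }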
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define internal fastcc void @kernel_atax([2100 x double]* nocapture readonly %A, double* nocapture readonly %x, double* nocapture %y, double* nocapture %tmp) unnamed_addr #0 {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ %y15 = bitcast double* %y to i8*
+ call void @llvm.memset.p0i8.i64(i8* %y15, i8 0, i64 16800, i32 8, i1 false)
+ br label %for.body3
+
+for.body3: ; preds = %for.inc40, %entry.split
+ %indvars.iv8 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next9, %for.inc40 ]
+ %arrayidx5 = getelementptr inbounds double, double* %tmp, i64 %indvars.iv8
+ store double 0.000000e+00, double* %arrayidx5, align 8, !tbaa !6
+ br label %for.body8
+
+for.body8: ; preds = %for.body8, %for.body3
+ %0 = phi double [ 0.000000e+00, %for.body3 ], [ %add, %for.body8 ]
+ %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
+ %arrayidx14 = getelementptr inbounds [2100 x double], [2100 x double]* %A, i64 %indvars.iv8, i64 %indvars.iv
+ %1 = load double, double* %arrayidx14, align 8, !tbaa !6
+ %arrayidx16 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+ %2 = load double, double* %arrayidx16, align 8, !tbaa !6
+ %mul = fmul double %1, %2
+ %add = fadd double %0, %mul
+ store double %add, double* %arrayidx5, align 8, !tbaa !6
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 2
+ br i1 %exitcond, label %for.end21, label %for.body8
+
+for.end21: ; preds = %for.body8
+ br label %for.body24
+
+for.body24: ; preds = %for.body24.for.body24_crit_edge, %for.end21
+ %3 = phi double [ %add, %for.end21 ], [ %.pre, %for.body24.for.body24_crit_edge ]
+ %indvars.iv5 = phi i64 [ 0, %for.end21 ], [ %indvars.iv.next6, %for.body24.for.body24_crit_edge ]
+ %arrayidx26 = getelementptr inbounds double, double* %y, i64 %indvars.iv5
+ %4 = load double, double* %arrayidx26, align 8, !tbaa !6
+ %arrayidx30 = getelementptr inbounds [2100 x double], [2100 x double]* %A, i64 %indvars.iv8, i64 %indvars.iv5
+ %5 = load double, double* %arrayidx30, align 8, !tbaa !6
+ %mul33 = fmul double %5, %3
+ %add34 = fadd double %4, %mul33
+ store double %add34, double* %arrayidx26, align 8, !tbaa !6
+ %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+ %exitcond7 = icmp eq i64 %indvars.iv.next6, 2
+ br i1 %exitcond7, label %for.inc40, label %for.body24.for.body24_crit_edge
+
+for.body24.for.body24_crit_edge: ; preds = %for.body24
+ %.pre = load double, double* %arrayidx5, align 8, !tbaa !6
+ br label %for.body24
+
+for.inc40: ; preds = %for.body24
+ %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+ %exitcond10 = icmp eq i64 %indvars.iv.next9, 2
+ br i1 %exitcond10, label %for.end42, label %for.body3
+
+for.end42: ; preds = %for.inc40
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
+
+attributes #0 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 6.0.0 (trunk 312565) (llvm/trunk 312564)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"double", !4, i64 0}
+
+
+; CHECK: Statistics {
+; CHECK: Operand trees forwarded: 2
+; CHECK: Statements with forwarded operand trees: 2
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT: Stmt_for_body3
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body3[i0] -> MemRef_tmp[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body3[i0] -> MemRef1__phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: store double 0.000000e+00, double* %arrayidx5, align 8, !tbaa !2
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_for_body8
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT: new: { Stmt_for_body8[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_A[i0, i1] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_x[i1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_add[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %0 = phi double [ 0.000000e+00, %for.body3 ], [ %add, %for.body8 ]
+; CHECK-NEXT: %1 = load double, double* %arrayidx14, align 8, !tbaa !2
+; CHECK-NEXT: %2 = load double, double* %arrayidx16, align 8, !tbaa !2
+; CHECK-NEXT: %mul = fmul double %1, %2
+; CHECK-NEXT: %add = fadd double %0, %mul
+; CHECK-NEXT: store double %add, double* %arrayidx5, align 8, !tbaa !2
+; CHECK-NEXT: %exitcond = icmp eq i64 %indvars.iv.next, 2
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_for_end21
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_end21[i0] -> MemRef_add[] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_end21[i0] -> MemRef5__phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_for_body24
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef5__phi[] };
+; CHECK-NEXT: new: { Stmt_for_body24[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef_y[i1] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef_A[i0, i1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef_y[i1] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %3 = phi double [ %add, %for.end21 ], [ %.pre, %for.body24.for.body24_crit_edge ]
+; CHECK-NEXT: %4 = load double, double* %arrayidx26, align 8, !tbaa !2
+; CHECK-NEXT: %5 = load double, double* %arrayidx30, align 8, !tbaa !2
+; CHECK-NEXT: %mul33 = fmul double %5, %3
+; CHECK-NEXT: %add34 = fadd double %4, %mul33
+; CHECK-NEXT: store double %add34, double* %arrayidx26, align 8, !tbaa !2
+; CHECK-NEXT: %exitcond7 = icmp eq i64 %indvars.iv.next6, 2
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_for_body24_for_body24_crit_edge
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body24_for_body24_crit_edge[i0, i1] -> MemRef5__phi[] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body24_for_body24_crit_edge[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %.pre = load double, double* %arrayidx5, align 8, !tbaa !2
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Rematerialize a load.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+; double val = B[j];
+;
+; bodyB:
+; double phi = val;
+;
+; bodyC:
+; A[j] = phi;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %B_idx = getelementptr inbounds double, double* %B, i32 %j
+ %val = load double, double* %B_idx
+ br label %bodyB
+
+ bodyB:
+ %phi = phi double [%val, %bodyA]
+ br label %bodyC
+
+ bodyC:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %phi, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Reloads: 2
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = load double, double* %B_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %phi = phi double [ %val, %bodyA ]
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyC
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_phi[] };
+; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_B[i0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: store double %phi, double* %A_idx
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
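+;
+; Approximate C source of the kernel below (reconstructed from the IR; the
+; PHIs %0 and %1 carry A[j + 1] and A[j] across the inner iterations):
+;
+; for (int t = 0; t < 2; t += 1)
+;   for (int j = 1; j < 3; j += 1)
+;     B[j] = 0.33333 * (A[j - 1] + A[j] + A[j + 1]);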
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define internal fastcc void @kernel_jacobi_1d(double* noalias nocapture %A, double* noalias nocapture %B) unnamed_addr #0 {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ %arrayidx6.phi.trans.insert = getelementptr inbounds double, double* %A, i64 1
+ %arrayidx21.phi.trans.insert = getelementptr inbounds double, double* %B, i64 1
+ br label %for.body
+
+for.body: ; preds = %for.inc33, %entry.split
+ %t.03 = phi i32 [ 0, %entry.split ], [ %inc34, %for.inc33 ]
+ %.pre = load double, double* %A, align 8, !tbaa !6
+ %.pre10 = load double, double* %arrayidx6.phi.trans.insert, align 8, !tbaa !6
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body
+ %0 = phi double [ %.pre10, %for.body ], [ %2, %for.body3 ]
+ %1 = phi double [ %.pre, %for.body ], [ %0, %for.body3 ]
+ %indvars.iv = phi i64 [ 1, %for.body ], [ %indvars.iv.next, %for.body3 ]
+ %add = fadd double %1, %0
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx9 = getelementptr inbounds double, double* %A, i64 %indvars.iv.next
+ %2 = load double, double* %arrayidx9, align 8, !tbaa !6
+ %add10 = fadd double %add, %2
+ %mul = fmul double %add10, 3.333300e-01
+ %arrayidx12 = getelementptr inbounds double, double* %B, i64 %indvars.iv
+ store double %mul, double* %arrayidx12, align 8, !tbaa !6
+ %exitcond = icmp eq i64 %indvars.iv.next, 3
+ br i1 %exitcond, label %for.end, label %for.body3
+
+for.end: ; preds = %for.body3
+ %.pre11 = load double, double* %B, align 8, !tbaa !6
+ %.pre12 = load double, double* %arrayidx21.phi.trans.insert, align 8, !tbaa !6
+ br label %for.inc33
+
+for.inc33: ; preds = %for.body16
+ %inc34 = add nuw nsw i32 %t.03, 1
+ %exitcond9 = icmp eq i32 %inc34, 2
+ br i1 %exitcond9, label %for.end35, label %for.body
+
+for.end35: ; preds = %for.inc33
+ ret void
+}
+
+attributes #0 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 6.0.0 (llvm/trunk 312874)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"double", !4, i64 0}
+
+
+; CHECK: Statistics {
+; CHECK: Operand trees forwarded: 2
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT: Stmt_for_body
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef1__phi[] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef2__phi[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %.pre = load double, double* %A, align 8, !tbaa !2
+; CHECK-NEXT: %.pre10 = load double, double* %arrayidx6.phi.trans.insert, align 8, !tbaa !2
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_for_body3
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT: new: { Stmt_for_body3[i0, i1] -> MemRef_A[1 + i1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef2__phi[] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef2__phi[] };
+; CHECK-NEXT: new: { Stmt_for_body3[i0, i1] -> MemRef_A[i1] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_A[2 + i1] };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_B[1 + i1] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %0 = phi double [ %.pre10, %for.body ], [ %2, %for.body3 ]
+; CHECK-NEXT: %1 = phi double [ %.pre, %for.body ], [ %0, %for.body3 ]
+; CHECK-NEXT: %add = fadd double %1, %0
+; CHECK-NEXT: %2 = load double, double* %arrayidx9, align 8, !tbaa !2
+; CHECK-NEXT: %add10 = fadd double %add, %2
+; CHECK-NEXT: %mul = fmul double %add10, 3.333300e-01
+; CHECK-NEXT: store double %mul, double* %arrayidx12, align 8, !tbaa !2
+; CHECK-NEXT: %exitcond = icmp eq i64 %indvars.iv.next, 3
+; CHECK-NEXT: }
+; CHECK-NEXT: }
--- /dev/null
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Contains a self-referencing PHINode that would require a
+; transitive closure to handle.
+;
+; for (int j = 0; j < n; j += 1) {
+; double phi = 0.0;
+; for (int i = 0; i < m; i += 1)
+; phi = phi;
+; A[j] = phi;
+; }
+;
+define void @func(i32 %n, i32 %m, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %for.preheader, label %exit
+
+ for.preheader:
+ br label %for.inner
+
+ for.inner:
+ %i = phi i32 [0, %for.preheader], [%i.inc, %for.inner]
+ %phi = phi double [0.0, %for.preheader], [%phi, %for.inner]
+ %i.inc = add nuw nsw i32 %i, 1
+ %i.cmp = icmp slt i32 %i.inc, %m
+ br i1 %i.cmp, label %for.inner, label %for.exit
+
+ for.exit:
+ %A_idx = getelementptr inbounds double, double* %A, i32 %j
+ store double %phi, double* %A_idx
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: ForwardOpTree executed, but did not modify anything