[ForwardOpTree] Use known array content analysis to forward load instructions.

author Michael Kruse <llvm@meinersbur.de>

Mon, 7 Aug 2017 18:40:29 +0000 (18:40 +0000)

committer Michael Kruse <llvm@meinersbur.de>

Mon, 7 Aug 2017 18:40:29 +0000 (18:40 +0000)
author Michael Kruse <llvm@meinersbur.de>
Mon, 7 Aug 2017 18:40:29 +0000 (18:40 +0000)
committer Michael Kruse <llvm@meinersbur.de>
Mon, 7 Aug 2017 18:40:29 +0000 (18:40 +0000)
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h

index f679835..a3e1bcd 100644 (file)
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -1528,7 +1528,11 @@ public:
    }
  
    /// Add @p Access to this statement's list of accesses.
-  void addAccess(MemoryAccess *Access);
+  ///
+  /// @param Access  The access to add.
+  /// @param Prepend If true, will add @p Access before all other instructions
+  ///                (instead of appending it).
+  void addAccess(MemoryAccess *Access, bool Preprend = false);
  
    /// Remove a MemoryAccess from this statement.
    ///
diff --git a/polly/include/polly/Support/ISLTools.h b/polly/include/polly/Support/ISLTools.h

index 25bb1bf..4fd3962 100644 (file)
--- a/polly/include/polly/Support/ISLTools.h
+++ b/polly/include/polly/Support/ISLTools.h
@@ -368,6 +368,11 @@ isl::union_set convertZoneToTimepoints(isl::union_set Zone, bool InclStart,
  isl::union_map convertZoneToTimepoints(isl::union_map Zone, isl::dim Dim,
                                         bool InclStart, bool InclEnd);
  
+/// Overload of convertZoneToTimepoints(isl::map,InclStart,InclEnd) to process
+/// only a single map.
+isl::map convertZoneToTimepoints(isl::map Zone, isl::dim Dim, bool InclStart,
+                                 bool InclEnd);
+
  /// Distribute the domain to the tuples of a wrapped range map.
  ///
  /// @param Map { Domain[] -> [Range1[] -> Range2[]] }
diff --git a/polly/include/polly/ZoneAlgo.h b/polly/include/polly/ZoneAlgo.h

index a5f4d3e..a88b918 100644 (file)
--- a/polly/include/polly/ZoneAlgo.h
+++ b/polly/include/polly/ZoneAlgo.h
@@ -67,6 +67,10 @@ protected:
    /// { DomainRead[] -> Element[] }
    isl::union_map AllReads;
  
+  /// The loaded values (llvm::LoadInst) of all reads.
+  /// { [Element[] -> DomainRead[]] -> ValInst[] }
+  isl::union_map AllReadValInst;
+
    /// Combined access relations of all MemoryKind::Array, MAY_WRITE accesses.
    /// { DomainMayWrite[] -> Element[] }
    isl::union_map AllMayWrites;
@@ -75,6 +79,11 @@ protected:
    /// { DomainMustWrite[] -> Element[] }
    isl::union_map AllMustWrites;
  
+  /// Combined access relations of all MK_Array write accesses (union of
+  /// AllMayWrites and AllMustWrites).
+  /// { DomainWrite[] -> Element[] }
+  isl::union_map AllWrites;
+
    /// The value instances written to array elements of all write accesses.
    /// { [Element[] -> DomainWrite[]] -> ValInst[] }
    isl::union_map AllWriteValInst;
@@ -220,6 +229,26 @@ protected:
  public:
    /// Return the SCoP this object is analyzing.
    Scop *getScop() const { return S; }
+
+  /// A reaching definition zone is known to have the definition's written value
+  /// if the definition is a MUST_WRITE.
+  ///
+  /// @return { [Element[] -> Zone[]] -> ValInst[] }
+  isl::union_map computeKnownFromMustWrites() const;
+
+  /// A reaching definition zone is known to be the same value as any load that
+  /// reads from that array element in that period.
+  ///
+  /// @return { [Element[] -> Zone[]] -> ValInst[] }
+  isl::union_map computeKnownFromLoad() const;
+
+  /// Compute which value an array element stores at every instant.
+  ///
+  /// @param FromWrite Use stores as source of information.
+  /// @param FromRead  Use loads as source of information.
+  ///
+  /// @return { [Element[] -> Zone[]] -> ValInst[] }
+  isl::union_map computeKnown(bool FromWrite, bool FromRead) const;
  };
  
  /// Create a domain-to-unknown value mapping.
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp

index 17c4453..82d0592 100644 (file)
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -1276,7 +1276,7 @@ void ScopStmt::buildAccessRelations() {
    }
  }
  
-void ScopStmt::addAccess(MemoryAccess *Access) {
+void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
    Instruction *AccessInst = Access->getAccessInstruction();
  
    if (Access->isArrayKind()) {
@@ -1305,6 +1305,10 @@ void ScopStmt::addAccess(MemoryAccess *Access) {
      PHIReads[PHI] = Access;
    }
  
+  if (Prepend) {
+    MemAccs.insert(MemAccs.begin(), Access);
+    return;
+  }
    MemAccs.push_back(Access);
  }
  
diff --git a/polly/lib/Support/ISLTools.cpp b/polly/lib/Support/ISLTools.cpp

index 883ff91..06d4973 100644 (file)
--- a/polly/lib/Support/ISLTools.cpp
+++ b/polly/lib/Support/ISLTools.cpp
@@ -431,6 +431,21 @@ isl::union_map polly::convertZoneToTimepoints(isl::union_map Zone, isl::dim Dim,
    return give(isl_union_map_union(Zone.take(), ShiftedZone.take()));
  }
  
+isl::map polly::convertZoneToTimepoints(isl::map Zone, isl::dim Dim,
+                                        bool InclStart, bool InclEnd) {
+  if (!InclStart && InclEnd)
+    return Zone;
+
+  auto ShiftedZone = shiftDim(Zone, Dim, -1, -1);
+  if (InclStart && !InclEnd)
+    return ShiftedZone;
+  else if (!InclStart && !InclEnd)
+    return give(isl_map_intersect(Zone.take(), ShiftedZone.take()));
+
+  assert(InclStart && InclEnd);
+  return give(isl_map_union(Zone.take(), ShiftedZone.take()));
+}
+
  isl::map polly::distributeDomain(isl::map Map) {
    // Note that we cannot take Map apart into { Domain[] -> Range1[] } and {
    // Domain[] -> Range2[] } and combine again. We would loose any relation
diff --git a/polly/lib/Transform/DeLICM.cpp b/polly/lib/Transform/DeLICM.cpp

index de115cf..a67bd18 100644 (file)
--- a/polly/lib/Transform/DeLICM.cpp
+++ b/polly/lib/Transform/DeLICM.cpp
@@ -778,9 +778,15 @@ private:
          isl_map_wrap(isl_map_apply_domain(Lifetime.copy(), DefTarget.copy())));
      simplify(EltZone);
  
+    // When known knowledge is disabled, just return the unknown value. It will
+    // either get filtered out or conflict with itself.
      // { DomainDef[] -> ValInst[] }
-    auto ValInst = makeValInst(V, DefMA->getStatement(),
-                               LI->getLoopFor(DefInst->getParent()));
+    isl::map ValInst;
+    if (DelicmComputeKnown)
+      ValInst = makeValInst(V, DefMA->getStatement(),
+                            LI->getLoopFor(DefInst->getParent()));
+    else
+      ValInst = makeUnknownForDomain(DefMA->getStatement());
  
      // { DomainDef[] -> [Element[] -> Zone[]] }
      auto EltKnownTranslator =
@@ -1209,21 +1215,6 @@ private:
      return Result;
    }
  
-  /// Compute which value an array element stores at every instant.
-  ///
-  /// @return { [Element[] -> Zone[]] -> ValInst[] }
-  isl::union_map computeKnown() const {
-    // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] }
-    auto EltReachdDef =
-        distributeDomain(give(isl_union_map_curry(WriteReachDefZone.copy())));
-
-    // { [Element[] -> DomainWrite[]] -> ValInst[] }
-    auto AllKnownWriteValInst = filterKnownValInst(AllWriteValInst);
-
-    // { [Element[] -> Zone[]] -> ValInst[] }
-    return EltReachdDef.apply_range(AllKnownWriteValInst);
-  }
-
    /// Determine when an array element is written to, and which value instance is
    /// written.
    ///
@@ -1290,7 +1281,7 @@ public:
        computeCommon();
  
        EltUnused = computeLifetime();
-      EltKnown = computeKnown();
+      EltKnown = computeKnown(true, false);
        EltWritten = computeWritten();
      }
      DeLICMAnalyzed++;
diff --git a/polly/lib/Transform/ForwardOpTree.cpp b/polly/lib/Transform/ForwardOpTree.cpp

index 507478f..1691730 100644 (file)
--- a/polly/lib/Transform/ForwardOpTree.cpp
+++ b/polly/lib/Transform/ForwardOpTree.cpp
@@ -13,11 +13,16 @@
  
  #include "polly/ForwardOpTree.h"
  
+#include "polly/Options.h"
+#include "polly/RegisterPasses.h"
  #include "polly/ScopBuilder.h"
  #include "polly/ScopInfo.h"
  #include "polly/ScopPass.h"
  #include "polly/Support/GICHelper.h"
+#include "polly/Support/ISLOStream.h"
+#include "polly/Support/ISLTools.h"
  #include "polly/Support/VirtualInstruction.h"
+#include "polly/ZoneAlgo.h"
  #include "llvm/Analysis/ValueTracking.h"
  
  #define DEBUG_TYPE "polly-delicm"
@@ -25,7 +30,25 @@
  using namespace polly;
  using namespace llvm;
  
+static cl::opt<bool>
+    AnalyzeKnown("polly-optree-analyze-known",
+                 cl::desc("Analyze array contents for load forwarding"),
+                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
+
+static cl::opt<unsigned long>
+    MaxOps("polly-optree-max-ops",
+           cl::desc("Maximum number of ISL operations to invest for known "
+                    "analysis; 0=no limit"),
+           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
+
+STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
+STATISTIC(KnownOutOfQuota,
+          "Analyses aborted because max_operations was reached");
+STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis");
+
  STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
+STATISTIC(TotalKnownLoadsForwarded,
+          "Number of forwarded loads because their value was known");
  STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
  STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
  STATISTIC(TotalModifiedStmts,
@@ -81,17 +104,14 @@ enum ForwardingDecision {
  /// the MemoryAccess is removed and the all the operand tree instructions are
  /// moved into the statement. All original instructions are left in the source
  /// statements. The simplification pass can clean these up.
-class ForwardOpTreeImpl {
+class ForwardOpTreeImpl : ZoneAlgorithm {
  private:
-  /// The SCoP we are currently processing.
-  Scop *S;
-
-  /// LoopInfo is required for VirtualUse.
-  LoopInfo *LI;
-
    /// How many instructions have been copied to other statements.
    int NumInstructionsCopied = 0;
  
+  /// Number of loads forwarded because their value was known.
+  int NumKnownLoadsForwarded = 0;
+
    /// How many read-only accesses have been copied.
    int NumReadOnlyCopied = 0;
  
@@ -104,10 +124,150 @@ private:
    /// Whether we carried out at least one change to the SCoP.
    bool Modified = false;
  
+  /// Contains the zones where array elements are known to contain a specific
+  /// value.
+  /// { [Element[] -> Zone[]] -> ValInst[] }
+  /// @see computeKnown()
+  isl::union_map Known;
+
+  /// Translator for newly introduced ValInsts to already existing ValInsts such
+  /// that new introduced load instructions can reuse the Known analysis of its
+  /// original load. { ValInst[] -> ValInst[] }
+  isl::union_map Translator;
+
+  /// Get list of array elements that do contain the same ValInst[] at Domain[].
+  ///
+  /// @param ValInst { Domain[] -> ValInst[] }
+  ///                The values for which we search for alternative locations,
+  ///                per statement instance.
+  ///
+  /// @return { Domain[] -> Element[] }
+  ///         For each statement instance, the array elements that contain the
+  ///         same ValInst.
+  isl::union_map findSameContentElements(isl::union_map ValInst) {
+    assert(ValInst.is_single_valued().is_true());
+
+    // { Domain[] }
+    isl::union_set Domain = ValInst.domain();
+
+    // { Domain[] -> Scatter[] }
+    isl::union_map Schedule = getScatterFor(Domain);
+
+    // { Element[] -> [Scatter[] -> ValInst[]] }
+    isl::union_map MustKnownCurried =
+        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
+
+    // { [Domain[] -> ValInst[]] -> Scatter[] }
+    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
+
+    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
+    isl::union_map SchedValDomVal =
+        DomValSched.range_product(ValInst.range_map()).reverse();
+
+    // { Element[] -> [Domain[] -> ValInst[]] }
+    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
+
+    // { Domain[] -> Element[] }
+    isl::union_map MustKnownMap =
+        MustKnownInst.uncurry().domain().unwrap().reverse();
+    simplify(MustKnownMap);
+
+    return MustKnownMap;
+  }
+
+  /// Find a single array element for each statement instance, within a single
+  /// array.
+  ///
+  /// @param MustKnown { Domain[] -> Element[] }
+  ///                  Set of candidate array elements.
+  /// @param Domain    { Domain[] }
+  ///                  The statement instance for which we need elements for.
+  ///
+  /// @return { Domain[] -> Element[] }
+  ///         For each statement instance, an array element out of @p MustKnown.
+  ///         All array elements must be in the same array (Polly does not yet
+  ///         support reading from different accesses using the same
+  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
+  ///         null.
+  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
+    // { Domain[] -> Element[] }
+    isl::map Result;
+
+    // MemoryAccesses can read only elements from a single array
+    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
+    // Look through all spaces until we find one that contains at least the
+    // wanted statement instance.s
+    MustKnown.foreach_map([&, this](isl::map Map) -> isl::stat {
+      // Get the array this is accessing.
+      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
+      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
+
+      // No support for generation of indirect array accesses.
+      if (SAI->getBasePtrOriginSAI())
+        return isl::stat::ok; // continue
+
+      // Determine whether this map contains all wanted values.
+      isl::set MapDom = Map.domain();
+      if (!Domain.is_subset(MapDom).is_true())
+        return isl::stat::ok; // continue
+
+      // There might be multiple array elements that contain the same value, but
+      // choose only one of them. lexmin is used because it returns a one-value
+      // mapping, we do not care about which one.
+      // TODO: Get the simplest access function.
+      Result = Map.lexmin();
+      return isl::stat::error; // break
+    });
+
+    return Result;
+  }
+
+public:
+  /// Compute the zones of known array element contents.
+  ///
+  /// @return True if the computed #Known is usable.
+  bool computeKnownValues() {
+    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
+
+    // Check that nothing strange occurs.
+    if (!isCompatibleScop()) {
+      KnownIncompatible++;
+      return false;
+    }
+
+    isl_ctx_reset_error(IslCtx.get());
+    {
+      IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), MaxOps);
+
+      computeCommon();
+      Known = computeKnown(true, true);
+      simplify(Known);
+
+      // Preexisting ValInsts use the known content analysis of themselves.
+      Translator = makeIdentityMap(Known.range(), false);
+    }
+
+    if (!Known || !Translator) {
+      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
+      KnownOutOfQuota++;
+      Known = nullptr;
+      Translator = nullptr;
+      DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
+      return false;
+    }
+
+    KnownAnalyzed++;
+    DEBUG(dbgs() << "All known: " << Known << "\n");
+
+    return true;
+  }
+
    void printStatistics(raw_ostream &OS, int Indent = 0) {
      OS.indent(Indent) << "Statistics {\n";
      OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
                            << '\n';
+    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
+                          << '\n';
      OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
                            << '\n';
      OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
@@ -130,17 +290,233 @@ private:
      OS.indent(Indent) << "}\n";
    }
  
+  /// Create a new MemoryAccess of type read and MemoryKind::Array.
+  ///
+  /// @param Stmt           The statement in which the access occurs.
+  /// @param LI             The instruction that does the access.
+  /// @param AccessRelation The array element that each statement instance
+  ///                       accesses.
+  ///
+  /// @param The newly created access.
+  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
+                                    isl::map AccessRelation) {
+    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
+    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
+
+    // Create a dummy SCEV access, to be replaced anyway.
+    SmallVector<const SCEV *, 4> Sizes;
+    Sizes.reserve(SAI->getNumberOfDimensions());
+    SmallVector<const SCEV *, 4> Subscripts;
+    Subscripts.reserve(SAI->getNumberOfDimensions());
+    for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
+      Sizes.push_back(SAI->getDimensionSize(i));
+      Subscripts.push_back(nullptr);
+    }
+
+    MemoryAccess *Access =
+        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
+                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
+    S->addAccessFunction(Access);
+    Stmt->addAccess(Access, true);
+
+    Access->setNewAccessRelation(AccessRelation);
+
+    return Access;
+  }
+
+  /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a
+  /// use in every instance of @p UseStmt.
+  ///
+  /// @param UseStmt Statement a scalar is used in.
+  /// @param DefStmt Statement a scalar is defined in.
+  ///
+  /// @return { DomainUse[] -> DomainDef[] }
+  isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) {
+    // { DomainUse[] -> Scatter[] }
+    isl::map UseScatter = getScatterFor(UseStmt);
+
+    // { Zone[] -> DomainDef[] }
+    isl::map ReachDefZone = getScalarReachingDefinition(DefStmt);
+
+    // { Scatter[] -> DomainDef[] }
+    isl::map ReachDefTimepoints =
+        convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true);
+
+    // { DomainUse[] -> DomainDef[] }
+    return UseScatter.apply_range(ReachDefTimepoints);
+  }
+
+  /// Forward a load by reading from an array element that contains the same
+  /// value. Typically the location it was loaded from.
+  ///
+  /// @param TargetStmt  The statement the operand tree will be copied to.
+  /// @param Inst        The (possibly speculatable) instruction to forward.
+  /// @param UseStmt     The statement that uses @p Inst.
+  /// @param UseLoop     The loop @p Inst is used in.
+  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
+  ///                    A mapping from the statement instance @p Inst is used
+  ///                    to the statement instance it is forwarded to.
+  /// @param DefStmt     The statement @p Inst is defined in.
+  /// @param DefLoop     The loop which contains @p Inst.
+  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
+  ///                    A mapping from the statement instance @p Inst is
+  ///                    defined to the statement instance it is forwarded to.
+  /// @param DoIt        If false, only determine whether an operand tree can be
+  ///                    forwarded. If true, carry out the forwarding. Do not
+  ///                    use DoIt==true if an operand tree is not known to be
+  ///                    forwardable.
+  ///
+  /// @return FD_NotApplicable  if @p Inst is not a LoadInst.
+  ///         FD_CannotForward  if no array element to load from was found.
+  ///         FD_CanForwardLeaf if the load is already in the target statement
+  ///                           instance.
+  ///         FD_CanForwardTree if @p Inst is forwardable.
+  ///         FD_DidForward     if @p DoIt was true.
+  ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
+                                      ScopStmt *UseStmt, Loop *UseLoop,
+                                      isl::map UseToTarget, ScopStmt *DefStmt,
+                                      Loop *DefLoop, isl::map DefToTarget,
+                                      bool DoIt) {
+    // Cannot do anything without successful known analysis.
+    if (Known.is_null())
+      return FD_NotApplicable;
+
+    LoadInst *LI = dyn_cast<LoadInst>(Inst);
+    if (!LI)
+      return FD_NotApplicable;
+
+    // If the load is already in the statement, not forwarding is necessary.
+    // However, it might happen that the LoadInst is already present in the
+    // statement's instruction list. In that case we do as follows:
+    // - For the evaluation (DoIt==false), we can trivially forward it as it is
+    //   benefit of forwarding an already present instruction.
+    // - For the execution (DoIt==false), prepend the instruction (to make it
+    //   available to all instructions following in the instruction list), but
+    //   do not add another MemoryAccess.
+    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
+    if (Access && !DoIt)
+      return FD_CanForwardLeaf;
+
+    if (DoIt)
+      TargetStmt->prependInstruction(LI);
+
+    ForwardingDecision OpDecision =
+        forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop,
+                    DefToTarget, DoIt);
+    switch (OpDecision) {
+    case FD_CannotForward:
+      assert(!DoIt);
+      return OpDecision;
+
+    case FD_CanForwardLeaf:
+    case FD_CanForwardTree:
+      assert(!DoIt);
+      break;
+
+    case FD_DidForward:
+      assert(DoIt);
+      break;
+
+    default:
+      llvm_unreachable("Shouldn't return this");
+    }
+
+    // { DomainDef[] -> ValInst[] }
+    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
+
+    // { DomainTarget[] -> ValInst[] }
+    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
+    isl::union_map TranslatedExpectedVal =
+        isl::union_map(TargetExpectedVal).apply_range(Translator);
+
+    // { DomainTarget[] -> Element[] }
+    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
+
+    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
+    if (!SameVal)
+      return FD_CannotForward;
+
+    if (!DoIt)
+      return FD_CanForwardTree;
+
+    if (Access) {
+      DEBUG(dbgs() << "    forwarded known load with preexisting MemoryAccess"
+                   << Access << "\n");
+    } else {
+      Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
+      DEBUG(dbgs() << "    forwarded known load with new MemoryAccess" << Access
+                   << "\n");
+
+      // { ValInst[] }
+      isl::space ValInstSpace = ExpectedVal.get_space().range();
+
+      // After adding a new load to the SCoP, also update the Known content
+      // about it. The new load will have a known ValInst of
+      // { [DomainTarget[] -> Value[]] }
+      // but which -- because it is a copy of it -- has same value as the
+      // { [DomainDef[] -> Value[]] }
+      // that it replicates. Instead of  cloning the known content of
+      // [DomainDef[] -> Value[]]
+      // for DomainTarget[], we add a 'translator' that maps
+      // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
+      // before comparing to the known content.
+      // TODO: 'Translator' could also be used to map PHINodes to their incoming
+      // ValInsts.
+      if (ValInstSpace.is_wrapping()) {
+        // { DefDomain[] -> Value[] }
+        isl::map ValInsts = ExpectedVal.range().unwrap();
+
+        // { DefDomain[] }
+        isl::set DefDomain = ValInsts.domain();
+
+        // { Value[] }
+        isl::space ValSpace = ValInstSpace.unwrap().range();
+
+        // { Value[] -> Value[] }
+        isl::map ValToVal =
+            isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
+
+        // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
+        isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
+
+        Translator = Translator.add_map(LocalTranslator);
+        DEBUG(dbgs() << "      local translator is " << LocalTranslator
+                     << "\n");
+      }
+    }
+    DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
+                 << "\n");
+    DEBUG(dbgs() << "      candidate elements where " << Candidates << "\n");
+    assert(Access);
+
+    NumKnownLoadsForwarded++;
+    TotalKnownLoadsForwarded++;
+    return FD_DidForward;
+  }
+
    /// Forwards a speculatively executable instruction.
    ///
-  /// If the instruction itself cannot be executed speculatively, returns
-  /// FD_NotApplicable.
+  /// @param TargetStmt  The statement the operand tree will be copied to.
+  /// @param UseInst     The (possibly speculatable) instruction to forward.
+  /// @param DefStmt     The statement @p UseInst is defined in.
+  /// @param DefLoop     The loop which contains @p UseInst.
+  /// @param DefToTarget { DomainDef[] -> DomainTarget[] }
+  ///                    A mapping from the statement instance @p UseInst is
+  ///                    defined to the statement instance it is forwarded to.
+  /// @param DoIt        If false, only determine whether an operand tree can be
+  ///                    forwarded. If true, carry out the forwarding. Do not
+  ///                    use DoIt==true if an operand tree is not known to be
+  ///                    forwardable.
    ///
-  /// The parameters the same as for
-  /// @see forwardTree()
+  /// @return FD_NotApplicable  if @p UseInst is not speculatable.
+  ///         FD_CannotForward  if one of @p UseInst's operands is not
+  ///                           forwardable.
+  ///         FD_CanForwardTree if @p UseInst is forwardable.
+  ///         FD_DidForward     if @p DoIt was true.
    ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
                                           Instruction *UseInst,
-                                         ScopStmt *UseStmt, Loop *UseLoop,
-                                         bool DoIt) {
+                                         ScopStmt *DefStmt, Loop *DefLoop,
+                                         isl::map DefToTarget, bool DoIt) {
      // PHIs, unless synthesizable, are not yet supported.
      if (isa<PHINode>(UseInst))
        return FD_NotApplicable;
@@ -160,10 +536,6 @@ private:
      if (mayBeMemoryDependent(*UseInst))
        return FD_NotApplicable;
  
-    Loop *DefLoop = LI->getLoopFor(UseInst->getParent());
-    ScopStmt *DefStmt = S->getStmtFor(UseInst);
-    assert(DefStmt && "Value must be defined somewhere");
-
      if (DoIt) {
        // To ensure the right order, prepend this instruction before its
        // operands. This ensures that its operands are inserted before the
@@ -177,7 +549,7 @@ private:
  
      for (Value *OpVal : UseInst->operand_values()) {
        ForwardingDecision OpDecision =
-          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
+          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt);
        switch (OpDecision) {
        case FD_CannotForward:
          assert(!DoIt);
@@ -205,20 +577,30 @@ private:
    /// Determines whether an operand tree can be forwarded or carries out a
    /// forwarding, depending on the @p DoIt flag.
    ///
-  /// @param TargetStmt The statement the operand tree will be copied to.
-  /// @param UseVal     The value (usually an instruction) which is root of an
-  ///                   operand tree.
-  /// @param UseStmt    The statement that uses @p UseVal.
-  /// @param UseLoop    The loop @p UseVal is used in.
-  /// @param DoIt       If false, only determine whether an operand tree can be
-  ///                   forwarded. If true, carry out the forwarding. Do not use
-  ///                   DoIt==true if an operand tree is not known to be
-  ///                   forwardable.
+  /// @param TargetStmt  The statement the operand tree will be copied to.
+  /// @param UseVal      The value (usually an instruction) which is root of an
+  ///                    operand tree.
+  /// @param UseStmt     The statement that uses @p UseVal.
+  /// @param UseLoop     The loop @p UseVal is used in.
+  /// @param UseToTarget { DomainUse[] -> DomainTarget[] }
+  ///                    A mapping from the statement instance @p UseVal is used
+  ///                    to the statement instance it is forwarded to.
+  /// @param DoIt        If false, only determine whether an operand tree can be
+  ///                    forwarded. If true, carry out the forwarding. Do not
+  ///                    use DoIt==true if an operand tree is not known to be
+  ///                    forwardable.
    ///
    /// @return If DoIt==false, return whether the operand tree can be forwarded.
    ///         If DoIt==true, return FD_DidForward.
-  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
-                                 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
+  ForwardingDecision forwardTree(ScopStmt *TargetStmt, llvm::Value *UseVal,
+                                 ScopStmt *UseStmt, llvm::Loop *UseLoop,
+                                 isl::map UseToTarget, bool DoIt) {
+    ScopStmt *DefStmt = nullptr;
+    Loop *DefLoop = nullptr;
+
+    // { DefDomain[] -> TargetDomain[] }
+    isl::map DefToTarget;
+
      VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
      switch (VUse.getKind()) {
      case VirtualUse::Constant:
@@ -275,14 +657,41 @@ private:
        return FD_DidForward;
  
      case VirtualUse::Intra:
+      // Knowing that UseStmt and DefStmt are the same statement instance, just
+      // reuse the information about UseStmt for DefStmt
+      DefStmt = UseStmt;
+      DefToTarget = UseToTarget;
+
+      LLVM_FALLTHROUGH;
      case VirtualUse::Inter:
-      auto Inst = cast<Instruction>(UseVal);
+      Instruction *Inst = cast<Instruction>(UseVal);
+
+      if (!DefStmt)
+        DefStmt = S->getStmtFor(Inst);
+      assert(DefStmt && "Value must be defined somewhere");
+      DefLoop = LI->getLoopFor(Inst->getParent());
+
+      if (DefToTarget.is_null() && !Known.is_null()) {
+        // { UseDomain[] -> DefDomain[] }
+        isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt);
+
+        // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to
+        // { DefDomain[] -> TargetDomain[] }
+        DefToTarget = UseToTarget.apply_domain(UseToDef);
+        simplify(DefToTarget);
+      }
  
-      ForwardingDecision SpeculativeResult =
-          forwardSpeculatable(TargetStmt, Inst, UseStmt, UseLoop, DoIt);
+      ForwardingDecision SpeculativeResult = forwardSpeculatable(
+          TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt);
        if (SpeculativeResult != FD_NotApplicable)
          return SpeculativeResult;
  
+      ForwardingDecision KnownResult =
+          forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget,
+                           DefStmt, DefLoop, DefToTarget, DoIt);
+      if (KnownResult != FD_NotApplicable)
+        return KnownResult;
+
        // When no method is found to forward the operand tree, we effectively
        // cannot handle it.
        DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
@@ -300,14 +709,21 @@ private:
      ScopStmt *Stmt = RA->getStatement();
      Loop *InLoop = Stmt->getSurroundingLoop();
  
-    ForwardingDecision Assessment =
-        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
+    isl::map TargetToUse;
+    if (!Known.is_null()) {
+      isl::space DomSpace = Stmt->getDomainSpace();
+      TargetToUse =
+          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
+    }
+
+    ForwardingDecision Assessment = forwardTree(
+        Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false);
      assert(Assessment != FD_DidForward);
      if (Assessment != FD_CanForwardTree)
        return false;
  
-    ForwardingDecision Execution =
-        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
+    ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt,
+                                               InLoop, TargetToUse, true);
      assert(Execution == FD_DidForward &&
             "A previous positive assessment must also be executable");
      (void)Execution;
@@ -317,7 +733,8 @@ private:
    }
  
  public:
-  ForwardOpTreeImpl(Scop *S, LoopInfo *LI) : S(S), LI(LI) {}
+  ForwardOpTreeImpl(Scop *S, LoopInfo *LI)
+      : ZoneAlgorithm("polly-optree", S, LI) {}
  
    /// Return which SCoP this instance is processing.
    Scop *getScop() const { return S; }
@@ -413,6 +830,11 @@ public:
      LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
      Impl = make_unique<ForwardOpTreeImpl>(&S, &LI);
  
+    if (AnalyzeKnown) {
+      DEBUG(dbgs() << "Prepare forwarders...\n");
+      Impl->computeKnownValues();
+    }
+
      DEBUG(dbgs() << "Forwarding operand trees...\n");
      Impl->forwardOperandTrees();
  
diff --git a/polly/lib/Transform/ZoneAlgo.cpp b/polly/lib/Transform/ZoneAlgo.cpp

index 63ac92f..c6e2771 100644 (file)
--- a/polly/lib/Transform/ZoneAlgo.cpp
+++ b/polly/lib/Transform/ZoneAlgo.cpp
@@ -237,6 +237,30 @@ static isl::map makeUnknownForDomain(isl::set Domain) {
    return give(isl_map_from_domain(Domain.take()));
  }
  
+/// Return whether @p Map maps to an unknown value.
+///
+/// @param { [] -> ValInst[] }
+static bool isMapToUnknown(const isl::map &Map) {
+  isl::space Space = Map.get_space().range();
+  return Space.has_tuple_id(isl::dim::set).is_false() &&
+         Space.is_wrapping().is_false() && Space.dim(isl::dim::set) == 0;
+}
+
+/// Return only the mappings that map to known values.
+///
+/// @param UMap { [] -> ValInst[] }
+///
+/// @return { [] -> ValInst[] }
+static isl::union_map filterKnownValInst(const isl::union_map &UMap) {
+  isl::union_map Result = isl::union_map::empty(UMap.get_space());
+  UMap.foreach_map([=, &Result](isl::map Map) -> isl::stat {
+    if (!isMapToUnknown(Map))
+      Result = Result.add_map(Map);
+    return isl::stat::ok;
+  });
+  return Result;
+}
+
  static std::string printInstruction(Instruction *Instr,
                                      bool IsForDebug = false) {
    std::string Result;
@@ -330,10 +354,28 @@ bool ZoneAlgorithm::isCompatibleStmt(ScopStmt *Stmt) {
  void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) {
    assert(MA->isLatestArrayKind());
    assert(MA->isRead());
+  ScopStmt *Stmt = MA->getStatement();
  
    // { DomainRead[] -> Element[] }
    auto AccRel = getAccessRelationFor(MA);
    AllReads = give(isl_union_map_add_map(AllReads.take(), AccRel.copy()));
+
+  if (LoadInst *Load = dyn_cast_or_null<LoadInst>(MA->getAccessInstruction())) {
+    // { DomainRead[] -> ValInst[] }
+    isl::map LoadValInst = makeValInst(
+        Load, Stmt, LI->getLoopFor(Load->getParent()), Stmt->isBlockStmt());
+
+    // { DomainRead[] -> [Element[] -> DomainRead[]] }
+    isl::map IncludeElement =
+        give(isl_map_curry(isl_map_domain_map(AccRel.take())));
+
+    // { [Element[] -> DomainRead[]] -> ValInst[] }
+    isl::map EltLoadValInst =
+        give(isl_map_apply_domain(LoadValInst.take(), IncludeElement.take()));
+
+    AllReadValInst = give(
+        isl_union_map_add_map(AllReadValInst.take(), EltLoadValInst.take()));
+  }
  }
  
  void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
@@ -578,6 +620,7 @@ void ZoneAlgorithm::computeCommon() {
    AllMayWrites = makeEmptyUnionMap();
    AllMustWrites = makeEmptyUnionMap();
    AllWriteValInst = makeEmptyUnionMap();
+  AllReadValInst = makeEmptyUnionMap();
  
    for (auto &Stmt : *S) {
      for (auto *MA : Stmt) {
@@ -593,7 +636,7 @@ void ZoneAlgorithm::computeCommon() {
    }
  
    // { DomainWrite[] -> Element[] }
-  auto AllWrites =
+  AllWrites =
        give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy()));
  
    // { [Element[] -> Zone[]] -> DomainWrite[] }
@@ -611,3 +654,76 @@ void ZoneAlgorithm::printAccesses(llvm::raw_ostream &OS, int Indent) const {
    }
    OS.indent(Indent) << "}\n";
  }
+
+isl::union_map ZoneAlgorithm::computeKnownFromMustWrites() const {
+  // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] }
+  isl::union_map EltReachdDef = distributeDomain(WriteReachDefZone.curry());
+
+  // { [Element[] -> DomainWrite[]] -> ValInst[] }
+  isl::union_map AllKnownWriteValInst = filterKnownValInst(AllWriteValInst);
+
+  // { [Element[] -> Zone[]] -> ValInst[] }
+  return EltReachdDef.apply_range(AllKnownWriteValInst);
+}
+
+isl::union_map ZoneAlgorithm::computeKnownFromLoad() const {
+  // { Element[] }
+  isl::union_set AllAccessedElts = AllReads.range().unite(AllWrites.range());
+
+  // { Element[] -> Scatter[] }
+  isl::union_map EltZoneUniverse = isl::union_map::from_domain_and_range(
+      AllAccessedElts, isl::set::universe(ScatterSpace));
+
+  // This assumes there are no "holes" in
+  // isl_union_map_domain(WriteReachDefZone); alternatively, compute the zone
+  // before the first write or that are not written at all.
+  // { Element[] -> Scatter[] }
+  isl::union_set NonReachDef =
+      EltZoneUniverse.wrap().subtract(WriteReachDefZone.domain());
+
+  // { [Element[] -> Zone[]] -> ReachDefId[] }
+  isl::union_map DefZone =
+      WriteReachDefZone.unite(isl::union_map::from_domain(NonReachDef));
+
+  // { [Element[] -> Scatter[]] -> Element[] }
+  isl::union_map EltZoneElt = EltZoneUniverse.domain_map();
+
+  // { [Element[] -> Zone[]] -> [Element[] -> ReachDefId[]] }
+  isl::union_map DefZoneEltDefId = EltZoneElt.range_product(DefZone);
+
+  // { Element[] -> [Zone[] -> ReachDefId[]] }
+  isl::union_map EltDefZone = DefZone.curry();
+
+  // { [Element[] -> Zone[] -> [Element[] -> ReachDefId[]] }
+  isl::union_map EltZoneEltDefid = distributeDomain(EltDefZone);
+
+  // { [Element[] -> Scatter[]] -> DomainRead[] }
+  isl::union_map Reads = AllReads.range_product(Schedule).reverse();
+
+  // { [Element[] -> Scatter[]] -> [Element[] -> DomainRead[]] }
+  isl::union_map ReadsElt = EltZoneElt.range_product(Reads);
+
+  // { [Element[] -> Scatter[]] -> ValInst[] }
+  isl::union_map ScatterKnown = ReadsElt.apply_range(AllReadValInst);
+
+  // { [Element[] -> ReachDefId[]] -> ValInst[] }
+  isl::union_map DefidKnown =
+      DefZoneEltDefId.apply_domain(ScatterKnown).reverse();
+
+  // { [Element[] -> Zone[]] -> ValInst[] }
+  return DefZoneEltDefId.apply_range(DefidKnown);
+}
+
+isl::union_map ZoneAlgorithm::computeKnown(bool FromWrite,
+                                           bool FromRead) const {
+  isl::union_map Result = makeEmptyUnionMap();
+
+  if (FromWrite)
+    Result = Result.unite(computeKnownFromMustWrites());
+
+  if (FromRead)
+    Result = Result.unite(computeKnownFromLoad());
+
+  simplify(Result);
+  return Result;
+}
diff --git a/polly/test/ForwardOpTree/forward_load.ll b/polly/test/ForwardOpTree/forward_load.ll

new file mode 100644 (file)

index 0000000..176ea97
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_load.ll
@@ -0,0 +1,51 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Rematerialize a load.
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %B_idx = getelementptr inbounds double, double* %B, i32 %j
+      %val = load double, double* %B_idx
+      br label %bodyB
+
+    bodyB:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      store double %val, double* %A_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Known loads forwarded: 1
+; CHECK:     Operand trees forwarded: 1
+; CHECK:     Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK:      Stmt_bodyB
+; CHECK-NEXT:         ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             null;
+; CHECK-NEXT:        new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %val = load double, double* %B_idx
+; CHECK-NEXT:               store double %val, double* %A_idx
+; CHECK-NEXT:         }
diff --git a/polly/test/ForwardOpTree/forward_load_differentarray.ll b/polly/test/ForwardOpTree/forward_load_differentarray.ll

new file mode 100644 (file)

index 0000000..7580ddb
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_load_differentarray.ll
@@ -0,0 +1,78 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %B_idx = getelementptr inbounds double, double* %B, i32 %j
+      %val = load double, double* %B_idx
+      br label %bodyB
+
+    bodyB:
+      store double 0.0, double* %B_idx
+      %C_idx = getelementptr inbounds double, double* %C, i32 %j
+      store double %val, double* %C_idx
+      br label %bodyC
+
+    bodyC:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      store double %val, double* %A_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Known loads forwarded: 2
+; CHECK:     Operand trees forwarded: 2
+; CHECK:     Statements with forwarded operand trees: 2
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_val[] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val = load double, double* %B_idx
+; CHECK-NEXT:             }
+; CHECK-NEXT:     Stmt_bodyB
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 null;
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val = load double, double* %B_idx
+; CHECK-NEXT:                   store double 0.000000e+00, double* %B_idx
+; CHECK-NEXT:                   store double %val, double* %C_idx
+; CHECK-NEXT:             }
+; CHECK-NEXT:     Stmt_bodyC
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 null;
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyC[i0] -> MemRef_C[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val = load double, double* %B_idx
+; CHECK-NEXT:                   store double %val, double* %A_idx
+; CHECK-NEXT:             }
+; CHECK-NEXT: }
diff --git a/polly/test/ForwardOpTree/forward_load_fromloop.ll b/polly/test/ForwardOpTree/forward_load_fromloop.ll

new file mode 100644 (file)

index 0000000..ef24924
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_load_fromloop.ll
@@ -0,0 +1,63 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp sle i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %i = phi i32 [0, %for], [%i.inc, %bodyA]
+      %B_idx = getelementptr inbounds double, double* %B, i32 %i
+      %val = load double, double* %B_idx
+      %i.inc = add nuw nsw i32 %i, 1
+      %i.cmp = icmp slt i32 %i, %n
+      br i1 %i.cmp, label %bodyA, label %bodyB
+
+    bodyB:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      store double %val, double* %A_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Known loads forwarded: 1
+; CHECK:     Operand trees forwarded: 1
+; CHECK:     Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK:      After statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0, i1] -> MemRef_B[i1] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0, i1] -> MemRef_val[] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val = load double, double* %B_idx
+; CHECK-NEXT:                   %i.cmp = icmp slt i32 %i, %n
+; CHECK-NEXT:             }
+; CHECK-NEXT:     Stmt_bodyB
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 null;
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[n] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val = load double, double* %B_idx
+; CHECK-NEXT:                   store double %val, double* %A_idx
+; CHECK-NEXT:             }
+; CHECK-NEXT: }
diff --git a/polly/test/ForwardOpTree/forward_load_indirect.ll b/polly/test/ForwardOpTree/forward_load_indirect.ll

new file mode 100644 (file)

index 0000000..58ef667
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_load_indirect.ll
@@ -0,0 +1,63 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %B_idx = getelementptr inbounds double, double* %B, i32 %j
+      %val = load double, double* %B_idx
+      %add = fadd double %val, 42.0
+      br label %bodyB
+
+    bodyB:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      store double %add, double* %A_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Instructions copied: 1
+; CHECK:     Known loads forwarded: 1
+; CHECK:     Operand trees forwarded: 1
+; CHECK:     Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK:      After statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_add[] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val = load double, double* %B_idx
+; CHECK-NEXT:                   %add = fadd double %val, 4.200000e+01
+; CHECK-NEXT:             }
+; CHECK-NEXT:     Stmt_bodyB
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 null;
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val = load double, double* %B_idx
+; CHECK-NEXT:                   %add = fadd double %val, 4.200000e+01
+; CHECK-NEXT:                   store double %add, double* %A_idx
+; CHECK-NEXT:             }
+; CHECK-NEXT: }
diff --git a/polly/test/ForwardOpTree/forward_load_tripleuse.ll b/polly/test/ForwardOpTree/forward_load_tripleuse.ll

new file mode 100644 (file)

index 0000000..2d4f789
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_load_tripleuse.ll
@@ -0,0 +1,145 @@
+; RUN: opt %loadPolly -polly-optree -polly-codegen -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func1(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      %val1 = load double, double* %A_idx
+      %val2 = fadd double %val1, %val1
+      br label %bodyB
+
+    bodyB:
+      %B_idx = getelementptr inbounds double, double* %B, i32 %j
+      store double %val1, double* %B_idx
+      %C_idx = getelementptr inbounds double, double* %C, i32 %j
+      store double %val2, double* %C_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Instructions copied: 1
+; CHECK:     Known loads forwarded: 3
+; CHECK:     Operand trees forwarded: 2
+; CHECK:     Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK:      After statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_val1[] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_val2[] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val1 = load double, double* %A_idx
+; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
+; CHECK-NEXT:             }
+; CHECK-NEXT:     Stmt_bodyB
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 null;
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val1 = load double, double* %A_idx
+; CHECK-NEXT:                   %val1 = load double, double* %A_idx
+; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
+; CHECK-NEXT:                   %val1 = load double, double* %A_idx
+; CHECK-NEXT:                   store double %val1, double* %B_idx
+; CHECK-NEXT:                   store double %val2, double* %C_idx
+; CHECK-NEXT:             }
+; CHECK-NEXT: }
+
+
+define void @func2(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      %val1 = load double, double* %A_idx
+      %val2 = fadd double %val1, %val1
+      br label %bodyB
+
+    bodyB:
+      %B_idx = getelementptr inbounds double, double* %B, i32 %j
+      store double %val2, double* %B_idx
+      %C_idx = getelementptr inbounds double, double* %C, i32 %j
+      store double %val1, double* %C_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Instructions copied: 1
+; CHECK:     Known loads forwarded: 2
+; CHECK:     Operand trees forwarded: 1
+; CHECK:     Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK:      After statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_val2[] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyA[i0] -> MemRef_val1[] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val1 = load double, double* %A_idx
+; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
+; CHECK-NEXT:             }
+; CHECK-NEXT:     Stmt_bodyB
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 null;
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:             MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] };
+; CHECK-NEXT:             ReadAccess :=       [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:                 [n] -> { Stmt_bodyB[i0] -> MemRef_val1[] };
+; CHECK-NEXT:             Instructions {
+; CHECK-NEXT:                   %val1 = load double, double* %A_idx
+; CHECK-NEXT:                   %val1 = load double, double* %A_idx
+; CHECK-NEXT:                   %val2 = fadd double %val1, %val1
+; CHECK-NEXT:                   store double %val2, double* %B_idx
+; CHECK-NEXT:                   store double %val1, double* %C_idx
+; CHECK-NEXT:             }
+; CHECK-NEXT: }
diff --git a/polly/test/ForwardOpTree/noforward_load_conditional.ll b/polly/test/ForwardOpTree/noforward_load_conditional.ll

new file mode 100644 (file)

index 0000000..a4b31d1
--- /dev/null
+++ b/polly/test/ForwardOpTree/noforward_load_conditional.ll
@@ -0,0 +1,39 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %B_idx = getelementptr inbounds double, double* %B, i32 %j
+      %val = load double, double* %B_idx
+      %cond = icmp slt i32 %j, 1
+      br i1 %cond, label %bodyA_true, label %bodyB
+
+    bodyA_true:
+      store double 0.0, double* %B_idx
+      br label %bodyB
+
+    bodyB:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      store double %val, double* %A_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: ForwardOpTree executed, but did not modify anything
diff --git a/polly/test/ForwardOpTree/noforward_load_writebetween.ll b/polly/test/ForwardOpTree/noforward_load_writebetween.ll

new file mode 100644 (file)

index 0000000..f4fc701
--- /dev/null
+++ b/polly/test/ForwardOpTree/noforward_load_writebetween.ll
@@ -0,0 +1,41 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Cannot rematerialize %val from B[0] at bodyC because B[0] has been
+; overwritten in bodyB.
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %B_idx = getelementptr inbounds double, double* %B, i32 %j
+      %val = load double, double* %B_idx
+      br label %bodyB
+
+    bodyB:
+      store double 0.0, double* %B_idx
+      br label %bodyC
+
+    bodyC:
+      %A_idx = getelementptr inbounds double, double* %A, i32 %j
+      store double %val, double* %A_idx
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: ForwardOpTree executed, but did not modify anything
author	Michael Kruse <llvm@meinersbur.de>
	Mon, 7 Aug 2017 18:40:29 +0000 (18:40 +0000)
committer	Michael Kruse <llvm@meinersbur.de>
	Mon, 7 Aug 2017 18:40:29 +0000 (18:40 +0000)
polly/include/polly/ScopInfo.h		patch \| blob \| history
polly/include/polly/Support/ISLTools.h		patch \| blob \| history
polly/include/polly/ZoneAlgo.h		patch \| blob \| history
polly/lib/Analysis/ScopInfo.cpp		patch \| blob \| history
polly/lib/Support/ISLTools.cpp		patch \| blob \| history
polly/lib/Transform/DeLICM.cpp		patch \| blob \| history
polly/lib/Transform/ForwardOpTree.cpp		patch \| blob \| history
polly/lib/Transform/ZoneAlgo.cpp		patch \| blob \| history
polly/test/ForwardOpTree/forward_load.ll	[new file with mode: 0644]	patch \| blob
polly/test/ForwardOpTree/forward_load_differentarray.ll	[new file with mode: 0644]	patch \| blob
polly/test/ForwardOpTree/forward_load_fromloop.ll	[new file with mode: 0644]	patch \| blob
polly/test/ForwardOpTree/forward_load_indirect.ll	[new file with mode: 0644]	patch \| blob
polly/test/ForwardOpTree/forward_load_tripleuse.ll	[new file with mode: 0644]	patch \| blob
polly/test/ForwardOpTree/noforward_load_conditional.ll	[new file with mode: 0644]	patch \| blob
polly/test/ForwardOpTree/noforward_load_writebetween.ll	[new file with mode: 0644]	patch \| blob