};
// DependenceComponent contains state about the direction of a dependence as an
-// interval [lb, ub].
+// interval [lb, ub] for an AffineForOp.
// Distance vector components are represented by the interval [lb, ub] with
// lb == ub.
// Direction vector components are represented by the interval [lb, ub] with
// lb < ub. Note that ub/lb == None means unbounded.
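// For example (illustrative, not from this patch): a uniform dependence
// distance of 2 on a loop is the component [2, 2]; a '<' direction (any
// positive distance) is the component [1, None].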
struct DependenceComponent {
+ // The AffineForOp Operation associated with this dependence component.
+ Operation *op;
// The lower bound of the dependence distance.
llvm::Optional<int64_t> lb;
  // The upper bound of the dependence distance (inclusive).
  llvm::Optional<int64_t> ub;
unsigned loopDepth, FlatAffineConstraints *dependenceConstraints,
llvm::SmallVector<DependenceComponent, 2> *dependenceComponents,
bool allowRAR = false);
+
+/// Returns in 'depCompsVec', dependence components for dependences between all
+/// load and store ops in loop nest rooted at 'forOp', at loop depths in range
+/// [1, maxLoopDepth].
+void getDependenceComponents(
+ AffineForOp forOp, unsigned maxLoopDepth,
+ std::vector<llvm::SmallVector<DependenceComponent, 2>> *depCompsVec);
+
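+// A minimal usage sketch (an illustration, not part of this patch;
+// 'rootForOp' and 'handleCarriedDependence' are hypothetical):
+//
+//   std::vector<llvm::SmallVector<DependenceComponent, 2>> depCompsVec;
+//   getDependenceComponents(rootForOp, /*maxLoopDepth=*/3, &depCompsVec);
+//   for (auto &depComps : depCompsVec)
+//     for (auto &comp : depComps)
+//       if (comp.lb.getValueOr(0) != 0 || comp.ub.getValueOr(0) != 0)
+//         handleCarriedDependence(comp); // carried by loop 'comp.op'.
+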
} // end namespace mlir
#endif // MLIR_ANALYSIS_AFFINE_ANALYSIS_H
}
// Returns the number of outer loops common to 'src/dstDomain'.
-static unsigned getNumCommonLoops(const FlatAffineConstraints &srcDomain,
- const FlatAffineConstraints &dstDomain) {
+// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
+static unsigned
+getNumCommonLoops(const FlatAffineConstraints &srcDomain,
+ const FlatAffineConstraints &dstDomain,
+ SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
// Find the number of common loops shared by src and dst accesses.
unsigned minNumLoops =
std::min(srcDomain.getNumDimIds(), dstDomain.getNumDimIds());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if (!isForInductionVar(srcDomain.getIdValue(i)) ||
        !isForInductionVar(dstDomain.getIdValue(i)) ||
srcDomain.getIdValue(i) != dstDomain.getIdValue(i))
break;
+ if (commonLoops != nullptr)
+ commonLoops->push_back(getForInductionVarOwner(srcDomain.getIdValue(i)));
++numCommonLoops;
}
+ if (commonLoops != nullptr)
+ assert(commonLoops->size() == numCommonLoops);
return numCommonLoops;
}
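// For example (illustrative): if 'srcDomain' and 'dstDomain' were built from
// accesses in the same two-deep nest, the induction variable ids of the shared
// %i and %j loops match, so this returns 2 and, when requested, appends the
// two AffineForOps to 'commonLoops', outermost first.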
FlatAffineConstraints *dependenceDomain,
llvm::SmallVector<DependenceComponent, 2> *dependenceComponents) {
// Find the number of common loops shared by src and dst accesses.
- unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
+ SmallVector<AffineForOp, 4> commonLoops;
+ unsigned numCommonLoops =
+ getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
if (numCommonLoops == 0)
return;
// Compute direction vectors for requested loop depth.
  // Scan each common loop variable column and set direction vectors based
  // on eliminated constraint system.
dependenceComponents->resize(numCommonLoops);
for (unsigned j = 0; j < numCommonLoops; ++j) {
+ (*dependenceComponents)[j].op = commonLoops[j].getOperation();
auto lbConst = dependenceDomain->getConstantLowerBound(j);
(*dependenceComponents)[j].lb =
lbConst.getValueOr(std::numeric_limits<int64_t>::min());
LLVM_DEBUG(dependenceConstraints->dump());
return true;
}
+
+/// Gathers dependence components for dependences between all ops in loop nest
+/// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
+void mlir::getDependenceComponents(
+ AffineForOp forOp, unsigned maxLoopDepth,
+ std::vector<llvm::SmallVector<DependenceComponent, 2>> *depCompsVec) {
+ // Collect all load and store ops in loop nest rooted at 'forOp'.
+ SmallVector<Operation *, 8> loadAndStoreOpInsts;
+ forOp.getOperation()->walk([&](Operation *opInst) {
+ if (opInst->isa<LoadOp>() || opInst->isa<StoreOp>())
+ loadAndStoreOpInsts.push_back(opInst);
+ });
+
+ unsigned numOps = loadAndStoreOpInsts.size();
+ for (unsigned d = 1; d <= maxLoopDepth; ++d) {
+ for (unsigned i = 0; i < numOps; ++i) {
+ auto *srcOpInst = loadAndStoreOpInsts[i];
+ MemRefAccess srcAccess(srcOpInst);
+ for (unsigned j = 0; j < numOps; ++j) {
+ auto *dstOpInst = loadAndStoreOpInsts[j];
+ MemRefAccess dstAccess(dstOpInst);
+
+ FlatAffineConstraints dependenceConstraints;
+ llvm::SmallVector<DependenceComponent, 2> depComps;
+ // TODO(andydavis,bondhugula) Explore whether it would be profitable
+ // to pre-compute and store deps instead of repeatedly checking.
+ if (checkMemrefAccessDependence(srcAccess, dstAccess, d,
+ &dependenceConstraints, &depComps)) {
+ depCompsVec->push_back(depComps);
+ }
+ }
+ }
+ }
+}
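+
+// Illustrative sketch (hypothetical pseudo-IR, not from this patch; index
+// affine.apply ops omitted for brevity): given the nest
+//
+//   affine.for %i = 0 to 10 {
+//     affine.for %j = 0 to 10 {
+//       %v = load %A[%i, %j] : memref<10x10xf32>
+//       store %v, %A[%i - 1, %j] : memref<10x10xf32>
+//     }
+//   }
+//
+// calling getDependenceComponents on the outer AffineForOp with maxLoopDepth 2
+// would record components [1, 1] for %i and [0, 0] for %j: the load-to-store
+// dependence is carried only by the outer loop, so %j may run in parallel.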
}
// Compute loop interchange permutation:
-// *) Computes dependence components between all op pairs in 'ops' for loop
-// depths in range [1, 'maxLoopDepth'].
+// *) Computes dependence components between all pairs of ops in the loop
+//    nest rooted at 'loops[0]', for loop depths in range [1, 'maxLoopDepth'].
// *) Classifies the outermost 'maxLoopDepth' loops as either parallel or
//    sequential.
// *) Computes the loop permutation which sinks sequential loops deeper into
//    the loop nest, while preserving the relative order of parallel loops.
// *) Checks each dependence component against the permutation to see if the
//    desired loop interchange would violate dependences by making the
//    dependence component lexicographically negative.
// TODO(andydavis) Move this function to LoopUtils.
static bool
-computeLoopInterchangePermutation(ArrayRef<Operation *> ops,
- unsigned maxLoopDepth,
+computeLoopInterchangePermutation(ArrayRef<AffineForOp> loops,
SmallVectorImpl<unsigned> *loopPermMap) {
- // Gather dependence components for dependences between all ops in 'ops'
- // at loop depths in range [1, maxLoopDepth].
- // TODO(andydavis) Refactor this loop into a LoopUtil utility function:
- // mlir::getDependenceComponents().
- // TODO(andydavis) Split this loop into two: first check all dependences,
- // and construct dep vectors. Then, scan through them to detect the parallel
- // ones.
+ assert(loops.size() > 1);
+ // Gather dependence components for dependences between all ops in loop nest
+ // rooted at 'loops[0]', at loop depths in range [1, maxLoopDepth].
+ unsigned maxLoopDepth = loops.size();
std::vector<llvm::SmallVector<DependenceComponent, 2>> depCompsVec;
+ getDependenceComponents(loops[0], maxLoopDepth, &depCompsVec);
+ // Mark loops as either parallel or sequential.
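+  // For example (illustrative): a dependence with components {[1, 1], [0, 0]}
+  // is carried by loop 0, which is marked sequential below, while loop 1 stays
+  // parallel unless some other dependence component at depth 1 is nonzero.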
llvm::SmallVector<bool, 8> isParallelLoop(maxLoopDepth, true);
- unsigned numOps = ops.size();
- for (unsigned d = 1; d <= maxLoopDepth; ++d) {
- for (unsigned i = 0; i < numOps; ++i) {
- auto *srcOpInst = ops[i];
- MemRefAccess srcAccess(srcOpInst);
- for (unsigned j = 0; j < numOps; ++j) {
- auto *dstOpInst = ops[j];
- MemRefAccess dstAccess(dstOpInst);
-
- FlatAffineConstraints dependenceConstraints;
- llvm::SmallVector<DependenceComponent, 2> depComps;
- // TODO(andydavis,bondhugula) Explore whether it would be profitable
- // to pre-compute and store deps instead of repeatedly checking.
- if (checkMemrefAccessDependence(srcAccess, dstAccess, d,
- &dependenceConstraints, &depComps)) {
- isParallelLoop[d - 1] = false;
- depCompsVec.push_back(depComps);
- }
- }
+ for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) {
+ llvm::SmallVector<DependenceComponent, 2> &depComps = depCompsVec[i];
+ assert(depComps.size() >= maxLoopDepth);
+ for (unsigned j = 0; j < maxLoopDepth; ++j) {
+ DependenceComponent &depComp = depComps[j];
+ assert(depComp.lb.hasValue() && depComp.ub.hasValue());
+ if (depComp.lb.getValue() != 0 || depComp.ub.getValue() != 0)
+ isParallelLoop[j] = false;
}
}
if (loops.size() < 2)
return;
- // Merge loads and stores into the same array.
- SmallVector<Operation *, 2> memOps(node->loads.begin(), node->loads.end());
- memOps.append(node->stores.begin(), node->stores.end());
-
// Compute loop permutation in 'loopPermMap'.
llvm::SmallVector<unsigned, 4> loopPermMap;
- if (!computeLoopInterchangePermutation(memOps, loops.size(), &loopPermMap))
+ if (!computeLoopInterchangePermutation(loops, &loopPermMap))
return;
int loopNestRootIndex = -1;