if (srcAccess.memref != dstAccess.memref)
continue;
// Check if 'loopDepth' exceeds nesting depth of src/dst ops.
- if ((!isBackwardSlice && loopDepth > getNestingDepth(*opsA[i])) ||
- (isBackwardSlice && loopDepth > getNestingDepth(*opsB[j]))) {
+ if ((!isBackwardSlice && loopDepth > getNestingDepth(opsA[i])) ||
+ (isBackwardSlice && loopDepth > getNestingDepth(opsB[j]))) {
LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n.");
return failure();
}
/// Returns the nesting depth of this statement, i.e., the number of loops
/// surrounding this statement.
-unsigned mlir::getNestingDepth(Operation &op) {
- Operation *currOp = &op;
+unsigned mlir::getNestingDepth(Operation *op) {
+ Operation *currOp = op;
unsigned depth = 0;
while ((currOp = currOp->getParentOp())) {
if (isa<AffineForOp>(currOp))
auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
if (failed(
region->compute(opInst,
- /*loopDepth=*/getNestingDepth(*block.begin())))) {
+ /*loopDepth=*/getNestingDepth(&*block.begin())))) {
return opInst->emitError("error obtaining memory region\n");
}
return false;
// Dep check depth would be number of enclosing loops + 1.
- unsigned depth = getNestingDepth(*forOp.getOperation()) + 1;
+ unsigned depth = getNestingDepth(forOp) + 1;
// Check dependences between all pairs of ops in 'loadAndStoreOpInsts'.
for (auto *srcOpInst : loadAndStoreOpInsts) {
// This is a straightforward implementation not optimized for speed. Optimize
// if needed.
void MemRefDataFlowOpt::forwardStoreToLoad(AffineLoadOp loadOp) {
- Operation *loadOpInst = loadOp.getOperation();
-
- // First pass over the use list to get minimum number of surrounding
+ // First pass over the use list to get the minimum number of surrounding
// loops common between the load op and the store op, with min taken across
// all store ops.
SmallVector<Operation *, 8> storeOps;
- unsigned minSurroundingLoops = getNestingDepth(*loadOpInst);
+ unsigned minSurroundingLoops = getNestingDepth(loadOp);
for (auto *user : loadOp.getMemRef().getUsers()) {
auto storeOp = dyn_cast<AffineStoreOp>(user);
if (!storeOp)
continue;
- auto *storeOpInst = storeOp.getOperation();
- unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
+ unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
- storeOps.push_back(storeOpInst);
+ storeOps.push_back(storeOp);
}
// The list of store op candidates for forwarding that satisfy conditions
// post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
SmallVector<Operation *, 8> depSrcStores;
- for (auto *storeOpInst : storeOps) {
- MemRefAccess srcAccess(storeOpInst);
- MemRefAccess destAccess(loadOpInst);
+ for (auto *storeOp : storeOps) {
+ MemRefAccess srcAccess(storeOp);
+ MemRefAccess destAccess(loadOp);
// Find stores that may be reaching the load.
FlatAffineConstraints dependenceConstraints;
- unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
+ unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
unsigned d;
// Dependences at loop depth <= minSurroundingLoops do NOT matter.
for (d = nsLoops + 1; d > minSurroundingLoops; d--) {
continue;
// Stores that *may* be reaching the load.
- depSrcStores.push_back(storeOpInst);
+ depSrcStores.push_back(storeOp);
// 1. Check if the store and the load have mathematically equivalent
// affine access functions; this implies that they statically refer to the
continue;
// 2. The store has to dominate the load op to be candidate.
- if (!domInfo->dominates(storeOpInst, loadOpInst))
+ if (!domInfo->dominates(storeOp, loadOp))
continue;
// We now have a candidate for forwarding.
- fwdingCandidates.push_back(storeOpInst);
+ fwdingCandidates.push_back(storeOp);
}
// 3. Of all the store op's that meet the above criteria, the store that
// Note: this can be implemented in a cleaner way with postdominator tree
// traversals. Consider this for the future if needed.
Operation *lastWriteStoreOp = nullptr;
- for (auto *storeOpInst : fwdingCandidates) {
+ for (auto *storeOp : fwdingCandidates) {
if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
- return postDomInfo->postDominates(storeOpInst, depStore);
+ return postDomInfo->postDominates(storeOp, depStore);
})) {
- lastWriteStoreOp = storeOpInst;
+ lastWriteStoreOp = storeOp;
break;
}
}
// Record the memref for a later sweep to optimize away.
memrefsToErase.insert(loadOp.getMemRef());
// Record this to erase later.
- loadOpsToErase.push_back(loadOpInst);
+ loadOpsToErase.push_back(loadOp);
}
void MemRefDataFlowOpt::runOnFunction() {
loadOpsToErase.clear();
memrefsToErase.clear();
- // Walk all load's and perform load/store forwarding.
+ // Walk all load's and perform store to load forwarding.
f.walk([&](AffineLoadOp loadOp) { forwardStoreToLoad(loadOp); });
// Erase all load op's whose results were replaced with store fwd'ed ones.
- for (auto *loadOp : loadOpsToErase) {
+ for (auto *loadOp : loadOpsToErase)
loadOp->erase();
- }
// Check if the store fwd'ed memrefs are now left with only stores and can
// thus be completely deleted. Note: the canonicalize pass should be able
// to do this as well, but we'll do it here since we collected these anyway.
for (auto memref : memrefsToErase) {
// If the memref hasn't been alloc'ed in this function, skip.
- Operation *defInst = memref.getDefiningOp();
- if (!defInst || !isa<AllocOp>(defInst))
+ Operation *defOp = memref.getDefiningOp();
+ if (!defOp || !isa<AllocOp>(defOp))
// TODO(mlir-team): if the memref was returned by a 'call' operation, we
// could still erase it if the call had no side-effects.
continue;
- if (llvm::any_of(memref.getUsers(), [&](Operation *ownerInst) {
- return (!isa<AffineStoreOp>(ownerInst) && !isa<DeallocOp>(ownerInst));
+ if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
+ return (!isa<AffineStoreOp>(ownerOp) && !isa<DeallocOp>(ownerOp));
}))
continue;
// Erase all stores, the dealloc, and the alloc on the memref.
for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
user->erase();
- defInst->erase();
+ defOp->erase();
}
}
//
//===----------------------------------------------------------------------===//
-#include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopFusionUtils.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-
#define DEBUG_TYPE "test-loop-fusion"
using namespace mlir;
std::string result;
llvm::raw_string_ostream os(result);
// Slice insertion point format [loop-depth, operation-block-index]
- unsigned ipd = getNestingDepth(*sliceUnion.insertPoint);
+ unsigned ipd = getNestingDepth(&*sliceUnion.insertPoint);
unsigned ipb = getBlockIndex(*sliceUnion.insertPoint);
os << "insert point: (" << std::to_string(ipd) << ", " << std::to_string(ipb)
<< ")";