From a560f2c646d7a762a3cf0a74ce55fc9876c1d974 Mon Sep 17 00:00:00 2001 From: Andy Davis Date: Fri, 24 May 2019 10:54:22 -0700 Subject: [PATCH] Affine Loop Fusion Utility Module (1/n). *) Adds LoopFusionUtils which will expose a set of loop fusion utilities (e.g. dependence checks, fusion cost/storage reduction, loop fusion transformation) for use by loop fusion algorithms. Support for checking block-level fusion-preventing dependences is added in this CL (additional loop fusion utilities will be added in subsequent CLs). *) Adds TestLoopFusion test pass for testing LoopFusionUtils at a fine granularity. *) Adds unit test for testing dependence check for block-level fusion-preventing dependences. -- PiperOrigin-RevId: 249861071 --- mlir/include/mlir/Transforms/LoopFusionUtils.h | 59 ++++ mlir/include/mlir/Transforms/Passes.h | 3 + mlir/lib/Transforms/LoopFusion.cpp | 8 + mlir/lib/Transforms/TestLoopFusion.cpp | 112 +++++++ mlir/lib/Transforms/Utils/LoopFusionUtils.cpp | 202 ++++++++++++ .../Transforms/loop-fusion-dependence-check.mlir | 337 +++++++++++++++++++++ 6 files changed, 721 insertions(+) create mode 100644 mlir/include/mlir/Transforms/LoopFusionUtils.h create mode 100644 mlir/lib/Transforms/TestLoopFusion.cpp create mode 100644 mlir/lib/Transforms/Utils/LoopFusionUtils.cpp create mode 100644 mlir/test/Transforms/loop-fusion-dependence-check.mlir diff --git a/mlir/include/mlir/Transforms/LoopFusionUtils.h b/mlir/include/mlir/Transforms/LoopFusionUtils.h new file mode 100644 index 0000000..ccda669 --- /dev/null +++ b/mlir/include/mlir/Transforms/LoopFusionUtils.h @@ -0,0 +1,59 @@ +//===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This header file defines prototypes for various loop fusion utility +// methods: these are not passes by themselves but are used either by passes, +// optimization sequences, or in turn by other transformation utilities. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H +#define MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H + +namespace mlir { +class AffineForOp; +struct ComputationSliceState; + +// TODO(andydavis) Extend this module to include utility functions for querying +// fusion cost/storage reduction, and for performing the loop fusion +// transformation. + +struct FusionResult { + enum ResultEnum { + Success, + FailPrecondition, // Failed precondition for fusion. (e.g. same block). + FailBlockDependence, // Fusion would violate another dependence in block. + FailFusionDependence, // Fusion would reverse dependences between loops. + FailComputationSlice, // Unable to compute src loop computation slice. + } value; + FusionResult(ResultEnum v) : value(v) {} +}; + +/// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the +/// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult +/// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are +/// in the same block and dependences would not be violated). Otherwise +/// returns a FusionResult explaining why fusion is not feasible. 
+/// NOTE: This function is not feature complete and should only be used in +/// testing. +/// TODO(andydavis) Update comments when this function is fully implemented. +FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, + unsigned dstLoopDepth, + ComputationSliceState *srcSlice); +} // end namespace mlir + +#endif // MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h index dc5d57f..48822cd 100644 --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -116,6 +116,9 @@ FunctionPassBase *createMemRefDataFlowOptPass(); /// Creates a pass to strip debug information from a function. FunctionPassBase *createStripDebugInfoPass(); +/// Creates a pass which tests loop fusion utilities. +FunctionPassBase *createTestLoopFusionPass(); + } // end namespace mlir #endif // MLIR_TRANSFORMS_PASSES_H diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 7999e0a..1f475f1 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -29,6 +29,7 @@ #include "mlir/IR/Builders.h" #include "mlir/Pass/Pass.h" #include "mlir/StandardOps/Ops.h" +#include "mlir/Transforms/LoopFusionUtils.h" #include "mlir/Transforms/LoopUtils.h" #include "mlir/Transforms/Passes.h" #include "mlir/Transforms/Utils.h" @@ -1810,6 +1811,13 @@ public: dstLoadOpInsts, dstStoreOpInsts, &sliceState, &bestDstLoopDepth, maximalFusion)) continue; + // TODO(andydavis) Remove assert and surrounding code when + // canFuseLoops is fully functional. + FusionResult result = mlir::canFuseLoops( + cast(srcNode->op), cast(dstNode->op), + bestDstLoopDepth, /*srcSlice=*/nullptr); + assert(result.value == FusionResult::Success); + (void)result; // Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'. 
auto sliceLoopNest = mlir::insertBackwardComputationSlice( diff --git a/mlir/lib/Transforms/TestLoopFusion.cpp b/mlir/lib/Transforms/TestLoopFusion.cpp new file mode 100644 index 0000000..9ace2fb --- /dev/null +++ b/mlir/lib/Transforms/TestLoopFusion.cpp @@ -0,0 +1,112 @@ +//===- TestLoopFusion.cpp - Test loop fusion ------------------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements a pass to test various loop fusion utility functions. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/AffineOps/AffineOps.h" +#include "mlir/Analysis/AffineAnalysis.h" +#include "mlir/Analysis/AffineStructures.h" +#include "mlir/Analysis/Passes.h" +#include "mlir/Analysis/Utils.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "mlir/StandardOps/Ops.h" +#include "mlir/Transforms/LoopFusionUtils.h" +#include "mlir/Transforms/Passes.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "test-loop-fusion" + +using namespace mlir; + +static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); + +static llvm::cl::opt clTestDependenceCheck( + "test-loop-fusion-dependence-check", + llvm::cl::desc("Enable testing of loop fusion dependence check"), + llvm::cl::cat(clOptionsCategory)); + +namespace { + +struct TestLoopFusion : public FunctionPass { + void runOnFunction() override; +}; + +} // end anonymous namespace + +FunctionPassBase *mlir::createTestLoopFusionPass() { + return new TestLoopFusion; +} + +// Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'. +static void +gatherLoops(Block *block, unsigned currLoopDepth, + DenseMap> &depthToLoops) { + auto &loopsAtDepth = depthToLoops[currLoopDepth]; + for (auto &op : *block) { + if (auto forOp = dyn_cast(op)) { + loopsAtDepth.push_back(forOp); + gatherLoops(forOp.getBody(), currLoopDepth + 1, depthToLoops); + } + } +} + +// Run fusion dependence check on 'loops[i]' and 'loops[j]' at 'loopDepth'. +// Emits a remark on 'loops[i]' if a fusion-preventing dependence exists. 
+static void testDependenceCheck(SmallVector &loops, unsigned i, + unsigned j, unsigned loopDepth) { + AffineForOp srcForOp = loops[i]; + AffineForOp dstForOp = loops[j]; + FusionResult result = mlir::canFuseLoops(srcForOp, dstForOp, loopDepth, + /*srcSlice=*/nullptr); + if (result.value == FusionResult::FailBlockDependence) { + srcForOp.getOperation()->emitRemark("block-level dependence preventing" + " fusion of loop nest ") + << i << " into loop nest " << j << " at depth " << loopDepth; + } +} + +void TestLoopFusion::runOnFunction() { + // Gather all AffineForOps by loop depth. + DenseMap> depthToLoops; + for (auto &block : getFunction()) { + gatherLoops(&block, /*currLoopDepth=*/0, depthToLoops); + } + + // Run tests on all combinations of src/dst loop nests in 'depthToLoops'. + for (auto &depthAndLoops : depthToLoops) { + unsigned loopDepth = depthAndLoops.first; + auto &loops = depthAndLoops.second; + unsigned numLoops = loops.size(); + for (unsigned j = 0; j < numLoops; ++j) { + for (unsigned k = 0; k < numLoops; ++k) { + if (j == k) + continue; + if (clTestDependenceCheck) + testDependenceCheck(loops, j, k, loopDepth); + } + } + } +} + +static PassRegistration + pass("test-loop-fusion", "Tests loop fusion utility functions."); diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp new file mode 100644 index 0000000..9de6766 --- /dev/null +++ b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp @@ -0,0 +1,202 @@ +//===- LoopFusionUtils.cpp ---- Utilities for loop fusion ----------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements loop fusion transformation utility functions. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/LoopFusionUtils.h" + +#include "mlir/AffineOps/AffineOps.h" +#include "mlir/Analysis/AffineAnalysis.h" +#include "mlir/Analysis/AffineStructures.h" +#include "mlir/Analysis/Utils.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/BlockAndValueMapping.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Operation.h" +#include "mlir/StandardOps/Ops.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "loop-fusion-utils" + +using namespace mlir; + +// Gathers all load and store operations in 'opA' into 'values', where +// 'values[memref] == true' for each store operation. +static void getLoadsAndStores(Operation *opA, DenseMap &values) { + opA->walk([&](Operation *op) { + if (auto loadOp = dyn_cast(op)) { + if (values.count(loadOp.getMemRef()) == 0) + values[loadOp.getMemRef()] = false; + } else if (auto storeOp = dyn_cast(op)) { + values[storeOp.getMemRef()] = true; + } + }); +} + +// Returns true if 'op' is a load or store operation which accesses a memref +// accessed in 'values' and at least one of the accesses is a store operation. +// Returns false otherwise. 
+static bool isDependentLoadOrStoreOp(Operation *op, + DenseMap &values) { + if (auto loadOp = dyn_cast(op)) { + return values.count(loadOp.getMemRef()) > 0 && + values[loadOp.getMemRef()] == true; + } else if (auto storeOp = dyn_cast(op)) { + return values.count(storeOp.getMemRef()) > 0; + } + return false; +} + +// Returns the first operation in range ('opA', 'opB') which has a data +// dependence on 'opA'. Returns 'nullptr' if no dependence exists. +static Operation *getFirstDependentOpInRange(Operation *opA, Operation *opB) { + // Record memref values from all loads/stores in loop nest rooted at 'opA'. + // Map from memref value to bool which is true if store, false otherwise. + DenseMap values; + getLoadsAndStores(opA, values); + + // For each 'opX' in block in range ('opA', 'opB'), check if there is a data + // dependence from 'opA' to 'opX' ('opA' and 'opX' access the same memref + // and at least one of the accesses is a store). + Operation *firstDepOp = nullptr; + for (Block::iterator it = std::next(Block::iterator(opA)); + it != Block::iterator(opB); ++it) { + Operation *opX = &(*it); + opX->walk([&](Operation *op) { + if (!firstDepOp && isDependentLoadOrStoreOp(op, values)) + firstDepOp = opX; + }); + if (firstDepOp) + break; + } + return firstDepOp; +} + +// Returns the last operation 'opX' in range ('opA', 'opB'), for which there +// exists a data dependence from 'opX' to 'opB'. +// Returns 'nullptr' if no dependence exists. +static Operation *getLastDependentOpInRange(Operation *opA, Operation *opB) { + // Record memref values from all loads/stores in loop nest rooted at 'opB'. + // Map from memref value to bool which is true if store, false otherwise. + DenseMap values; + getLoadsAndStores(opB, values); + + // For each 'opX' in block in range ('opA', 'opB') in reverse order, + // check if there is a data dependence from 'opX' to 'opB': + // *) 'opX' and 'opB' access the same memref and at least one of the accesses + // is a store. 
+ // *) 'opX' produces an SSA Value which is used by 'opB'. + Operation *lastDepOp = nullptr; + for (Block::reverse_iterator it = std::next(Block::reverse_iterator(opB)); + it != Block::reverse_iterator(opA); ++it) { + Operation *opX = &(*it); + opX->walk([&](Operation *op) { + if (lastDepOp) + return; + if (isa(op) || isa(op)) { + if (isDependentLoadOrStoreOp(op, values)) + lastDepOp = opX; + return; + } + for (auto *value : op->getResults()) { + for (auto *user : value->getUsers()) { + SmallVector loops; + // Check if any loop in loop nest surrounding 'user' is 'opB'. + getLoopIVs(*user, &loops); + if (llvm::is_contained(loops, cast(opB))) { + lastDepOp = opX; + } + } + } + }); + if (lastDepOp) + break; + } + return lastDepOp; +} + +// Computes and returns an insertion point operation, before which the +// fused loop nest can be inserted while preserving +// dependences. Returns nullptr if no such insertion point is found. +static Operation *getFusedLoopNestInsertionPoint(AffineForOp srcForOp, + AffineForOp dstForOp) { + bool isSrcForOpBeforeDstForOp = + srcForOp.getOperation()->isBeforeInBlock(dstForOp.getOperation()); + auto forOpA = isSrcForOpBeforeDstForOp ? srcForOp : dstForOp; + auto forOpB = isSrcForOpBeforeDstForOp ? dstForOp : srcForOp; + + auto *firstDepOpA = + getFirstDependentOpInRange(forOpA.getOperation(), forOpB.getOperation()); + auto *lastDepOpB = + getLastDependentOpInRange(forOpA.getOperation(), forOpB.getOperation()); + // Block: + // ... + // |-- opA + // | ... + // | lastDepOpB --| + // | ... | + // |-> firstDepOpA | + // ... | + // opB <--------- + // + // Valid insertion point range: (lastDepOpB, firstDepOpA) + // + if (firstDepOpA != nullptr) { + if (lastDepOpB != nullptr) { + if (firstDepOpA->isBeforeInBlock(lastDepOpB) || firstDepOpA == lastDepOpB) + // No valid insertion point exists which preserves dependences. + return nullptr; + } + // Return insertion point in valid range closest to 'opB'. 
+ // TODO(andydavis) Consider other insertion points in valid range. + return firstDepOpA; + } + // No dependences from 'opA' to operation in range ('opA', 'opB'), return + // 'opB' insertion point. + return forOpB.getOperation(); +} + +// TODO(andydavis) Add support for the following features in subsequent CLs: +// *) Computing union of slices computed between src/dst loads and stores. +// *) Compute dependences of unfused src/dst loops. +// *) Compute dependences of src/dst loop as if they were fused. +// *) Check for fusion preventing dependences (e.g. a dependence which changes +// from loop-independent to backward loop-carried after fusion). +FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, + unsigned dstLoopDepth, + ComputationSliceState *srcSlice) { + // Return 'FailPrecondition' if src/dst loop nests are not in the same block. + auto *block = srcForOp.getOperation()->getBlock(); + if (block != dstForOp.getOperation()->getBlock()) { + LLVM_DEBUG(llvm::dbgs() << "Cannot fuse loop nests in different blocks\n."); + return FusionResult::FailPrecondition; + } + + // Return 'FailBlockDependence' if no valid insertion point for the fused loop + // nest exists in 'block' which would preserve dependences. 
+ if (!getFusedLoopNestInsertionPoint(srcForOp, dstForOp)) { + LLVM_DEBUG(llvm::dbgs() << "Fusion would violate dependences in block\n."); + return FusionResult::FailBlockDependence; + } + return FusionResult::Success; +} diff --git a/mlir/test/Transforms/loop-fusion-dependence-check.mlir b/mlir/test/Transforms/loop-fusion-dependence-check.mlir new file mode 100644 index 0000000..3174f89 --- /dev/null +++ b/mlir/test/Transforms/loop-fusion-dependence-check.mlir @@ -0,0 +1,337 @@ +// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-dependence-check -split-input-file -verify | FileCheck %s + +// ----- + +// CHECK-LABEL: func @cannot_fuse_would_create_cycle() { +func @cannot_fuse_would_create_cycle() { + %a = alloc() : memref<10xf32> + %b = alloc() : memref<10xf32> + %c = alloc() : memref<10xf32> + + %cf7 = constant 7.0 : f32 + + // Set up the following dependences: + // 1) loop0 -> loop1 on memref '%a' + // 2) loop0 -> loop2 on memref '%b' + // 3) loop1 -> loop2 on memref '%c' + + // Fusing loop nest '%i0' and loop nest '%i2' would create a cycle. + affine.for %i0 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}} + %v0 = load %a[%i0] : memref<10xf32> + store %cf7, %b[%i0] : memref<10xf32> + } + affine.for %i1 = 0 to 10 { + store %cf7, %a[%i1] : memref<10xf32> + %v1 = load %c[%i1] : memref<10xf32> + } + affine.for %i2 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}} + %v2 = load %b[%i2] : memref<10xf32> + store %cf7, %c[%i2] : memref<10xf32> + } + return +} + +// ----- + +// CHECK-LABEL: func @can_fuse_rar_dependence() { +func @can_fuse_rar_dependence() { + %a = alloc() : memref<10xf32> + %b = alloc() : memref<10xf32> + %c = alloc() : memref<10xf32> + + %cf7 = constant 7.0 : f32 + + // Set up the following dependences: + // Make dependence from 0 to 1 on '%a' read-after-read. 
+ // 1) loop0 -> loop1 on memref '%a' + // 2) loop0 -> loop2 on memref '%b' + // 3) loop1 -> loop2 on memref '%c' + + // Should fuse: no fusion preventing remarks should be emitted for this test. + affine.for %i0 = 0 to 10 { + %v0 = load %a[%i0] : memref<10xf32> + store %cf7, %b[%i0] : memref<10xf32> + } + affine.for %i1 = 0 to 10 { + %v1 = load %a[%i1] : memref<10xf32> + %v2 = load %c[%i1] : memref<10xf32> + } + affine.for %i2 = 0 to 10 { + %v3 = load %b[%i2] : memref<10xf32> + store %cf7, %c[%i2] : memref<10xf32> + } + return +} + +// ----- + +// CHECK-LABEL: func @can_fuse_different_memrefs() { +func @can_fuse_different_memrefs() { + %a = alloc() : memref<10xf32> + %b = alloc() : memref<10xf32> + %c = alloc() : memref<10xf32> + %d = alloc() : memref<10xf32> + + %cf7 = constant 7.0 : f32 + + // Set up the following dependences: + // Make dependence from 0 to 1 on unrelated memref '%d'. + // 1) loop0 -> loop1 on memref '%a' + // 2) loop0 -> loop2 on memref '%b' + // 3) loop1 -> loop2 on memref '%c' + + // Should fuse: no fusion preventing remarks should be emitted for this test. + affine.for %i0 = 0 to 10 { + %v0 = load %a[%i0] : memref<10xf32> + store %cf7, %b[%i0] : memref<10xf32> + } + affine.for %i1 = 0 to 10 { + store %cf7, %d[%i1] : memref<10xf32> + %v1 = load %c[%i1] : memref<10xf32> + } + affine.for %i2 = 0 to 10 { + %v2 = load %b[%i2] : memref<10xf32> + store %cf7, %c[%i2] : memref<10xf32> + } + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_intermediate_store() { +func @should_not_fuse_across_intermediate_store() { + %0 = alloc() : memref<10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}} + %v0 = load %0[%i0] : memref<10xf32> + "op0"(%v0) : (f32) -> () + } + + // Should not fuse loop nests '%i0' and '%i1' across top-level store. 
+ store %cf7, %0[%c0] : memref<10xf32> + + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}} + %v1 = load %0[%i1] : memref<10xf32> + "op1"(%v1) : (f32) -> () + } + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_intermediate_load() { +func @should_not_fuse_across_intermediate_load() { + %0 = alloc() : memref<10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}} + store %cf7, %0[%i0] : memref<10xf32> + } + + // Should not fuse loop nests '%i0' and '%i1' across top-level load. + %v0 = load %0[%c0] : memref<10xf32> + "op0"(%v0) : (f32) -> () + + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}} + store %cf7, %0[%i1] : memref<10xf32> + } + + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def() { +func @should_not_fuse_across_ssa_value_def() { + %0 = alloc() : memref<10xf32> + %1 = alloc() : memref<10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}} + %v0 = load %0[%i0] : memref<10xf32> + store %v0, %1[%i0] : memref<10xf32> + } + + // Loop nest '%i0' cannot be fused past load from '%1' due to RAW dependence. + %v1 = load %1[%c0] : memref<10xf32> + "op0"(%v1) : (f32) -> () + + // Loop nest '%i1' cannot be fused past SSA value def '%c2' which it uses. 
+ %c2 = constant 2 : index + + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}} + store %cf7, %0[%c2] : memref<10xf32> + } + + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_store_before_load() { +func @should_not_fuse_store_before_load() { + %0 = alloc() : memref<10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}} + store %cf7, %0[%i0] : memref<10xf32> + %v0 = load %0[%i0] : memref<10xf32> + } + + affine.for %i1 = 0 to 10 { + %v1 = load %0[%i1] : memref<10xf32> + } + + affine.for %i2 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}} + store %cf7, %0[%i2] : memref<10xf32> + %v2 = load %0[%i2] : memref<10xf32> + } + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_load_at_depth1() { +func @should_not_fuse_across_load_at_depth1() { + %0 = alloc() : memref<10x10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}} + store %cf7, %0[%i0, %i1] : memref<10x10xf32> + } + + %v1 = load %0[%i0, %c0] : memref<10x10xf32> + + affine.for %i3 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}} + store %cf7, %0[%i0, %i3] : memref<10x10xf32> + } + } + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_load_in_loop_at_depth1() { +func @should_not_fuse_across_load_in_loop_at_depth1() { + %0 = alloc() : memref<10x10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level 
dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}} + store %cf7, %0[%i0, %i1] : memref<10x10xf32> + } + + affine.for %i2 = 0 to 10 { + %v1 = load %0[%i0, %i2] : memref<10x10xf32> + } + + affine.for %i3 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}} + store %cf7, %0[%i0, %i3] : memref<10x10xf32> + } + } + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_store_at_depth1() { +func @should_not_fuse_across_store_at_depth1() { + %0 = alloc() : memref<10x10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}} + %v0 = load %0[%i0, %i1] : memref<10x10xf32> + } + + store %cf7, %0[%i0, %c0] : memref<10x10xf32> + + affine.for %i3 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}} + %v1 = load %0[%i0, %i3] : memref<10x10xf32> + } + } + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_store_in_loop_at_depth1() { +func @should_not_fuse_across_store_in_loop_at_depth1() { + %0 = alloc() : memref<10x10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}} + %v0 = load %0[%i0, %i1] : memref<10x10xf32> + } + + affine.for %i2 = 0 to 10 { + store %cf7, %0[%i0, %i2] : memref<10x10xf32> + } + + affine.for %i3 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}} + %v1 = load %0[%i0, %i3] : memref<10x10xf32> + } + } + return +} + +// ----- + +// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def_at_depth1() { +func 
@should_not_fuse_across_ssa_value_def_at_depth1() { + %0 = alloc() : memref<10x10xf32> + %1 = alloc() : memref<10x10xf32> + %c0 = constant 0 : index + %cf7 = constant 7.0 : f32 + + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}} + %v0 = load %0[%i0, %i1] : memref<10x10xf32> + store %v0, %1[%i0, %i1] : memref<10x10xf32> + } + + // RAW dependence from store in loop nest '%i1' to 'load %1' prevents + // fusion of loop nest '%i1' into loops after load. + %v1 = load %1[%i0, %c0] : memref<10x10xf32> + "op0"(%v1) : (f32) -> () + + // Loop nest '%i2' cannot be fused past SSA value def '%c2' which it uses. + %c2 = constant 2 : index + + affine.for %i2 = 0 to 10 { + // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}} + store %cf7, %0[%i0, %c2] : memref<10x10xf32> + } + } + return +} \ No newline at end of file -- 2.7.4