From 1d541bd92044432e98c58e2d291c9776516b42f9 Mon Sep 17 00:00:00 2001 From: Kai Sasaki Date: Sun, 4 Dec 2022 20:09:01 +0900 Subject: [PATCH] [mlir][affine] Support affine.parallel in the index set analysis Support affine.parallel in the index set analysis. It allows us to do dependence analysis containing affine.parallel in addition to affine.for and affine.if. This change only supports the constant lower/upper bound in affine.parallel. Other complicated affine map bounds will be supported in further commits. See https://github.com/llvm/llvm-project/issues/57327 Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D136056 --- .../Dialect/Affine/Analysis/AffineStructures.h | 8 +++ mlir/include/mlir/Dialect/Affine/IR/AffineOps.h | 5 ++ .../lib/Dialect/Affine/Analysis/AffineAnalysis.cpp | 37 ++++++++++---- .../Dialect/Affine/Analysis/AffineStructures.cpp | 27 ++++++++++ mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 13 +++++ mlir/test/Dialect/Affine/scalrep.mlir | 58 ++++++++++++++++++++++ mlir/test/Transforms/memref-dependence-check.mlir | 16 ++++++ .../lib/Analysis/TestMemRefDependenceCheck.cpp | 21 ++++---- 8 files changed, 165 insertions(+), 20 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h index 29c059f..d6cd025 100644 --- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h @@ -24,6 +24,7 @@ namespace mlir { class AffineCondition; class AffineForOp; class AffineIfOp; +class AffineParallelOp; class AffineMap; class AffineValueMap; class IntegerSet; @@ -141,6 +142,13 @@ public: // TODO: add support for non-unit strides. LogicalResult addAffineForOpDomain(AffineForOp forOp); + /// Add constraints (lower and upper bounds) for the specified + /// 'affine.parallel' operation's Value using IR information stored in its + /// bound maps. Returns failure for the yet unimplemented/unsupported cases. + /// Asserts if the Value corresponding to the 'affine.parallel' operation + /// isn't found in the constraint system. + LogicalResult addAffineParallelOpDomain(AffineParallelOp parallelOp); + /// Adds constraints (lower and upper bounds) for each loop in the loop nest /// described by the bound maps `lbMaps` and `ubMaps` of a computation slice. /// Every pair (`lbMaps[i]`, `ubMaps[i]`) describes the bounds of a loop in diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h index 686a51a..e08b206 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -450,6 +450,11 @@ AffineForOp getForInductionVarOwner(Value val); void extractForInductionVars(ArrayRef forInsts, SmallVectorImpl *ivs); +/// Extracts the induction variables from a list of either AffineForOp or +/// AffineParallelOp and places them in the output argument `ivs`. +void extractInductionVars(ArrayRef affineOps, + SmallVectorImpl &ivs); + /// Builds a perfect nest of affine.for loops, i.e., each loop except the /// innermost one contains only another loop and a terminator. The loops iterate /// from "lbs" to "ubs" with "steps". The body of the innermost loop is diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp index 0fddcd7..82ffbbc 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp @@ -240,27 +240,36 @@ void mlir::getReachableAffineApplyOps( LogicalResult mlir::getIndexSet(MutableArrayRef ops, FlatAffineValueConstraints *domain) { SmallVector indices; - SmallVector forOps; + SmallVector loopOps; + size_t numDims = 0; for (Operation *op : ops) { - if (!isa(op)) { - // TODO: Support affine.parallel ops. - LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if ops"); + if (!isa(op)) { + LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/" + "parallel ops"); return failure(); } - if (AffineForOp forOp = dyn_cast(op)) - forOps.push_back(forOp); + if (AffineForOp forOp = dyn_cast(op)) { + loopOps.push_back(forOp); + // An AffineForOp retains only 1 induction variable. + numDims += 1; + } else if (AffineParallelOp parallelOp = dyn_cast(op)) { + loopOps.push_back(parallelOp); + numDims += parallelOp.getNumDims(); + } } - extractForInductionVars(forOps, &indices); - // Reset while associated Values in 'indices' to the domain. - domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices); + extractInductionVars(loopOps, indices); + // Reset while associating Values in 'indices' to the domain. + domain->reset(numDims, /*numSymbols=*/0, /*numLocals=*/0, indices); for (Operation *op : ops) { // Add constraints from forOp's bounds. if (AffineForOp forOp = dyn_cast(op)) { if (failed(domain->addAffineForOpDomain(forOp))) return failure(); - } else if (AffineIfOp ifOp = dyn_cast(op)) { + } else if (auto ifOp = dyn_cast(op)) { domain->addAffineIfOpDomain(ifOp); - } + } else if (auto parallelOp = dyn_cast(op)) + if (failed(domain->addAffineParallelOpDomain(parallelOp))) + return failure(); } return success(); } @@ -594,6 +603,12 @@ DependenceResult mlir::checkMemrefAccessDependence( if (srcAccess.memref != dstAccess.memref) return DependenceResult::NoDependence; + // TODO: Support affine.parallel which does not specify the ordering. + auto srcParent = srcAccess.opInst->getParentOfType(); + auto dstParent = dstAccess.opInst->getParentOfType(); + if (srcParent || dstParent) + return DependenceResult::Failure; + // Return 'NoDependence' if one of these accesses is not an // AffineWriteOpInterface. if (!allowRAR && !isa(srcAccess.opInst) && diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp index 5c284c1..15b50a4 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp @@ -639,6 +639,33 @@ FlatAffineValueConstraints::addAffineForOpDomain(AffineForOp forOp) { forOp.getUpperBoundOperands()); } +LogicalResult FlatAffineValueConstraints::addAffineParallelOpDomain( + AffineParallelOp parallelOp) { + size_t ivPos = 0; + for (auto iv : parallelOp.getIVs()) { + unsigned pos; + if (!findVar(iv, &pos)) { + assert(false && "variable expected for the IV value"); + return failure(); + } + + AffineMap lowerBound = parallelOp.getLowerBoundMap(ivPos); + if (lowerBound.isConstant()) + addBound(BoundType::LB, pos, lowerBound.getSingleConstantResult()); + else if (failed(addBound(BoundType::LB, pos, lowerBound, + parallelOp.getLowerBoundsOperands()))) + return failure(); + + auto upperBound = parallelOp.getUpperBoundMap(ivPos); + if (upperBound.isConstant()) + addBound(BoundType::UB, pos, upperBound.getSingleConstantResult()); + else if (failed(addBound(BoundType::UB, pos, upperBound, + parallelOp.getUpperBoundsOperands()))) + return failure(); + } + return success(); +} + LogicalResult FlatAffineValueConstraints::addDomainFromSliceMaps(ArrayRef lbMaps, ArrayRef ubMaps, diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index d7d21c8..ffa354f 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -2318,6 +2318,19 @@ void mlir::extractForInductionVars(ArrayRef forInsts, ivs->push_back(forInst.getInductionVar()); } +void mlir::extractInductionVars(ArrayRef affineOps, + SmallVectorImpl &ivs) { + ivs.reserve(affineOps.size()); + for (Operation *op : affineOps) { + // Add constraints from forOp's bounds. + if (auto forOp = dyn_cast(op)) + ivs.push_back(forOp.getInductionVar()); + else if (auto parallelOp = dyn_cast(op)) + for (size_t i = 0; i < parallelOp.getBody()->getNumArguments(); i++) + ivs.push_back(parallelOp.getBody()->getArgument(i)); + } +} + /// Builds an affine loop nest, using "loopCreatorFn" to create individual loop /// operations. template diff --git a/mlir/test/Dialect/Affine/scalrep.mlir b/mlir/test/Dialect/Affine/scalrep.mlir index f220ef6..972133a 100644 --- a/mlir/test/Dialect/Affine/scalrep.mlir +++ b/mlir/test/Dialect/Affine/scalrep.mlir @@ -788,3 +788,61 @@ func.func @no_forwarding_across_scopes() -> memref<1xf32> { } return %A : memref<1xf32> } + +// CHECK-LABEL: func @parallel_store_load() { +func.func @parallel_store_load() { + %cf7 = arith.constant 7.0 : f32 + %m = memref.alloc() : memref<10xf32> + affine.parallel (%i0) = (0) to (10) { + affine.store %cf7, %m[%i0] : memref<10xf32> + %v0 = affine.load %m[%i0] : memref<10xf32> + %v1 = arith.addf %v0, %v0 : f32 + } + memref.dealloc %m : memref<10xf32> + return +// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32 +// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) { +// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: return +} + +func.func @non_constant_parallel_store_load(%N : index) { + %cf7 = arith.constant 7.0 : f32 + %m = memref.alloc() : memref<10xf32> + affine.parallel (%i0) = (0) to (%N) { + affine.store %cf7, %m[%i0] : memref<10xf32> + %v0 = affine.load %m[%i0] : memref<10xf32> + %v1 = arith.addf %v0, %v0 : f32 + } + memref.dealloc %m : memref<10xf32> + return +} +// CHECK: func.func @non_constant_parallel_store_load(%[[ARG0:.*]]: index) { +// CHECK-NEXT: %[[C7:.*]] = arith.constant 7.000000e+00 : f32 +// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (%[[ARG0]]) { +// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: return + +// CHECK-LABEL: func @parallel_surrounding_for() { +func.func @parallel_surrounding_for() { + %cf7 = arith.constant 7.0 : f32 + %m = memref.alloc() : memref<10x10xf32> + affine.parallel (%i0) = (0) to (10) { + affine.for %i1 = 0 to 10 { + affine.store %cf7, %m[%i0,%i1] : memref<10x10xf32> + %v0 = affine.load %m[%i0,%i1] : memref<10x10xf32> + %v1 = arith.addf %v0, %v0 : f32 + } + } + memref.dealloc %m : memref<10x10xf32> + return +// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32 +// CHECK-NEXT: affine.parallel (%{{.*}}) = (0) to (10) { +// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { +// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32 +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: return +} diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir index 3eb5b70..2e28a4cc 100644 --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -1064,3 +1064,19 @@ func.func @test_interleaved_affine_for_if() { return } + +// ----- +// CHECK-LABEL: func @parallel_dependence_check_failure() { +func.func @parallel_dependence_check_failure() { + %0 = memref.alloc() : memref<10xf32> + %cst = arith.constant 7.000000e+00 : f32 + affine.parallel (%i0) = (0) to (10) { + // expected-error @+1 {{dependence check failed}} + affine.store %cst, %0[%i0] : memref<10xf32> + } + affine.parallel (%i1) = (0) to (10) { + // expected-error @+1 {{dependence check failed}} + %1 = affine.load %0[%i1] : memref<10xf32> + } + return +} diff --git a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp index a45e21a..d1bcb8d 100644 --- a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp +++ b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp @@ -86,15 +86,18 @@ static void checkDependences(ArrayRef loadsAndStores) { DependenceResult result = checkMemrefAccessDependence( srcAccess, dstAccess, d, &dependenceConstraints, &dependenceComponents); - assert(result.value != DependenceResult::Failure); - bool ret = hasDependence(result); - // TODO: Print dependence type (i.e. RAW, etc) and print - // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance - // vectors from ([1, 1], [3, 3]) to (1, 3). - srcOpInst->emitRemark("dependence from ") - << i << " to " << j << " at depth " << d << " = " - << getDirectionVectorStr(ret, numCommonLoops, d, - dependenceComponents); + if (result.value == DependenceResult::Failure) { + srcOpInst->emitError("dependence check failed"); + } else { + bool ret = hasDependence(result); + // TODO: Print dependence type (i.e. RAW, etc) and print + // distance vectors as: ([2, 3], [0, 10]). Also, shorten distance + // vectors from ([1, 1], [3, 3]) to (1, 3). + srcOpInst->emitRemark("dependence from ") + << i << " to " << j << " at depth " << d << " = " + << getDirectionVectorStr(ret, numCommonLoops, d, + dependenceComponents); + } } } } -- 2.7.4