From: Alex Zinenko Date: Mon, 7 Dec 2020 15:18:32 +0000 (+0100) Subject: [mlir] Add an option to control the number of loops in affine parallelizer X-Git-Tag: llvmorg-13-init~4084 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=80766ecc65096deeb4ff6f03562dcad94c54b862;p=platform%2Fupstream%2Fllvm.git [mlir] Add an option to control the number of loops in affine parallelizer Add a pass option to control the number of nested parallel loops produced by the parallelization passes. This is useful to build end-to-end passes targeting systems that don't need multiple parallel dimensions (e.g., CPUs typically need only one). Reviewed By: wsmoses, chelini Differential Revision: https://reviews.llvm.org/D92765 --- diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td index ace2726..9f2aac7 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -118,6 +118,11 @@ def AffineVectorize : FunctionPass<"affine-super-vectorize"> { def AffineParallelize : FunctionPass<"affine-parallelize"> { let summary = "Convert affine.for ops into 1-D affine.parallel"; let constructor = "mlir::createAffineParallelizePass()"; + let options = [ + Option<"maxNested", "max-nested", "unsigned", /*default=*/"-1u", + "Maximum number of nested parallel loops to produce. 
" + "Defaults to unlimited (UINT_MAX).">, + ]; } def AffineLoopNormalize : FunctionPass<"affine-loop-normalize"> { diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp index b3651e2..d8511be 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp @@ -36,13 +36,28 @@ struct AffineParallelize : public AffineParallelizeBase { void AffineParallelize::runOnFunction() { FuncOp f = getFunction(); - SmallVector parallelizableLoops; + + // The walker proceeds in post-order, but we need to process outer loops first + // to control the number of outer parallel loops, so push candidate loops to + // the front of a deque. + std::deque parallelizableLoops; f.walk([&](AffineForOp loop) { if (isLoopParallel(loop)) - parallelizableLoops.push_back(loop); + parallelizableLoops.push_front(loop); }); - for (AffineForOp loop : parallelizableLoops) - affineParallelize(loop); + + for (AffineForOp loop : parallelizableLoops) { + unsigned numParentParallelOps = 0; + for (Operation *op = loop->getParentOp(); + op != nullptr && !op->hasTrait(); + op = op->getParentOp()) { + if (isa(op)) + ++numParentParallelOps; + } + + if (numParentParallelOps < maxNested) + affineParallelize(loop); + } } std::unique_ptr> mlir::createAffineParallelizePass() { diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir index cbc80a0..08aaa7f 100644 --- a/mlir/test/Dialect/Affine/parallelize.mlir +++ b/mlir/test/Dialect/Affine/parallelize.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize| FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize='max-nested=1' | FileCheck --check-prefix=MAX-NESTED %s // CHECK-LABEL: func @reduce_window_max() { func @reduce_window_max() { @@ -144,3 +145,18 @@ func @nested_for_with_minmax(%m: memref, %lb0: index, } return } + 
+// MAX-NESTED-LABEL: @max_nested +func @max_nested(%m: memref<?x?xf32>, %lb0: index, %lb1: index, + %ub0: index, %ub1: index) { + // MAX-NESTED: affine.parallel + affine.for %i = affine_map<(d0) -> (d0)>(%lb0) to affine_map<(d0) -> (d0)>(%ub0) { + // MAX-NESTED: affine.for + affine.for %j = affine_map<(d0) -> (d0)>(%lb1) to affine_map<(d0) -> (d0)>(%ub1) { + affine.load %m[%i, %j] : memref<?x?xf32> + } + } + return +} + +