// Declares the "affine-parallelize" function pass. The `max-nested` option
// (unsigned, referenced by the pass code as `maxNested`) caps how many nested
// parallel loops the pass produces; per its description, the "-1u" default
// stands for unlimited.
def AffineParallelize : FunctionPass<"affine-parallelize"> {
let summary = "Convert affine.for ops into 1-D affine.parallel";
let constructor = "mlir::createAffineParallelizePass()";
+ let options = [
+ Option<"maxNested", "max-nested", "unsigned", /*default=*/"-1u",
+ "Maximum number of nested parallel loops to produce. "
+ "Defaults to unlimited (UINT_MAX).">,
+ ];
}
def AffineLoopNormalize : FunctionPass<"affine-loop-normalize"> {
// Pass entry point. Walks the function, collects every affine.for for which
// isLoopParallel() holds, then calls affineParallelize() on each candidate
// that currently has fewer than `maxNested` enclosing affine.parallel ops.
void AffineParallelize::runOnFunction() {
FuncOp f = getFunction();
- SmallVector<AffineForOp, 8> parallelizableLoops;
+
+ // The walker proceeds in post-order, but we need to process outer loops first
+ // to control the number of outer parallel loops, so push candidate loops to
+ // the front of a deque.
+ std::deque<AffineForOp> parallelizableLoops;
f.walk([&](AffineForOp loop) {
if (isLoopParallel(loop))
- parallelizableLoops.push_back(loop);
+ parallelizableLoops.push_front(loop);
});
- for (AffineForOp loop : parallelizableLoops)
- affineParallelize(loop);
+
+ for (AffineForOp loop : parallelizableLoops) {
+ unsigned numParentParallelOps = 0; // enclosing affine.parallel ops counted for this loop
+ for (Operation *op = loop->getParentOp();
+ op != nullptr && !op->hasTrait<OpTrait::AffineScope>(); // stop at the root or at the first AffineScope op
+ op = op->getParentOp()) {
+ if (isa<AffineParallelOp>(op))
+ ++numParentParallelOps;
+ }
+
+ if (numParentParallelOps < maxNested) // leave the loop as affine.for once the nesting limit is reached
+ affineParallelize(loop);
+ }
}
std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineParallelizePass() {
// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize| FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize='max-nested=1' | FileCheck --check-prefix=MAX-NESTED %s
// CHECK-LABEL: func @reduce_window_max() {
func @reduce_window_max() {
}
return
}
+
+// MAX-NESTED-LABEL: @max_nested
+func @max_nested(%m: memref<?x?xf32>, %lb0: index, %lb1: index,
+ %ub0: index, %ub1: index) { // two-deep loop nest exercised by the max-nested=1 RUN line
+ // MAX-NESTED: affine.parallel
+ affine.for %i = affine_map<(d0) -> (d0)>(%lb0) to affine_map<(d0) -> (d0)>(%ub0) { // outer loop: expected to be rewritten as a parallel loop
+ // MAX-NESTED: affine.for
+ affine.for %j = affine_map<(d0) -> (d0)>(%lb1) to affine_map<(d0) -> (d0)>(%ub1) { // inner loop: expected to stay sequential under a nesting limit of 1
+ affine.load %m[%i, %j] : memref<?x?xf32>
+ }
+ }
+ return
+}
+
+