[MLIR] Clarify (test-scf-)parallel-loop-collapsing

author Tres Popp <tpopp@google.com>

Tue, 4 Apr 2023 09:36:30 +0000 (11:36 +0200)

committer Tres Popp <tpopp@google.com>

Wed, 5 Apr 2023 11:41:15 +0000 (13:41 +0200)
author Tres Popp <tpopp@google.com>
Tue, 4 Apr 2023 09:36:30 +0000 (11:36 +0200)
committer Tres Popp <tpopp@google.com>
Wed, 5 Apr 2023 11:41:15 +0000 (13:41 +0200)
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h

index ce76572..90b315e 100644 (file)
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
@@ -37,7 +37,7 @@ std::unique_ptr<Pass> createSCFForLoopCanonicalizationPass();
  
  /// Creates a pass that transforms a single ParallelLoop over N induction
  /// variables into another ParallelLoop over less than N induction variables.
-std::unique_ptr<Pass> createParallelLoopCollapsingPass();
+std::unique_ptr<Pass> createTestSCFParallelLoopCollapsingPass();
  
  /// Creates a loop fusion pass which fuses parallel loops.
  std::unique_ptr<Pass> createParallelLoopFusionPass();
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td

index 9abffe5..16eb415 100644 (file)
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -50,9 +50,38 @@ def SCFParallelLoopFusion : Pass<"scf-parallel-loop-fusion"> {
    let constructor = "mlir::createParallelLoopFusionPass()";
  }
  
-def SCFParallelLoopCollapsing : Pass<"scf-parallel-loop-collapsing"> {
-  let summary = "Collapse parallel loops to use less induction variables";
-  let constructor = "mlir::createParallelLoopCollapsingPass()";
+def TestSCFParallelLoopCollapsing : Pass<"test-scf-parallel-loop-collapsing"> {
+  let summary = "Test parallel loops collapsing transformation";
+  let constructor = "mlir::createTestSCFParallelLoopCollapsingPass()";
+  let description = [{
+      This pass is purely for testing the scf::collapseParallelLoops
+      transformation. The transformation does not have opinions on how a
+      parallel loop should be collapsed, so this pass is structured for the
+      common case on GPUs of collapsing to a 3d parallel loop. 3 lists can be
+      provided to collapsed-indices-{0,1,2} to represent how the loop should be
+      collapsed and must reference evrey iterator in the original parallel loop.
+
+    ```mlir
+    # Before:
+    scf.parallel (%arg0, %arg1)
+                 = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+      "test.sink"(%5, %3) : (index, index) -> ()
+      scf.yield
+    }
+
+    # After:
+    scf.parallel (%arg0) = (%c0) to (%c4) step (%c1) {
+      %0 = arith.remsi %arg0, %c2 : index
+      %1 = arith.divsi %arg0, %c2 : index
+      %2 = arith.muli %0, %c7 : index
+      %3 = arith.addi %2, %c3 : index
+      %4 = arith.muli %1, %c7 : index
+      %5 = arith.addi %4, %c3 : index
+      "test.sink"(%5, %3) : (index, index) -> ()
+    }
+    ```
+  }];
+
    let options = [
      ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
                 "Which loop indices to combine 0th loop index">,
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp

index 34c5e77..a69df02 100644 (file)
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -11,11 +11,12 @@
  #include "mlir/Dialect/SCF/IR/SCF.h"
  #include "mlir/Dialect/SCF/Utils/Utils.h"
  #include "mlir/Transforms/RegionUtils.h"
+#include "llvm/ADT/SmallSet.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  
  namespace mlir {
-#define GEN_PASS_DEF_SCFPARALLELLOOPCOLLAPSING
+#define GEN_PASS_DEF_TESTSCFPARALLELLOOPCOLLAPSING
  #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
  } // namespace mlir
  
@@ -24,27 +25,84 @@ namespace mlir {
  using namespace mlir;
  
  namespace {
-struct ParallelLoopCollapsing
-    : public impl::SCFParallelLoopCollapsingBase<ParallelLoopCollapsing> {
+struct TestSCFParallelLoopCollapsing
+    : public impl::TestSCFParallelLoopCollapsingBase<
+          TestSCFParallelLoopCollapsing> {
    void runOnOperation() override {
      Operation *module = getOperation();
  
+    // The common case for GPU dialect will be simplifying the ParallelOp to 3
+    // arguments, so we do that here to simplify things.
+    llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops;
+
+    // Gather the input args into the format required by
+    // `collapseParallelLoops`.
+    if (!clCollapsedIndices0.empty())
+      combinedLoops.push_back(clCollapsedIndices0);
+    if (!clCollapsedIndices1.empty()) {
+      if (clCollapsedIndices0.empty()) {
+        llvm::errs()
+            << "collapsed-indices-1 specified but not collapsed-indices-0";
+        signalPassFailure();
+        return;
+      }
+      combinedLoops.push_back(clCollapsedIndices1);
+    }
+    if (!clCollapsedIndices2.empty()) {
+      if (clCollapsedIndices1.empty()) {
+        llvm::errs()
+            << "collapsed-indices-2 specified but not collapsed-indices-1";
+        signalPassFailure();
+        return;
+      }
+      combinedLoops.push_back(clCollapsedIndices2);
+    }
+
+    if (combinedLoops.empty()) {
+      llvm::errs() << "No collapsed-indices were specified. This pass is only "
+                      "for testing and does not automatically collapse all "
+                      "parallel loops or similar.";
+      signalPassFailure();
+      return;
+    }
+
+    // Confirm that the specified loops are [0,N) by testing that N values exist
+    // with the maximum value being N-1.
+    llvm::SmallSet<unsigned, 8> flattenedCombinedLoops;
+    unsigned maxCollapsedIndex = 0;
+    for (auto &loops : combinedLoops) {
+      for (auto &loop : loops) {
+        flattenedCombinedLoops.insert(loop);
+        maxCollapsedIndex = std::max(maxCollapsedIndex, loop);
+      }
+    }
+
+    if (maxCollapsedIndex != flattenedCombinedLoops.size() - 1 ||
+        !flattenedCombinedLoops.contains(maxCollapsedIndex)) {
+      llvm::errs()
+          << "collapsed-indices arguments must include all values [0,N).";
+      signalPassFailure();
+      return;
+    }
+
+    // Only apply the transformation on parallel loops where the specified
+    // transformation is valid, but do NOT early abort in the case of invalid
+    // loops.
      module->walk([&](scf::ParallelOp op) {
-      // The common case for GPU dialect will be simplifying the ParallelOp to 3
-      // arguments, so we do that here to simplify things.
-      llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops;
-      if (!clCollapsedIndices0.empty())
-        combinedLoops.push_back(clCollapsedIndices0);
-      if (!clCollapsedIndices1.empty())
-        combinedLoops.push_back(clCollapsedIndices1);
-      if (!clCollapsedIndices2.empty())
-        combinedLoops.push_back(clCollapsedIndices2);
+      if (flattenedCombinedLoops.size() != op.getNumLoops()) {
+        op.emitOpError("has ")
+            << op.getNumLoops()
+            << " iter args while this limited functionality testing pass was "
+               "configured only for loops with exactly "
+            << flattenedCombinedLoops.size() << " iter args.";
+        return;
+      }
        collapseParallelLoops(op, combinedLoops);
      });
    }
  };
  } // namespace
  
-std::unique_ptr<Pass> mlir::createParallelLoopCollapsingPass() {
-  return std::make_unique<ParallelLoopCollapsing>();
+std::unique_ptr<Pass> mlir::createTestSCFParallelLoopCollapsingPass() {
+  return std::make_unique<TestSCFParallelLoopCollapsing>();
  }
diff --git a/mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir b/mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir

new file mode 100644 (file)

index 0000000..6f98d2c
--- /dev/null
+++ b/mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir
@@ -0,0 +1,34 @@
+// First test various sets of invalid arguments
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing))' 2>&1 | FileCheck %s --check-prefix=CL0
+// CL0: No collapsed-indices were specified. This pass is only for testing and does not automatically collapse all parallel loops or similar
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-1=1}))' 2>&1 | FileCheck %s --check-prefix=CL1
+// CL1: collapsed-indices-1 specified but not collapsed-indices-0
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=1  collapsed-indices-2=2}))' 2>&1 | FileCheck %s --check-prefix=CL2
+// CL2: collapsed-indices-2 specified but not collapsed-indices-1
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=1  collapsed-indices-1=2}))' 2>&1 | FileCheck %s --check-prefix=NON-ZERO
+// NON-ZERO: collapsed-indices arguments must include all values [0,N).
+
+// RUN: not mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0  collapsed-indices-1=2}))' 2>&1 | FileCheck %s --check-prefix=NON-CONTIGUOUS
+// NON-CONTIGUOUS: collapsed-indices arguments must include all values [0,N).
+
+
+// Then test for invalid combinations of argument+input-ir
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}))' -verify-diagnostics
+func.func @too_few_iters(%arg0: index, %arg1: index, %arg2: index) {
+  // expected-error @+1 {{op has 1 iter args while this limited functionality testing pass was configured only for loops with exactly 2 iter args.}}
+  scf.parallel (%arg3) = (%arg0) to (%arg1) step (%arg2) {
+    scf.yield
+  }
+  return
+}
+
+func.func @too_many_iters(%arg0: index, %arg1: index, %arg2: index) {
+  // expected-error @+1 {{op has 3 iter args while this limited functionality testing pass was configured only for loops with exactly 2 iter args.}}
+  scf.parallel (%arg3, %arg4, %arg5) = (%arg0, %arg0, %arg0) to (%arg1, %arg1, %arg1) step (%arg2, %arg2, %arg2) {
+    scf.yield
+  }
+  return
+}
diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir

index 90c3f5d..c606fe7 100644 (file)
--- a/mlir/test/Transforms/parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s
  
  // CHECK-LABEL: func @parallel_many_dims() {
  func.func @parallel_many_dims() {
diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir

index 91cab12..7b68838 100644 (file)
--- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s
  
  func.func @collapse_to_single() {
    %c0 = arith.constant 3 : index
author	Tres Popp <tpopp@google.com>
	Tue, 4 Apr 2023 09:36:30 +0000 (11:36 +0200)
committer	Tres Popp <tpopp@google.com>
	Wed, 5 Apr 2023 11:41:15 +0000 (13:41 +0200)
mlir/include/mlir/Dialect/SCF/Transforms/Passes.h		patch \| blob \| history
mlir/include/mlir/Dialect/SCF/Transforms/Passes.td		patch \| blob \| history
mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp		patch \| blob \| history
mlir/test/Transforms/invalid-parallel-loop-collapsing.mlir	[new file with mode: 0644]	patch \| blob
mlir/test/Transforms/parallel-loop-collapsing.mlir		patch \| blob \| history
mlir/test/Transforms/single-parallel-loop-collapsing.mlir		patch \| blob \| history