[MLIR][GPU] Expose GpuParallelLoopMapping as non-test pass.

author Christian Sigg <csigg@google.com>

Mon, 30 May 2022 06:32:01 +0000 (08:32 +0200)

committer Christian Sigg <csigg@google.com>

Mon, 30 May 2022 07:20:48 +0000 (09:20 +0200)
author Christian Sigg <csigg@google.com>
Mon, 30 May 2022 06:32:01 +0000 (08:32 +0200)
committer Christian Sigg <csigg@google.com>
Mon, 30 May 2022 07:20:48 +0000 (09:20 +0200)
diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h

index 07d92e8..9ae3683 100644 (file)
--- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
+++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
@@ -60,13 +60,5 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
  LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                               ArrayRef<ParallelLoopDimMapping> mapping);
  } // namespace gpu
-
-/// Maps the parallel loops found in the given function to workgroups. The first
-/// loop encountered will be mapped to the global workgroup and the second loop
-/// encountered to the local workgroup. Within each mapping, the first three
-/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
-/// mapped to sequential loops.
-void greedilyMapParallelSCFToGPU(Region &region);
-
  } // namespace mlir
  #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H
diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h

index b9b127c..53f3f84 100644 (file)
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@@ -39,6 +39,13 @@ createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
  /// Rewrites a function region so that GPU ops execute asynchronously.
  std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();
  
+/// Creates a pass that greedily maps the scf.parallel loops found in a function
+/// to GPU workgroups. The first loop encountered is mapped to the global
+/// workgroup and the second loop encountered to the local workgroup. Within
+/// each mapping, the first three dimensions are mapped to x/y/z hardware ids
+/// and all following dimensions are mapped to sequential loops.
+std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();
+
  /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
  void populateGpuAllReducePatterns(RewritePatternSet &patterns);
  
diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td

index 0752839..f5786e8 100644 (file)
--- a/mlir/include/mlir/Dialect/GPU/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Passes.td
@@ -29,4 +29,11 @@ def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> {
    let dependentDialects = ["async::AsyncDialect"];
  }
  
+def GpuMapParallelLoopsPass
+    : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> {
+  let summary = "Greedily maps loops to GPU hardware dimensions.";
+  let constructor = "mlir::createGpuMapParallelLoopsPass()";
+  let description = "Greedily maps loops to GPU hardware dimensions.";
+}
+
  #endif // MLIR_DIALECT_GPU_PASSES
diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp

index b032169..c7a1ef3 100644 (file)
--- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
@@ -13,26 +13,25 @@
  
  #include "mlir/Dialect/GPU/ParallelLoopMapper.h"
  
+#include "PassDetail.h"
  #include "mlir/Dialect/GPU/GPUDialect.h"
  #include "mlir/Dialect/GPU/Passes.h"
  #include "mlir/Dialect/SCF/SCF.h"
  #include "mlir/IR/AffineMap.h"
  #include "mlir/Pass/Pass.h"
  
-using namespace mlir;
-using namespace mlir::gpu;
-using namespace mlir::scf;
-
  #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
  #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
+
  namespace mlir {
-namespace gpu {
  
-StringRef getMappingAttrName() { return "mapping"; }
+using scf::ParallelOp;
  
-ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
-                                                     AffineMap map,
-                                                     AffineMap bound) {
+StringRef gpu::getMappingAttrName() { return "mapping"; }
+
+gpu::ParallelLoopDimMapping
+gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map,
+                                   AffineMap bound) {
    MLIRContext *context = map.getContext();
    OpBuilder builder(context);
    return ParallelLoopDimMapping::get(
@@ -40,8 +39,8 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
        AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
  }
  
-LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
-                             ArrayRef<ParallelLoopDimMapping> mapping) {
+LogicalResult gpu::setMappingAttr(ParallelOp ploopOp,
+                                  ArrayRef<ParallelLoopDimMapping> mapping) {
    // Verify that each processor is mapped to only once.
    llvm::DenseSet<gpu::Processor> specifiedMappings;
    for (auto dimAttr : mapping) {
@@ -56,20 +55,17 @@ LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                     ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
    return success();
  }
-} // namespace gpu
-} // namespace mlir
  
+namespace gpu {
  namespace {
-
  enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
+} // namespace
  
  static constexpr int kNumHardwareIds = 3;
  
-} // namespace
-
  /// Bounded increment on MappingLevel. Increments to the next
  /// level unless Sequential was already reached.
-MappingLevel &operator++(MappingLevel &mappingLevel) {
+static MappingLevel &operator++(MappingLevel &mappingLevel) {
    if (mappingLevel < Sequential) {
      mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
    }
@@ -82,8 +78,7 @@ MappingLevel &operator++(MappingLevel &mappingLevel) {
  /// TODO: Make this use x for the inner-most loop that is
  /// distributed to map to x, the next innermost to y and the next innermost to
  /// z.
-static gpu::Processor getHardwareIdForMapping(MappingLevel level,
-                                              int dimension) {
+static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
  
    if (dimension >= kNumHardwareIds || level == Sequential)
      return Processor::Sequential;
@@ -145,6 +140,21 @@ static void mapParallelOp(ParallelOp parallelOp,
    }
  }
  
-void mlir::greedilyMapParallelSCFToGPU(Region &region) {
-  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
+namespace {
+struct GpuMapParallelLoopsPass
+    : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
+  void runOnOperation() override {
+    for (Region &region : getOperation()->getRegions()) {
+      region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
+    }
+  }
+};
+
+} // namespace
+} // namespace gpu
+} // namespace mlir
+
+std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
+mlir::createGpuMapParallelLoopsPass() {
+  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
  }
diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir

index ff5b07f..8c23364 100644 (file)
--- a/mlir/test/Dialect/GPU/mapping.mlir
+++ b/mlir/test/Dialect/GPU/mapping.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s
  
  func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                      %arg3 : index) {
diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt

index 97fc669..65f4780 100644 (file)
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@@ -3,7 +3,6 @@ add_mlir_library(MLIRGPUTestPasses
    TestConvertGPUKernelToCubin.cpp
    TestConvertGPUKernelToHsaco.cpp
    TestGpuMemoryPromotion.cpp
-  TestGpuParallelLoopMapping.cpp
    TestGpuRewrite.cpp
  
    EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp b/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp

deleted file mode 100644 (file)

index f53abba..0000000
--- a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===- TestGPUParallelLoopMapping.cpp - Test pass for GPU loop mapping ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the pass testing the utilities for mapping parallel
-// loops to gpu hardware ids.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/ParallelLoopMapper.h"
-#include "mlir/Pass/Pass.h"
-
-using namespace mlir;
-
-namespace {
-/// Simple pass for testing the mapping of parallel loops to hardware ids using
-/// a greedy mapping strategy.
-struct TestGpuGreedyParallelLoopMappingPass
-    : public PassWrapper<TestGpuGreedyParallelLoopMappingPass,
-                         OperationPass<>> {
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
-      TestGpuGreedyParallelLoopMappingPass)
-
-  StringRef getArgument() const final {
-    return "test-gpu-greedy-parallel-loop-mapping";
-  }
-  StringRef getDescription() const final {
-    return "Greedily maps all parallel loops to gpu hardware ids.";
-  }
-  void runOnOperation() override {
-    for (Region &region : getOperation()->getRegions())
-      greedilyMapParallelSCFToGPU(region);
-  }
-};
-} // namespace
-
-namespace mlir {
-namespace test {
-void registerTestGpuParallelLoopMappingPass() {
-  PassRegistration<TestGpuGreedyParallelLoopMappingPass>();
-}
-} // namespace test
-} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp

index 78814df..dcd8946 100644 (file)
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -79,7 +79,6 @@ void registerTestDynamicPipelinePass();
  void registerTestExpandTanhPass();
  void registerTestComposeSubView();
  void registerTestMultiBuffering();
-void registerTestGpuParallelLoopMappingPass();
  void registerTestIRVisitorsPass();
  void registerTestGenericIRVisitorsPass();
  void registerTestGenericIRVisitorsInterruptPass();
@@ -176,7 +175,6 @@ void registerTestPasses() {
    mlir::test::registerTestExpandTanhPass();
    mlir::test::registerTestComposeSubView();
    mlir::test::registerTestMultiBuffering();
-  mlir::test::registerTestGpuParallelLoopMappingPass();
    mlir::test::registerTestIRVisitorsPass();
    mlir::test::registerTestGenericIRVisitorsPass();
    mlir::test::registerTestInterfaces();
author	Christian Sigg <csigg@google.com>
	Mon, 30 May 2022 06:32:01 +0000 (08:32 +0200)
committer	Christian Sigg <csigg@google.com>
	Mon, 30 May 2022 07:20:48 +0000 (09:20 +0200)
mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h		patch | blob | history
mlir/include/mlir/Dialect/GPU/Passes.h		patch | blob | history
mlir/include/mlir/Dialect/GPU/Passes.td		patch | blob | history
mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp		patch | blob | history
mlir/test/Dialect/GPU/mapping.mlir		patch | blob | history
mlir/test/lib/Dialect/GPU/CMakeLists.txt		patch | blob | history
mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp	[deleted file]	patch | blob | history
mlir/tools/mlir-opt/mlir-opt.cpp		patch | blob | history