[mlir][linalg] Run CSE after every CodegenStrategy transformation.
author     gysit <gysit@google.com>
Tue, 30 Nov 2021 14:48:25 +0000 (14:48 +0000)
committer  gysit <gysit@google.com>
Tue, 30 Nov 2021 15:07:51 +0000 (15:07 +0000)
Add CSE after every transformation. Transformations such as tiling introduce redundant computation, for example, one AffineMinOp for every operand-dimension pair. Follow-up transformations such as padding and hoisting benefit from CSE, since comparing slice sizes then simplifies to comparing SSA values instead of analyzing affine expressions.
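
As a minimal sketch of the redundancy (the map and function below are illustrative, not taken from the test suite), tiling can compute the same tile-size bound once per operand-dimension pair:

  #map = affine_map<(d0) -> (16, -d0 + 72)>
  func @tile_sizes(%iv: index) -> (index, index) {
    // Both affine.min ops compute the same bound for the shared
    // iteration index, so CSE folds the second into the first.
    %0 = affine.min #map(%iv)
    %1 = affine.min #map(%iv)
    return %0, %1 : index, index
  }

After CSE a single SSA value carries the bound, which later transformations can compare directly.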

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D114585

mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
mlir/test/Dialect/Linalg/codegen-strategy.mlir

index 8ed43c8b0dbae3389e8c270348c96d6a247e29bc..8beeba759cd0263541cab15c72a28d5d4284c147 100644 (file)
 #include "mlir/Dialect/Vector/VectorTransforms.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
+#include "mlir/Pass/PassManager.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
@@ -335,6 +337,12 @@ struct LinalgStrategyEnablePass
 
     if (options.hoistRedundantVectorTransfersOnTensor)
       hoistRedundantVectorTransfersOnTensor(funcOp);
+
+    // Run CSE to clean up after canonicalization.
+    OpPassManager dynamicPM("builtin.func");
+    dynamicPM.addPass(createCSEPass());
+    if (failed(runPipeline(dynamicPM, funcOp)))
+      return signalPassFailure();
   }
 
   LinalgEnablingOptions options;
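
Note on the added pipeline above: nesting a fresh OpPassManager anchored on builtin.func and running it via runPipeline scopes CSE to the current function. Assuming the standard registration of the CSE pass, this roughly corresponds to the textual pipeline builtin.func(cse) applied to funcOp alone rather than to the whole module.
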
index fc65a593fc4ed6b788692675637e6414acef2bc3..d1deffafaf9370a9fd50a0fe13f03ad3d20a7305 100644 (file)
@@ -40,14 +40,19 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7
 
 // -----
 
+//     CHECK-PAD-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (16, -d0 + 72)>
+
 //         CHECK-PAD: func @matmul(
 func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
 
   // Check the padding of the input operands has been hoisted out of the tile loop nest.
  //      CHECK-PAD-COUNT-2: linalg.pad_tensor %{{.*}} nofold
-  //      CHECK-PAD-COUNT-3: scf.for
+  //              CHECK-PAD: scf.for
+  // Check CSE eliminates the duplicate min operations introduced by tiling.
+  //              CHECK-PAD: affine.min #[[MAP0]]
+  //          CHECK-PAD-NOT: affine.min #[[MAP0]]
+  //      CHECK-PAD-COUNT-2: scf.for
   //              CHECK-PAD: linalg.matmul
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%arg2: tensor<72x72xf32>) -> tensor<72x72xf32>
   return %0 : tensor<72x72xf32>
 }
-
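
To illustrate what the new checks guard (a hypothetical example; the function name, operand names, and slice shapes are illustrative, not the actual test output), two slices whose sizes come from the one affine.min that survives CSE are trivially comparable:

  #map = affine_map<(d0) -> (16, -d0 + 72)>
  func @slices(%t: tensor<72x72xf32>, %iv: index)
      -> (tensor<?x72xf32>, tensor<?x72xf32>) {
    // After CSE both slices consume the same size value, so padding and
    // hoisting compare SSA values instead of analyzing affine expressions.
    %sz = affine.min #map(%iv)
    %a = tensor.extract_slice %t[%iv, 0] [%sz, 72] [1, 1]
        : tensor<72x72xf32> to tensor<?x72xf32>
    %b = tensor.extract_slice %t[%iv, 0] [%sz, 72] [1, 1]
        : tensor<72x72xf32> to tensor<?x72xf32>
    return %a, %b : tensor<?x72xf32>, tensor<?x72xf32>
  }

This is the property the CHECK-PAD lines above rely on: only one affine.min with #[[MAP0]] remains after tiling and CSE.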