From c3440e213ca02c2fcd91f3f8cf65774a2b1a9e6f Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier@nvidia.com>
Date: Thu, 30 Mar 2023 09:48:45 +0200
Subject: [PATCH] [flang][hlfir] disable region simplification in HLFIR
 pipeline

Block merging is disabled with FIR: some FIR values should not be promoted
to block arguments, and the region simplification block merging is
promoting all SSA value types to block arguments when two blocks are
similar except for the usage of these values.

Differential Revision: https://reviews.llvm.org/D147130
---
 flang/include/flang/Tools/CLOptions.inc | 18 ++++++++++++++----
 flang/test/HLFIR/no-block-merging.fir   | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 4 deletions(-)
 create mode 100644 flang/test/HLFIR/no-block-merging.fir
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 8a1f7eb..2c5efec 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -99,7 +99,17 @@ void addNestedPassConditionally(
 
 namespace fir {
 
-static void defaultFlangInlinerOptPipeline(mlir::OpPassManager &pm) {
+/// Add MLIR Canonicalizer pass with region simplification disabled.
+/// FIR does not support the promotion of some SSA values to block arguments (or
+/// into arith.select operands) that may be done by mlir block merging in the
+/// region simplification (e.g., !fir.shape<> SSA values are not supported as
+/// block arguments).
+/// Aside from the fir.shape issue, moving some abstract SSA values into block
+/// arguments may have a heavy cost since it forces their code generation that
+/// may be expensive (array temporary). The MLIR pass does not take these
+/// extra costs into account when doing block merging.
+static void addCanonicalizerPassWithoutRegionSimplification(
+    mlir::OpPassManager &pm) {
   mlir::GreedyRewriteConfig config;
   config.enableRegionSimplification = false;
   pm.addPass(mlir::createCanonicalizerPass(config));
@@ -200,8 +210,8 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
   // The default inliner pass adds the canonicalizer pass with the default
   // configuration. Create the inliner pass with tco config.
   llvm::StringMap<mlir::OpPassManager> pipelines;
-  pm.addPass(
-      mlir::createInlinerPass(pipelines, defaultFlangInlinerOptPipeline));
+  pm.addPass(mlir::createInlinerPass(
+      pipelines, addCanonicalizerPassWithoutRegionSimplification));
   pm.addPass(fir::createSimplifyRegionLitePass());
   pm.addPass(mlir::createCSEPass());
 
@@ -225,7 +235,7 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 inline void createHLFIRToFIRPassPipeline(
     mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed())
-    pm.addPass(mlir::createCanonicalizerPass());
+    addCanonicalizerPassWithoutRegionSimplification(pm);
   pm.addPass(hlfir::createLowerHLFIRIntrinsicsPass());
   pm.addPass(hlfir::createBufferizeHLFIRPass());
   pm.addPass(hlfir::createConvertHLFIRtoFIRPass());
diff --git a/flang/test/HLFIR/no-block-merging.fir b/flang/test/HLFIR/no-block-merging.fir
new file mode 100644
index 0000000..987cf8b
--- /dev/null
+++ b/flang/test/HLFIR/no-block-merging.fir
@@ -0,0 +1,33 @@
+// Test that the HLFIR pipeline does not call MLIR canonicalizer with block
+// merging enabled (moving fir.shape to block argument would cause failures
+// when translating the FIR to LLVM).
+// RUN: %flang_fc1 %s -flang-experimental-hlfir -emit-llvm -O2 -o - | FileCheck %s
+
+func.func @no_shape_merge(%cdt: i1, %from: !fir.ref<!fir.array<?xf64>>, %to : !fir.ref<f64>) {
+  %c10 = arith.constant 10 : index
+  %c20 = arith.constant 20 : index
+  %c5 = arith.constant 5 : index
+  %shape1 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %shape2 = fir.shape %c20 : (index) -> !fir.shape<1>
+  cf.cond_br %cdt, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  %coor1 = fir.array_coor %from(%shape1) %c5 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %load1 = fir.load %coor1 : !fir.ref<f64>
+  fir.store %load1 to %to : !fir.ref<f64>
+  cf.br ^bb3
+^bb2:  // pred: ^bb0
+  %coor2 = fir.array_coor %from(%shape2) %c5 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %load2 = fir.load %coor2 : !fir.ref<f64>
+  fir.store %load2 to %to : !fir.ref<f64>
+  cf.br ^bb3
+^bb3:  // pred: ^bb1, ^bb2
+  return
+}
+
+// Note: block merging happens in the output below, but after FIR codegen.
+
+// CHECK-LABEL:  define void @no_shape_merge(
+// CHECK:  %[[GEP:.*]] = getelementptr double, ptr %{{.*}}
+// CHECK:  %[[LOAD:.*]] = load double, ptr %[[GEP]]
+// CHECK:  store double %[[LOAD]], ptr %{{.*}}
+// CHECK:  ret void
-- 
2.7.4