[flang][hlfir] disable region simplification in HLFIR pipeline

author Jean Perier <jperier@nvidia.com>

Thu, 30 Mar 2023 07:48:45 +0000 (09:48 +0200)

committer Jean Perier <jperier@nvidia.com>

Thu, 30 Mar 2023 07:49:57 +0000 (09:49 +0200)
author Jean Perier <jperier@nvidia.com>
Thu, 30 Mar 2023 07:48:45 +0000 (09:48 +0200)
committer Jean Perier <jperier@nvidia.com>
Thu, 30 Mar 2023 07:49:57 +0000 (09:49 +0200)
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc

index 8a1f7eb..2c5efec 100644 (file)
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -99,7 +99,17 @@ void addNestedPassConditionally(
  
  namespace fir {
  
-static void defaultFlangInlinerOptPipeline(mlir::OpPassManager &pm) {
+/// Add MLIR Canonicalizer pass with region simplification disabled.
+/// FIR does not support the promotion of some SSA values to block arguments (or
+/// into arith.select operands) that may be done by MLIR block merging in the
+/// region simplification (e.g., !fir.shape<> SSA values are not supported as
+/// block arguments).
+/// Aside from the fir.shape issue, moving some abstract SSA values into block
+/// arguments may have a heavy cost since it forces their code generation, which
+/// may be expensive (array temporary). The MLIR pass does not take these
+/// extra costs into account when doing block merging.
+static void addCanonicalizerPassWithoutRegionSimplification(
+    mlir::OpPassManager &pm) {
    mlir::GreedyRewriteConfig config;
    config.enableRegionSimplification = false;
    pm.addPass(mlir::createCanonicalizerPass(config));
@@ -200,8 +210,8 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
    // The default inliner pass adds the canonicalizer pass with the default
    // configuration. Create the inliner pass with tco config.
    llvm::StringMap<mlir::OpPassManager> pipelines;
-  pm.addPass(
-      mlir::createInlinerPass(pipelines, defaultFlangInlinerOptPipeline));
+  pm.addPass(mlir::createInlinerPass(
+      pipelines, addCanonicalizerPassWithoutRegionSimplification));
    pm.addPass(fir::createSimplifyRegionLitePass());
    pm.addPass(mlir::createCSEPass());
  
@@ -225,7 +235,7 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
  inline void createHLFIRToFIRPassPipeline(
      mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) {
    if (optLevel.isOptimizingForSpeed())
-    pm.addPass(mlir::createCanonicalizerPass());
+    addCanonicalizerPassWithoutRegionSimplification(pm);
    pm.addPass(hlfir::createLowerHLFIRIntrinsicsPass());
    pm.addPass(hlfir::createBufferizeHLFIRPass());
    pm.addPass(hlfir::createConvertHLFIRtoFIRPass());
diff --git a/flang/test/HLFIR/no-block-merging.fir b/flang/test/HLFIR/no-block-merging.fir

new file mode 100644 (file)

index 0000000..987cf8b
--- /dev/null
+++ b/flang/test/HLFIR/no-block-merging.fir
@@ -0,0 +1,33 @@
+// Test that the HLFIR pipeline does not call MLIR canonicalizer with block
+// merging enabled (moving fir.shape to block argument would cause failures
+// when translating the FIR to LLVM).
+// RUN: %flang_fc1 %s -flang-experimental-hlfir -emit-llvm -O2 -o - | FileCheck %s
+
+func.func @no_shape_merge(%cdt: i1, %from: !fir.ref<!fir.array<?xf64>>, %to : !fir.ref<f64>) {
+  %c10 = arith.constant 10 : index
+  %c20 = arith.constant 20 : index
+  %c5 = arith.constant 5 : index
+  %shape1 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %shape2 = fir.shape %c20 : (index) -> !fir.shape<1>
+  cf.cond_br %cdt, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  %coor1 = fir.array_coor %from(%shape1) %c5 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %load1 = fir.load %coor1 : !fir.ref<f64>
+  fir.store %load1 to %to : !fir.ref<f64>
+  cf.br ^bb3
+^bb2:  // pred: ^bb0
+  %coor2 = fir.array_coor %from(%shape2) %c5 : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>, index) -> !fir.ref<f64>
+  %load2 = fir.load %coor2 : !fir.ref<f64>
+  fir.store %load2 to %to : !fir.ref<f64>
+  cf.br ^bb3
+^bb3:  // pred: ^bb1, ^bb2
+  return
+}
+
+// Note: block merging happens in the output below, but after FIR codegen.
+
+// CHECK-LABEL:  define void @no_shape_merge(
+// CHECK:  %[[GEP:.*]] = getelementptr double, ptr %{{.*}}
+// CHECK:  %[[LOAD:.*]] = load double, ptr %[[GEP]]
+// CHECK:  store double %[[LOAD]], ptr %{{.*}}
+// CHECK:  ret void
author	Jean Perier <jperier@nvidia.com>
	Thu, 30 Mar 2023 07:48:45 +0000 (09:48 +0200)
committer	Jean Perier <jperier@nvidia.com>
	Thu, 30 Mar 2023 07:49:57 +0000 (09:49 +0200)
flang/include/flang/Tools/CLOptions.inc		patch \| blob \| history
flang/test/HLFIR/no-block-merging.fir	[new file with mode: 0644]	patch \| blob