From 407832db60c6f51ec5d9e5160670ee9756a340af Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 25 Apr 2023 09:07:11 +0000 Subject: [PATCH] [flang][hlfir] Add pass to inline elementals Implement hlfir.elemental inlining as proposed in flang/docs/HighLevelFIR.md. This is a separate pass to make the code easier to understand. One alternative would have been to modify the hlfir.elemental lowering in the HLFIR bufferization pass. Currently, a hlfir.elemental can only be inlined once; if there are more uses, the existing bufferization is used instead. Usage of mlir::applyPatternsAndFoldGreedily was suggested by @jeanPerier Differential Revision: https://reviews.llvm.org/D149258 --- flang/include/flang/Optimizer/HLFIR/HLFIROps.td | 13 ++ flang/include/flang/Optimizer/HLFIR/Passes.h | 1 + flang/include/flang/Optimizer/HLFIR/Passes.td | 5 + flang/include/flang/Tools/CLOptions.inc | 1 + .../lib/Optimizer/HLFIR/Transforms/CMakeLists.txt | 1 + .../HLFIR/Transforms/InlineElementals.cpp | 119 ++++++++++ flang/test/Driver/mlir-debug-pass-pipeline.f90 | 2 + flang/test/Driver/mlir-pass-pipeline.f90 | 3 +- flang/test/Fir/basic-program.fir | 1 + flang/test/HLFIR/inline-elemental.fir | 245 +++++++++++++++++++++ 10 files changed, 390 insertions(+), 1 deletion(-) create mode 100644 flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp create mode 100644 flang/test/HLFIR/inline-elemental.fir diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 0aed277..1fe3eba 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -606,6 +606,11 @@ def hlfir_ElementalOp : hlfir_Op<"elemental", [RecursiveMemoryEffects]> { The shape and typeparams operands represent the extents and type parameters of the resulting array value. 
+ Currently there is no way to control the iteration order of a hlfir + elemental operation and so operations in the body of the elemental must + not have side effects. If this is changed, an attribute must be added so + that the elemental inlining pass can skip these impure elementals. + Example: Y + X, with Integer :: X(10, 20), Y(10,20) ``` @@ -670,9 +675,17 @@ def hlfir_ApplyOp : hlfir_Op<"apply", [NoMemoryEffect, AttrSizedOperandSegments] let description = [{ Given an hlfir.expr array value, hlfir.apply allow retrieving the value for an element given one based indices. + When hlfir.apply is used on an hlfir.elemental, and if the hlfir.elemental operation evaluation can be moved to the location of the hlfir.apply, it is as if the hlfir.elemental body was evaluated given the hlfir.apply indices. + Therefore, apply operations on hlfir.elemental expressions should be located + such that evaluating the hlfir.elemental at the position of the hlfir.apply + operation produces the same result as evaluating the hlfir.elemental at its + location in the instruction stream. Attention should be paid to + hlfir.elemental memory side effects (in practice these are unlikely). + "10.1.4 Evaluation of operations" says that expression evaluation shall not + impact/be impacted by other expression evaluation in the statement. 
}]; let arguments = (ins hlfir_ExprType:$expr, diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.h b/flang/include/flang/Optimizer/HLFIR/Passes.h index a5aa35b..eb3cc14 100644 --- a/flang/include/flang/Optimizer/HLFIR/Passes.h +++ b/flang/include/flang/Optimizer/HLFIR/Passes.h @@ -26,6 +26,7 @@ std::unique_ptr<mlir::Pass> createConvertHLFIRtoFIRPass(); std::unique_ptr<mlir::Pass> createBufferizeHLFIRPass(); std::unique_ptr<mlir::Pass> createLowerHLFIRIntrinsicsPass(); std::unique_ptr<mlir::Pass> createSimplifyHLFIRIntrinsicsPass(); +std::unique_ptr<mlir::Pass> createInlineElementalsPass(); std::unique_ptr<mlir::Pass> createLowerHLFIROrderedAssignmentsPass(); #define GEN_PASS_REGISTRATION diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td index 4932409..7e832a9 100644 --- a/flang/include/flang/Optimizer/HLFIR/Passes.td +++ b/flang/include/flang/Optimizer/HLFIR/Passes.td @@ -43,4 +43,9 @@ def SimplifyHLFIRIntrinsics : Pass<"simplify-hlfir-intrinsics", "::mlir::func::F let constructor = "hlfir::createSimplifyHLFIRIntrinsicsPass()"; } +def InlineElementals : Pass<"inline-elementals", "::mlir::func::FuncOp"> { + let summary = "Inline chained hlfir.elemental operations"; + let constructor = "hlfir::createInlineElementalsPass()"; +} + #endif //FORTRAN_DIALECT_HLFIR_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index a799427..16eb998 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -242,6 +242,7 @@ inline void createHLFIRToFIRPassPipeline( addCanonicalizerPassWithoutRegionSimplification(pm); pm.addPass(hlfir::createSimplifyHLFIRIntrinsicsPass()); } + pm.addPass(hlfir::createInlineElementalsPass()); pm.addPass(hlfir::createLowerHLFIROrderedAssignmentsPass()); pm.addPass(hlfir::createLowerHLFIRIntrinsicsPass()); pm.addPass(hlfir::createBufferizeHLFIRPass()); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt 
index f7e51dc..bde1d47 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt @@ -3,6 +3,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_flang_library(HLFIRTransforms BufferizeHLFIR.cpp ConvertToFIR.cpp + InlineElementals.cpp LowerHLFIRIntrinsics.cpp LowerHLFIROrderedAssignments.cpp ScheduleOrderedAssignments.cpp diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp new file mode 100644 index 0000000..f0acd22 --- /dev/null +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineElementals.cpp @@ -0,0 +1,119 @@ +//===- InlineElementals.cpp - Inline chained hlfir.elemental ops ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Chained elemental operations like a + b + c can inline the first elemental +// at the hlfir.apply in the body of the second one (as described in +// docs/HighLevelFIR.md). This has to be done in a pass rather than in lowering +// so that it happens after the HLFIR intrinsic simplification pass. 
+//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Dialect/Support/FIRContext.h" +#include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/HLFIR/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/TypeSwitch.h" +#include <iterator> + +namespace hlfir { +#define GEN_PASS_DEF_INLINEELEMENTALS +#include "flang/Optimizer/HLFIR/Passes.h.inc" +} // namespace hlfir + +/// If the elemental has only two uses and those two are an apply operation and +/// a destroy operation, return those two, otherwise return std::nullopt +static std::optional<std::pair<hlfir::ApplyOp, hlfir::DestroyOp>> +getTwoUses(hlfir::ElementalOp elemental) { + mlir::Operation::user_range users = elemental->getUsers(); + // don't inline anything with more than one use (plus hlfir.destroy) + if (std::distance(users.begin(), users.end()) != 2) { + return std::nullopt; + } + + hlfir::ApplyOp apply; + hlfir::DestroyOp destroy; + for (mlir::Operation *user : users) + mlir::TypeSwitch<mlir::Operation *, void>(user) + .Case([&](hlfir::ApplyOp op) { apply = op; }) + .Case([&](hlfir::DestroyOp op) { destroy = op; }); + + if (!apply || !destroy) + return std::nullopt; + return std::pair{apply, destroy}; +} + +namespace { +class InlineElementalConversion + : public mlir::OpRewritePattern<hlfir::ElementalOp> { +public: + using mlir::OpRewritePattern<hlfir::ElementalOp>::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(hlfir::ElementalOp elemental, + mlir::PatternRewriter &rewriter) const override { + std::optional<std::pair<hlfir::ApplyOp, hlfir::DestroyOp>> maybeTuple = + getTwoUses(elemental); + if (!maybeTuple) { + return rewriter.notifyMatchFailure(elemental.getLoc(), + [](mlir::Diagnostic &) {}); 
+ } + auto [apply, destroy] = *maybeTuple; + + assert(elemental.getRegion().hasOneBlock() && + "expect elemental region to have one block"); + + fir::FirOpBuilder builder{rewriter, + fir::KindMapping{rewriter.getContext()}}; + builder.setInsertionPointAfter(apply); + hlfir::YieldElementOp yield = hlfir::inlineElementalOp( + elemental.getLoc(), builder, elemental, apply.getIndices()); + + // remove the old elemental and all of the bookkeeping + rewriter.replaceAllUsesWith(apply.getResult(), yield.getElementValue()); + rewriter.eraseOp(yield); + rewriter.eraseOp(apply); + rewriter.eraseOp(destroy); + rewriter.eraseOp(elemental); + + return mlir::success(); + } +}; + +class InlineElementalsPass + : public hlfir::impl::InlineElementalsBase<InlineElementalsPass> { +public: + void runOnOperation() override { + mlir::func::FuncOp func = getOperation(); + mlir::MLIRContext *context = &getContext(); + + mlir::GreedyRewriteConfig config; + // Prevent the pattern driver from merging blocks. + config.enableRegionSimplification = false; + + mlir::RewritePatternSet patterns(context); + patterns.insert<InlineElementalConversion>(context); + + if (mlir::failed(mlir::applyPatternsAndFoldGreedily( + func, std::move(patterns), config))) { + mlir::emitError(func->getLoc(), "failure in HLFIR elemental inlining"); + signalPassFailure(); + } + } +}; +} // namespace + +std::unique_ptr<mlir::Pass> hlfir::createInlineElementalsPass() { + return std::make_unique<InlineElementalsPass>(); +} diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90 index 320f06a..a3ff416 100644 --- a/flang/test/Driver/mlir-debug-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90 @@ -25,6 +25,8 @@ end program ! ALL: Pass statistics report ! ALL: Fortran::lower::VerifierPass +! ALL-NEXT: 'func.func' Pipeline +! ALL-NEXT: InlineElementals ! ALL-NEXT: LowerHLFIROrderedAssignments ! ALL-NEXT: LowerHLFIRIntrinsics ! 
ALL-NEXT: BufferizeHLFIR diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90 index 44d253a..7f92ec2 100644 --- a/flang/test/Driver/mlir-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-pass-pipeline.f90 @@ -15,7 +15,8 @@ end program ! O2-NEXT: Canonicalizer ! O2-NEXT: 'func.func' Pipeline ! O2-NEXT: SimplifyHLFIRIntrinsics -! ALL-NEXT: LowerHLFIROrderedAssignments +! ALL: InlineElementals +! ALL: LowerHLFIROrderedAssignments ! ALL-NEXT: LowerHLFIRIntrinsics ! ALL-NEXT: BufferizeHLFIR ! ALL-NEXT: ConvertHLFIRtoFIR diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir index e6d849c..4f0efb1 100644 --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -19,6 +19,7 @@ func.func @_QQmain() { // PASSES: Canonicalizer // PASSES-NEXT: 'func.func' Pipeline // PASSES-NEXT: SimplifyHLFIRIntrinsics +// PASSES-NEXT: InlineElementals // PASSES-NEXT: LowerHLFIROrderedAssignments // PASSES-NEXT: LowerHLFIRIntrinsics // PASSES-NEXT: BufferizeHLFIR diff --git a/flang/test/HLFIR/inline-elemental.fir b/flang/test/HLFIR/inline-elemental.fir new file mode 100644 index 0000000..a9bae19 --- /dev/null +++ b/flang/test/HLFIR/inline-elemental.fir @@ -0,0 +1,245 @@ +// RUN: fir-opt --inline-elementals %s | FileCheck %s + +// check inlining one elemental into another +// a = b * c + d +func.func @inline_to_elemental(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}, %arg2: !fir.box> {fir.bindc_name = "c"}, %arg3: !fir.box> {fir.bindc_name = "d"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg1 {uniq_name = "b"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg2 {uniq_name = "c"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %3:2 = hlfir.declare %arg3 {uniq_name = "d"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %c0 = arith.constant 0 : index + %4:3 = fir.box_dims %1#0, %c0 : (!fir.box>, 
index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.designate %1#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %9 = hlfir.designate %2#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %8 : !fir.ref + %11 = fir.load %9 : !fir.ref + %12 = arith.muli %10, %11 : i32 + hlfir.yield_element %12 : i32 + } + %7 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.apply %6, %arg4 : (!hlfir.expr, index) -> i32 + %9 = hlfir.designate %3#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = arith.addi %8, %10 : i32 + hlfir.yield_element %11 : i32 + } + hlfir.assign %7 to %0#0 : !hlfir.expr, !fir.box> + hlfir.destroy %7 : !hlfir.expr + hlfir.destroy %6 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @inline_to_elemental +// CHECK-SAME: %[[A_ARG:.*]]: !fir.box> {fir.bindc_name = "a"} +// CHECK-SAME: %[[B_ARG:.*]]: !fir.box> {fir.bindc_name = "b"} +// CHECK-SAME: %[[C_ARG:.*]]: !fir.box> {fir.bindc_name = "c"} +// CHECK-SAME: %[[D_ARG:.*]]: !fir.box> {fir.bindc_name = "d"} +// CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-DAG: %[[B:.*]]:2 = hlfir.declare %[[B_ARG]] +// CHECK-DAG: %[[C:.*]]:2 = hlfir.declare %[[C_ARG]] +// CHECK-DAG: %[[D:.*]]:2 = hlfir.declare %[[D_ARG]] +// CHECK-NEXT: %[[B_DIM0:.*]]:3 = fir.box_dims %[[B]]#0, %[[C0]] +// CHECK-NEXT: %[[B_SHAPE:.*]] = fir.shape %[[B_DIM0]]#1 +// CHECK-NEXT: %[[EXPR:.*]] = hlfir.elemental %[[B_SHAPE]] +// CHECK-NEXT: ^bb0(%[[I:.*]]: index): +// inline the first elemental: +// CHECK-NEXT: %[[B_I_REF:.*]] = hlfir.designate %[[B]]#0 (%[[I]]) +// CHECK-NEXT: %[[C_I_REF:.*]] = hlfir.designate %[[C]]#0 (%[[I]]) +// CHECK-NEXT: %[[B_I:.*]] = fir.load %[[B_I_REF]] +// CHECK-NEXT: %[[C_I:.*]] = fir.load %[[C_I_REF]] +// CHECK-NEXT: %[[MUL:.*]] = arith.muli %[[B_I]], 
%[[C_I]] +// second elemental: +// CHECK-NEXT: %[[D_I_REF:.*]] = hlfir.designate %[[D]]#0 (%[[I]]) +// CHECK-NEXT: %[[D_I:.*]] = fir.load %[[D_I_REF]] +// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[MUL]], %[[D_I]] +// CHECK-NEXT: hlfir.yield_element %[[ADD]] +// CHECK-NEXT: } +// CHECK-NEXT: hlfir.assign %[[EXPR]] to %[[A]]#0 +// CHECK-NEXT: hlfir.destroy %[[EXPR]] +// CHECK-NEXT: return +// CHECK-NEXT: } + +// check inlining into a do_loop +func.func @inline_to_loop(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}, %arg2: !fir.box> {fir.bindc_name = "c"}, %arg3: !fir.box> {fir.bindc_name = "d"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg1 {uniq_name = "b"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg2 {uniq_name = "c"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %3:2 = hlfir.declare %arg3 {uniq_name = "d"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %c0 = arith.constant 0 : index + %4:3 = fir.box_dims %1#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.designate %1#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %9 = hlfir.designate %2#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %8 : !fir.ref + %11 = fir.load %9 : !fir.ref + %12 = arith.muli %10, %11 : i32 + hlfir.yield_element %12 : i32 + } + %array = fir.array_load %0#0 : (!fir.box>) -> !fir.array + %c1 = arith.constant 1 : index + %max = arith.subi %4#1, %c1 : index + %7 = fir.do_loop %arg4 = %c0 to %max step %c1 unordered iter_args(%arg5 = %array) -> (!fir.array) { + %8 = hlfir.apply %6, %arg4 : (!hlfir.expr, index) -> i32 + %9 = hlfir.designate %3#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = arith.addi %8, %10 : i32 + %12 = fir.array_update %arg5, %11, %arg4 : (!fir.array, i32, index) -> 
!fir.array + fir.result %12 : !fir.array + } + fir.array_merge_store %array, %7 to %arg0 : !fir.array, !fir.array, !fir.box> + hlfir.destroy %6 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @inline_to_loop +// CHECK-SAME: %[[A_ARG:.*]]: !fir.box> {fir.bindc_name = "a"} +// CHECK-SAME: %[[B_ARG:.*]]: !fir.box> {fir.bindc_name = "b"} +// CHECK-SAME: %[[C_ARG:.*]]: !fir.box> {fir.bindc_name = "c"} +// CHECK-SAME: %[[D_ARG:.*]]: !fir.box> {fir.bindc_name = "d"} +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-DAG: %[[B:.*]]:2 = hlfir.declare %[[B_ARG]] +// CHECK-DAG: %[[C:.*]]:2 = hlfir.declare %[[C_ARG]] +// CHECK-DAG: %[[D:.*]]:2 = hlfir.declare %[[D_ARG]] +// CHECK-NEXT: %[[B_DIM0:.*]]:3 = fir.box_dims %[[B]]#0, %[[C0]] +// CHECK-NEXT: %[[ARRAY:.*]] = fir.array_load %[[A]]#0 +// CHECK-NEXT: %[[MAX:.*]] = arith.subi %[[B_DIM0]]#1, %[[C1]] +// CHECK-NEXT: %[[LOOP:.*]] = fir.do_loop %[[I:.*]] = %[[C0]] to %[[MAX]] step %[[C1]] unordered iter_args(%[[LOOP_ARRAY:.*]] = %[[ARRAY]]) +// inline the elemental: +// CHECK-NEXT: %[[B_I_REF:.*]] = hlfir.designate %[[B]]#0 (%[[I]]) +// CHECK-NEXT: %[[C_I_REF:.*]] = hlfir.designate %[[C]]#0 (%[[I]]) +// CHECK-NEXT: %[[B_I:.*]] = fir.load %[[B_I_REF]] +// CHECK-NEXT: %[[C_I:.*]] = fir.load %[[C_I_REF]] +// CHECK-NEXT: %[[MUL:.*]] = arith.muli %[[B_I]], %[[C_I]] +// loop body: +// CHECK-NEXT: %[[D_I_REF:.*]] = hlfir.designate %[[D]]#0 (%[[I]]) +// CHECK-NEXT: %[[D_I:.*]] = fir.load %[[D_I_REF]] +// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[MUL]], %[[D_I]] +// CHECK-NEXT: %[[ARRAY_UPD:.*]] = fir.array_update %[[LOOP_ARRAY]], %[[ADD]], %[[I]] +// CHECK-NEXT: fir.result %[[ARRAY_UPD]] +// CHECK-NEXT: } +// CHECK-NEXT: fir.array_merge_store %[[ARRAY]], %[[LOOP]] to %[[A_ARG]] +// CHECK-NEXT: return +// CHECK-NEXT: } + +// inlining into a single hlfir.apply +// a = (b * c)[1] +func.func @inline_to_apply(%arg0: 
!fir.ref {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}, %arg2: !fir.box> {fir.bindc_name = "c"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %1:2 = hlfir.declare %arg1 {uniq_name = "b"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg2 {uniq_name = "c"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %c0 = arith.constant 0 : index + %4:3 = fir.box_dims %1#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %8 = hlfir.designate %1#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %9 = hlfir.designate %2#0 (%arg4) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %8 : !fir.ref + %11 = fir.load %9 : !fir.ref + %12 = arith.muli %10, %11 : i32 + hlfir.yield_element %12 : i32 + } + %c1 = arith.constant 1 : index + %res = hlfir.apply %6, %c1 : (!hlfir.expr, index) -> i32 + fir.store %res to %0#0 : !fir.ref + hlfir.destroy %6 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @inline_to_apply +// CHECK-SAME: %[[A_ARG:.*]]: !fir.ref {fir.bindc_name = "a"} +// CHECK-SAME: %[[B_ARG:.*]]: !fir.box> {fir.bindc_name = "b"} +// CHECK-SAME: %[[C_ARG:.*]]: !fir.box> {fir.bindc_name = "c"} +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-DAG: %[[B:.*]]:2 = hlfir.declare %[[B_ARG]] +// CHECK-DAG: %[[C:.*]]:2 = hlfir.declare %[[C_ARG]] +// inline the elemental: +// CHECK-NEXT: %[[B_1_REF:.*]] = hlfir.designate %[[B]]#0 (%[[C1]]) +// CHECK-NEXT: %[[C_1_REF:.*]] = hlfir.designate %[[C]]#0 (%[[C1]]) +// CHECK-NEXT: %[[B_1:.*]] = fir.load %[[B_1_REF]] +// CHECK-NEXT: %[[C_1:.*]] = fir.load %[[C_1_REF]] +// CHECK-NEXT: %[[MUL:.*]] = arith.muli %[[B_1]], %[[C_1]] +// store: +// CHECK-NEXT: fir.store %[[MUL]] to %0#0 : !fir.ref +// CHECK-NEXT: return +// CHECK-NEXT: } + +// Check long chains of elementals +// subroutine 
reproducer(a) +// real, dimension(:) :: a +// a = sqrt(a * (a - 1)) +// end subroutine +func.func @_QPreproducer(%arg0: !fir.box> {fir.bindc_name = "a"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFreproducerEa"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %cst = arith.constant 1.000000e+00 : f32 + %c0 = arith.constant 0 : index + %1:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %2 = fir.shape %1#1 : (index) -> !fir.shape<1> + %3 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.designate %0#0 (%arg1) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = arith.subf %10, %cst fastmath : f32 + hlfir.yield_element %11 : f32 + } + %4 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.apply %3, %arg1 : (!hlfir.expr, index) -> f32 + %10 = hlfir.no_reassoc %9 : f32 + hlfir.yield_element %10 : f32 + } + %c0_0 = arith.constant 0 : index + %5:3 = fir.box_dims %0#0, %c0_0 : (!fir.box>, index) -> (index, index, index) + %6 = fir.shape %5#1 : (index) -> !fir.shape<1> + %7 = hlfir.elemental %6 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.designate %0#0 (%arg1) : (!fir.box>, index) -> !fir.ref + %10 = hlfir.apply %4, %arg1 : (!hlfir.expr, index) -> f32 + %11 = fir.load %9 : !fir.ref + %12 = arith.mulf %11, %10 fastmath : f32 + hlfir.yield_element %12 : f32 + } + %8 = hlfir.elemental %6 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %9 = hlfir.apply %7, %arg1 : (!hlfir.expr, index) -> f32 + %10 = math.sqrt %9 fastmath : f32 + hlfir.yield_element %10 : f32 + } + hlfir.assign %8 to %0#0 : !hlfir.expr, !fir.box> + hlfir.destroy %8 : !hlfir.expr + hlfir.destroy %7 : !hlfir.expr + hlfir.destroy %4 : !hlfir.expr + hlfir.destroy %3 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPreproducer +// CHECK-SAME: %[[A_ARG:.*]]: !fir.box> {fir.bindc_name = "a"} +// CHECK-DAG: %[[CST:.*]] = arith.constant 1.0000 +// 
CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[A_VAR:.*]]:2 = hlfir.declare %[[A_ARG]] +// CHECK-NEXT: %[[A_DIMS_0:.*]]:3 = fir.box_dims %[[A_VAR]]#0, %[[C0]] +// CHECK-NEXT: %[[SHAPE_0:.*]] = fir.shape %[[A_DIMS_0]]#1 +// all in one elemental: +// CHECK-NEXT: %[[EXPR:.*]] = hlfir.elemental %[[SHAPE_0]] +// CHECK-NEXT: ^bb0(%[[I:.*]]: index): +// CHECK-NEXT: %[[A_I0:.*]] = hlfir.designate %[[A_VAR]]#0 (%[[I]]) +// CHECK-NEXT: %[[A_I1:.*]] = hlfir.designate %[[A_VAR]]#0 (%[[I]]) +// CHECK-NEXT: %[[A_I1_VAL:.*]] = fir.load %[[A_I1]] +// CHECK-NEXT: %[[SUB:.*]] = arith.subf %[[A_I1_VAL]], %[[CST]] +// CHECK-NEXT: %[[SUB0:.*]] = hlfir.no_reassoc %[[SUB]] : f32 +// CHECK-NEXT: %[[A_I0_VAL:.*]] = fir.load %[[A_I0]] +// CHECK-NEXT: %[[MUL:.*]] = arith.mulf %[[A_I0_VAL]], %[[SUB0]] +// CHECK-NEXT: %[[SQRT:.*]] = math.sqrt %[[MUL]] +// CHECK-NEXT: hlfir.yield_element %[[SQRT]] +// CHECK-NEXT: } +// CHECK-NEXT: hlfir.assign %[[EXPR]] to %[[A_VAR]]#0 +// CHECK-NEXT: hlfir.destroy %[[EXPR]] +// CHECK-NEXT: return +// CHECK-NEXT: } -- 2.7.4