From a56b76d9ca520b4495da2a984ff481a6487736fc Mon Sep 17 00:00:00 2001 From: Nimish Mishra Date: Thu, 14 Jul 2022 18:20:28 +0530 Subject: [PATCH] [flang][OpenMP] Lowering support for atomic update construct This patch adds lowering support for atomic update construct. A region is associated with every `omp.atomic.update` operation wherein resides: (1) the evaluation of the expression on the RHS of the atomic assignment statement, and (2) a `omp.yield` operation that yields the extended value of expression evaluated in (1). Reviewed By: peixin Differential Revision: https://reviews.llvm.org/D125668 --- Note: amended relative to the commit above: template arguments lost in text extraction are restored and the update variable's symbol is checked before use. Hunk offsets and the diffstat account for the added lines; the commit id and the blob ids on the `index` line are those of the original commit. flang/lib/Lower/OpenMP.cpp | 117 +++++++++++++++++++++++++- flang/test/Lower/OpenMP/atomic-update.f90 | 124 ++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 2 deletions(-) create mode 100644 flang/test/Lower/OpenMP/atomic-update.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index ed462d2..f2efac3 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -1113,6 +1113,76 @@ static void genOmpAtomicHintAndMemoryOrderClauses( } } +/// Emit an `omp.atomic.update` op for one atomic assignment statement. The +/// op's region gets a block argument of the variable's value type; the +/// variable's symbol is rebound to it and the right-hand side is yielded. +/// Hint and memory order come from whichever clause lists are non-null. +static void genOmpAtomicUpdateStatement( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::Variable &assignmentStmtVariable, + const Fortran::parser::Expr &assignmentStmtExpr, + const Fortran::parser::OmpAtomicClauseList *leftHandClauseList, + const Fortran::parser::OmpAtomicClauseList *rightHandClauseList) { + // Generate `omp.atomic.update` operation for atomic assignment statements + auto &firOpBuilder = converter.getFirOpBuilder(); + auto currentLocation = converter.getCurrentLocation(); + Fortran::lower::StatementContext stmtCtx; + + mlir::Value address = fir::getBase(converter.genExprAddr( + *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx)); + const Fortran::semantics::Symbol *updateSymbol = nullptr; + if (auto varDesignator = std::get_if< + Fortran::common::Indirection<Fortran::parser::Designator>>( + &assignmentStmtVariable.u)) { + if 
(const auto *name = getDesignatorNameIfDataRef(varDesignator->value())) { + updateSymbol = name->symbol; + } + } + // The region argument is bound to this symbol below; bail out cleanly when + // the variable does not resolve to one instead of dereferencing a null pointer. + if (!updateSymbol) + TODO(currentLocation, "Atomic update of a non-name designator"); + // If no hint clause is specified, the effect is as if + // hint(omp_sync_hint_none) had been specified. + mlir::IntegerAttr hint = nullptr; + mlir::omp::ClauseMemoryOrderKindAttr memory_order = nullptr; + if (leftHandClauseList) + genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint, + memory_order); + if (rightHandClauseList) + genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint, + memory_order); + auto atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>( + currentLocation, address, hint, memory_order); + + //// Generate body of Atomic Update operation + // If an argument for the region is provided then create the block with that + // argument. Also update the symbol's address with the argument mlir value. + mlir::Type varType = + fir::getBase( + converter.genExprValue( + *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx)) + .getType(); + SmallVector<mlir::Type> varTys = {varType}; + SmallVector<mlir::Location> locs = {currentLocation}; + firOpBuilder.createBlock(&atomicUpdateOp.getRegion(), {}, varTys, locs); + mlir::Value val = + fir::getBase(atomicUpdateOp.getRegion().front().getArgument(0)); + converter.bindSymbol(*updateSymbol, val); + // Set the insert for the terminator operation to go at the end of the + // block. + mlir::Block &block = atomicUpdateOp.getRegion().back(); + firOpBuilder.setInsertionPointToEnd(&block); + + mlir::Value result = fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(assignmentStmtExpr), stmtCtx)); + // Insert the terminator: YieldOp. + firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, result); + // Move the insertion point back to the start of the region block. 
+ firOpBuilder.setInsertionPointToStart(&block); +} + static void genOmpAtomicWrite(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, @@ -1177,6 +1247,44 @@ static void genOmpAtomicRead(Fortran::lower::AbstractConverter &converter, } static void +genOmpAtomicUpdate(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpAtomicUpdate &atomicUpdate) { + // Tuple slots 0 and 2 hold the two clause lists; slot 3 is the assignment. + const Fortran::parser::OmpAtomicClauseList &rightHandClauseList = + std::get<2>(atomicUpdate.t); + const Fortran::parser::OmpAtomicClauseList &leftHandClauseList = + std::get<0>(atomicUpdate.t); + const auto &assignmentStmtExpr = + std::get<Fortran::parser::Expr>(std::get<3>(atomicUpdate.t).statement.t); + const auto &assignmentStmtVariable = std::get<Fortran::parser::Variable>( + std::get<3>(atomicUpdate.t).statement.t); + + genOmpAtomicUpdateStatement(converter, eval, assignmentStmtVariable, + assignmentStmtExpr, &leftHandClauseList, + &rightHandClauseList); +} + +static void genOmpAtomic(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpAtomic &atomicConstruct) { + const Fortran::parser::OmpAtomicClauseList &atomicClauseList = + std::get<Fortran::parser::OmpAtomicClauseList>(atomicConstruct.t); + const auto &assignmentStmtExpr = std::get<Fortran::parser::Expr>( + std::get<Fortran::parser::Statement<Fortran::parser::AssignmentStmt>>( + atomicConstruct.t) + .statement.t); + const auto &assignmentStmtVariable = std::get<Fortran::parser::Variable>( + std::get<Fortran::parser::Statement<Fortran::parser::AssignmentStmt>>( + atomicConstruct.t) + .statement.t); + // If atomic-clause is not present on the construct, the behaviour is as if + // the update clause is specified + genOmpAtomicUpdateStatement(converter, eval, assignmentStmtVariable, + assignmentStmtExpr, &atomicClauseList, nullptr); +} + +static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) { @@ -1187,9 +1295,14 @@ genOMP(Fortran::lower::AbstractConverter &converter, [&](const Fortran::parser::OmpAtomicWrite &atomicWrite) { 
genOmpAtomicWrite(converter, eval, atomicWrite); }, + [&](const Fortran::parser::OmpAtomic &atomicConstruct) { + genOmpAtomic(converter, eval, atomicConstruct); + }, + [&](const Fortran::parser::OmpAtomicUpdate &atomicUpdate) { + genOmpAtomicUpdate(converter, eval, atomicUpdate); + }, [&](const auto &) { - TODO(converter.getCurrentLocation(), - "Atomic update & capture"); + TODO(converter.getCurrentLocation(), "Atomic capture"); }, }, atomicConstruct.u); diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 new file mode 100644 index 0000000..3ca7d95 --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -0,0 +1,124 @@ +! This test checks lowering of atomic and atomic update constructs +! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s + +program OmpAtomicUpdate + use omp_lib + integer :: x, y, z + integer, pointer :: a, b + integer, target :: c, d + a=>c + b=>d + +!CHECK: func.func @_QQmain() { +!CHECK: %[[A:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFEa"} +!CHECK: %[[A_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFEa.addr"} +!CHECK: %{{.*}} = fir.zero_bits !fir.ptr<i32> +!CHECK: fir.store %{{.*}} to %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>> +!CHECK: %[[B:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "b", uniq_name = "_QFEb"} +!CHECK: %[[B_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFEb.addr"} +!CHECK: %{{.*}} = fir.zero_bits !fir.ptr<i32> +!CHECK: fir.store %{{.*}} to %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>> +!CHECK: %[[C_ADDR:.*]] = fir.address_of(@_QFEc) : !fir.ref<i32> +!CHECK: %[[D_ADDR:.*]] = fir.address_of(@_QFEd) : !fir.ref<i32> +!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} +!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} +!CHECK: %[[Z:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"} +!CHECK: %{{.*}} = fir.convert %[[C_ADDR]] : (!fir.ref<i32>) -> !fir.ptr<i32> +!CHECK: fir.store %{{.*}} to %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>> 
+!CHECK: %{{.*}} = fir.convert %[[D_ADDR]] : (!fir.ref<i32>) -> !fir.ptr<i32> +!CHECK: fir.store {{.*}} to %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>> +!CHECK: %[[LOADED_A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<!fir.ptr<i32>> +!CHECK: omp.atomic.update %[[LOADED_A]] : !fir.ptr<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[LOADED_B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<!fir.ptr<i32>> +!CHECK: %{{.*}} = fir.load %[[LOADED_B]] : !fir.ptr<i32> +!CHECK: %[[RESULT:.*]] = arith.addi %[[ARG]], %{{.*}} : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } + !$omp atomic update + a = a + b + +!CHECK: omp.atomic.update %[[Y]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: {{.*}} = arith.constant 1 : i32 +!CHECK: %[[RESULT:.*]] = arith.addi %[[ARG]], {{.*}} : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } +!CHECK: omp.atomic.update %[[Z]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32> +!CHECK: %[[RESULT:.*]] = arith.muli %[[LOADED_X]], %[[ARG]] : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } + !$omp atomic + y = y + 1 + !$omp atomic update + z = x * z + +!CHECK: omp.atomic.update memory_order(relaxed) hint(uncontended) %[[X]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %{{.*}} = arith.constant 1 : i32 +!CHECK: %[[RESULT:.*]] = arith.subi %[[ARG]], {{.*}} : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } +!CHECK: omp.atomic.update memory_order(relaxed) %[[Y]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32> +!CHECK: %[[LOADED_Z:.*]] = fir.load %[[Z]] : !fir.ref<i32> +!CHECK: %{{.*}} = arith.cmpi sgt, %[[LOADED_X]], %[[ARG]] : i32 +!CHECK: %{{.*}} = arith.select %{{.*}}, %[[LOADED_X]], %[[ARG]] : i32 +!CHECK: %{{.*}} = arith.cmpi sgt, %{{.*}}, %[[LOADED_Z]] : i32 +!CHECK: %[[RESULT:.*]] = arith.select %{{.*}}, %{{.*}}, %[[LOADED_Z]] : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } +!CHECK: omp.atomic.update memory_order(relaxed) hint(contended) %[[Z]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): 
+!CHECK: %[[LOADED_X:.*]] = fir.load %[[X]] : !fir.ref<i32> +!CHECK: %[[RESULT:.*]] = arith.addi %[[ARG]], %[[LOADED_X]] : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } + !$omp atomic relaxed update hint(omp_sync_hint_uncontended) + x = x - 1 + !$omp atomic update relaxed + y = max(x, y, z) + !$omp atomic relaxed hint(omp_sync_hint_contended) + z = z + x + +!CHECK: omp.atomic.update memory_order(release) hint(contended) %[[Z]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %{{.*}} = arith.constant 10 : i32 +!CHECK: %[[RESULT:.*]] = arith.muli {{.*}}, %[[ARG]] : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } +!CHECK: omp.atomic.update memory_order(release) hint(speculative) %[[X]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[LOADED_Z:.*]] = fir.load %[[Z]] : !fir.ref<i32> +!CHECK: %[[RESULT:.*]] = arith.divsi %[[ARG]], %[[LOADED_Z]] : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } + + !$omp atomic release update hint(omp_lock_hint_contended) + z = z * 10 + !$omp atomic hint(omp_lock_hint_speculative) update release + x = x / z + +!CHECK: omp.atomic.update memory_order(seq_cst) hint(nonspeculative) %[[Y]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %{{.*}} = arith.constant 10 : i32 +!CHECK: %[[RESULT:.*]] = arith.addi %{{.*}}, %[[ARG]] : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } +!CHECK: omp.atomic.update memory_order(seq_cst) %[[Z]] : !fir.ref<i32> { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[LOADED_Y:.*]] = fir.load %[[Y]] : !fir.ref<i32> +!CHECK: %[[RESULT:.*]] = arith.addi %[[LOADED_Y]], %[[ARG]] : i32 +!CHECK: omp.yield(%[[RESULT]] : i32) +!CHECK: } +!CHECK: return +!CHECK: } + !$omp atomic hint(omp_sync_hint_nonspeculative) seq_cst + y = 10 + y + !$omp atomic seq_cst update + z = y + z +end program OmpAtomicUpdate -- 2.7.4