llvm::ArrayRef<mlir::Type> argTy;
Op op = builder.create<Op>(loc, argTy, operands);
builder.createBlock(&op.getRegion());
- auto &block = op.getRegion().back();
+ mlir::Block &block = op.getRegion().back();
builder.setInsertionPointToStart(&block);
builder.create<Terminator>(loc);
}
}
-static void genACC(Fortran::lower::AbstractConverter &converter,
- Fortran::lower::pft::Evaluation &eval,
- const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {
+static mlir::acc::LoopOp
+createLoopOp(Fortran::lower::AbstractConverter &converter,
+ const Fortran::parser::AccClauseList &accClauseList) {
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ mlir::Location currentLocation = converter.getCurrentLocation();
Fortran::lower::StatementContext stmtCtx;
- const auto &beginLoopDirective =
- std::get<Fortran::parser::AccBeginLoopDirective>(loopConstruct.t);
- const auto &loopDirective =
- std::get<Fortran::parser::AccLoopDirective>(beginLoopDirective.t);
- if (loopDirective.v == llvm::acc::ACCD_loop) {
- auto &firOpBuilder = converter.getFirOpBuilder();
- auto currentLocation = converter.getCurrentLocation();
-
- // Add attribute extracted from clauses.
- const auto &accClauseList =
- std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
+ mlir::Value workerNum;
+ mlir::Value vectorNum;
+ mlir::Value gangNum;
+ mlir::Value gangStatic;
+ llvm::SmallVector<mlir::Value, 2> tileOperands, privateOperands,
+ reductionOperands;
+ std::int64_t executionMapping = mlir::acc::OpenACCExecMapping::NONE;
- mlir::Value workerNum;
- mlir::Value vectorLength;
- mlir::Value gangNum;
- mlir::Value gangStatic;
- llvm::SmallVector<mlir::Value, 2> tileOperands, privateOperands,
- reductionOperands;
- std::int64_t executionMapping = mlir::acc::OpenACCExecMapping::NONE;
-
- // Lower clauses values mapped to operands.
- for (const auto &clause : accClauseList.v) {
- if (const auto *gangClause =
- std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
- if (gangClause->v) {
- const Fortran::parser::AccGangArgument &x = *gangClause->v;
- if (const auto &gangNumValue =
- std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
- x.t)) {
- gangNum = fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(gangNumValue.value()), stmtCtx));
- }
- if (const auto &gangStaticValue =
- std::get<std::optional<Fortran::parser::AccSizeExpr>>(x.t)) {
- const auto &expr =
- std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
- gangStaticValue.value().t);
- if (expr) {
- gangStatic = fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(*expr), stmtCtx));
- } else {
- // * was passed as value and will be represented as a -1 constant
- // integer.
- gangStatic = firOpBuilder.createIntegerConstant(
- currentLocation, firOpBuilder.getIntegerType(32),
- /* STAR */ -1);
- }
- }
- }
- executionMapping |= mlir::acc::OpenACCExecMapping::GANG;
- } else if (const auto *workerClause =
- std::get_if<Fortran::parser::AccClause::Worker>(
- &clause.u)) {
- if (workerClause->v) {
- workerNum = fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx));
- }
- executionMapping |= mlir::acc::OpenACCExecMapping::WORKER;
- } else if (const auto *vectorClause =
- std::get_if<Fortran::parser::AccClause::Vector>(
- &clause.u)) {
- if (vectorClause->v) {
- vectorLength = fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx));
+ for (const Fortran::parser::AccClause &clause : accClauseList.v) {
+ if (const auto *gangClause =
+ std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
+ if (gangClause->v) {
+ const Fortran::parser::AccGangArgument &x = *gangClause->v;
+ if (const auto &gangNumValue =
+ std::get<std::optional<Fortran::parser::ScalarIntExpr>>(x.t)) {
+ gangNum = fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(gangNumValue.value()), stmtCtx));
}
- executionMapping |= mlir::acc::OpenACCExecMapping::VECTOR;
- } else if (const auto *tileClause =
- std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
- const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v;
- for (const auto &accTileExpr : accTileExprList.v) {
+ if (const auto &gangStaticValue =
+ std::get<std::optional<Fortran::parser::AccSizeExpr>>(x.t)) {
const auto &expr =
- std::get<std::optional<Fortran::parser::ScalarIntConstantExpr>>(
- accTileExpr.t);
+ std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
+ gangStaticValue.value().t);
if (expr) {
- tileOperands.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(*expr), stmtCtx)));
+ gangStatic = fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(*expr), stmtCtx));
} else {
- // * was passed as value and will be represented as a -1 constant
- // integer.
- mlir::Value tileStar = firOpBuilder.createIntegerConstant(
- currentLocation, firOpBuilder.getIntegerType(32),
- /* STAR */ -1);
- tileOperands.push_back(tileStar);
+ // * was passed as value and will be represented as a special
+ // constant.
+ gangStatic = firOpBuilder.createIntegerConstant(
+ currentLocation, firOpBuilder.getIndexType(), starCst);
}
}
- } else if (const auto *privateClause =
- std::get_if<Fortran::parser::AccClause::Private>(
- &clause.u)) {
- genObjectList(privateClause->v, converter, privateOperands);
}
- // Reduction clause is left out for the moment as the clause will probably
- // end up having its own operation.
+ executionMapping |= mlir::acc::OpenACCExecMapping::GANG;
+ } else if (const auto *workerClause =
+ std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
+ if (workerClause->v) {
+ workerNum = fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx));
+ }
+ executionMapping |= mlir::acc::OpenACCExecMapping::WORKER;
+ } else if (const auto *vectorClause =
+ std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
+ if (vectorClause->v) {
+ vectorNum = fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx));
+ }
+ executionMapping |= mlir::acc::OpenACCExecMapping::VECTOR;
+ } else if (const auto *tileClause =
+ std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
+ const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v;
+ for (const auto &accTileExpr : accTileExprList.v) {
+ const auto &expr =
+ std::get<std::optional<Fortran::parser::ScalarIntConstantExpr>>(
+ accTileExpr.t);
+ if (expr) {
+ tileOperands.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(*expr), stmtCtx)));
+ } else {
+ // * was passed as value and will be represented as a -1 constant
+ // integer.
+ mlir::Value tileStar = firOpBuilder.createIntegerConstant(
+ currentLocation, firOpBuilder.getIntegerType(32),
+ /* STAR */ -1);
+ tileOperands.push_back(tileStar);
+ }
+ }
+ } else if (const auto *privateClause =
+ std::get_if<Fortran::parser::AccClause::Private>(
+ &clause.u)) {
+ genObjectList(privateClause->v, converter, privateOperands);
}
+ // Reduction clause is left out for the moment as the clause will probably
+ // end up having its own operation.
+ }
- // Prepare the operand segement size attribute and the operands value range.
- llvm::SmallVector<mlir::Value, 8> operands;
- llvm::SmallVector<int32_t, 8> operandSegments;
- addOperand(operands, operandSegments, gangNum);
- addOperand(operands, operandSegments, gangStatic);
- addOperand(operands, operandSegments, workerNum);
- addOperand(operands, operandSegments, vectorLength);
- addOperands(operands, operandSegments, tileOperands);
- addOperands(operands, operandSegments, privateOperands);
- addOperands(operands, operandSegments, reductionOperands);
-
- auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
- firOpBuilder, currentLocation, operands, operandSegments);
-
- loopOp->setAttr(mlir::acc::LoopOp::getExecutionMappingAttrName(),
- firOpBuilder.getI64IntegerAttr(executionMapping));
-
- // Lower clauses mapped to attributes
- for (const auto &clause : accClauseList.v) {
- if (const auto *collapseClause =
- std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
- const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
- const auto collapseValue = Fortran::evaluate::ToInt64(*expr);
- if (collapseValue) {
- loopOp->setAttr(mlir::acc::LoopOp::getCollapseAttrName(),
- firOpBuilder.getI64IntegerAttr(*collapseValue));
- }
- } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
- loopOp->setAttr(mlir::acc::LoopOp::getSeqAttrName(),
- firOpBuilder.getUnitAttr());
- } else if (std::get_if<Fortran::parser::AccClause::Independent>(
- &clause.u)) {
- loopOp->setAttr(mlir::acc::LoopOp::getIndependentAttrName(),
- firOpBuilder.getUnitAttr());
- } else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u)) {
- loopOp->setAttr(mlir::acc::LoopOp::getAutoAttrName(),
- firOpBuilder.getUnitAttr());
+ // Prepare the operand segment size attribute and the operands value range.
+ llvm::SmallVector<mlir::Value> operands;
+ llvm::SmallVector<int32_t> operandSegments;
+ addOperand(operands, operandSegments, gangNum);
+ addOperand(operands, operandSegments, gangStatic);
+ addOperand(operands, operandSegments, workerNum);
+ addOperand(operands, operandSegments, vectorNum);
+ addOperands(operands, operandSegments, tileOperands);
+ addOperands(operands, operandSegments, privateOperands);
+ addOperands(operands, operandSegments, reductionOperands);
+
+ auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
+ firOpBuilder, currentLocation, operands, operandSegments);
+
+ loopOp->setAttr(mlir::acc::LoopOp::getExecutionMappingAttrName(),
+ firOpBuilder.getI64IntegerAttr(executionMapping));
+
+ // Lower clauses mapped to attributes
+ for (const Fortran::parser::AccClause &clause : accClauseList.v) {
+ if (const auto *collapseClause =
+ std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
+ const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
+ const std::optional<int64_t> collapseValue =
+ Fortran::evaluate::ToInt64(*expr);
+ if (collapseValue) {
+ loopOp->setAttr(mlir::acc::LoopOp::getCollapseAttrName(),
+ firOpBuilder.getI64IntegerAttr(*collapseValue));
}
+ } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
+ loopOp->setAttr(mlir::acc::LoopOp::getSeqAttrName(),
+ firOpBuilder.getUnitAttr());
+ } else if (std::get_if<Fortran::parser::AccClause::Independent>(
+ &clause.u)) {
+ loopOp->setAttr(mlir::acc::LoopOp::getIndependentAttrName(),
+ firOpBuilder.getUnitAttr());
+ } else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u)) {
+ loopOp->setAttr(mlir::acc::LoopOp::getAutoAttrName(),
+ firOpBuilder.getUnitAttr());
}
}
+ return loopOp;
}
-static void
-genACCParallelOp(Fortran::lower::AbstractConverter &converter,
+static void genACC(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::pft::Evaluation &eval,
+ const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {
+
+ const auto &beginLoopDirective =
+ std::get<Fortran::parser::AccBeginLoopDirective>(loopConstruct.t);
+ const auto &loopDirective =
+ std::get<Fortran::parser::AccLoopDirective>(beginLoopDirective.t);
+
+ if (loopDirective.v == llvm::acc::ACCD_loop) {
+ const auto &accClauseList =
+ std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
+ createLoopOp(converter, accClauseList);
+ }
+}
+
+static mlir::acc::ParallelOp
+createParallelOp(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::AccClauseList &accClauseList) {
+
+ // Parallel operation operands
mlir::Value async;
mlir::Value numGangs;
mlir::Value numWorkers;
mlir::Value vectorLength;
mlir::Value ifCond;
mlir::Value selfCond;
+ mlir::Value waitDevnum;
llvm::SmallVector<mlir::Value, 2> waitOperands, reductionOperands,
copyOperands, copyinOperands, copyinReadonlyOperands, copyoutOperands,
copyoutZeroOperands, createOperands, createZeroOperands, noCreateOperands,
- presentOperands, devicePtrOperands, attachOperands, privateOperands,
- firstprivateOperands;
+ presentOperands, devicePtrOperands, attachOperands, firstprivateOperands,
+ privateOperands;
// Async, wait and self clause have optional values but can be present with
// no value as well. When there is no value, the op has an attribute to
bool addWaitAttr = false;
bool addSelfAttr = false;
- auto &firOpBuilder = converter.getFirOpBuilder();
- auto currentLocation = converter.getCurrentLocation();
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ mlir::Location currentLocation = converter.getCurrentLocation();
Fortran::lower::StatementContext stmtCtx;
// Lower clauses values mapped to operands.
// Keep track of each group of operands separatly as clauses can appear
// more than once.
- for (const auto &clause : accClauseList.v) {
+ for (const Fortran::parser::AccClause &clause : accClauseList.v) {
if (const auto *asyncClause =
std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
- const auto &asyncClauseValue = asyncClause->v;
- if (asyncClauseValue) { // async has a value.
- async = fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(*asyncClauseValue), stmtCtx));
- } else {
- addAsyncAttr = true;
- }
+ genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx);
} else if (const auto *waitClause =
std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) {
- const auto &waitClauseValue = waitClause->v;
- if (waitClauseValue) { // wait has a value.
- const Fortran::parser::AccWaitArgument &waitArg = *waitClauseValue;
- const auto &waitList =
- std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t);
- for (const Fortran::parser::ScalarIntExpr &value : waitList) {
- auto v = fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(value), stmtCtx));
- waitOperands.push_back(v);
- }
- } else {
- addWaitAttr = true;
- }
+ genWaitClause(converter, waitClause, waitOperands, waitDevnum,
+ addWaitAttr, stmtCtx);
} else if (const auto *numGangsClause =
std::get_if<Fortran::parser::AccClause::NumGangs>(
&clause.u)) {
*Fortran::semantics::GetExpr(vectorLengthClause->v), stmtCtx));
} else if (const auto *ifClause =
std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
- mlir::Value cond = fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(ifClause->v), stmtCtx));
- ifCond = firOpBuilder.createConvert(currentLocation,
- firOpBuilder.getI1Type(), cond);
+ genIfClause(converter, ifClause, ifCond, stmtCtx);
} else if (const auto *selfClause =
std::get_if<Fortran::parser::AccClause::Self>(&clause.u)) {
const Fortran::parser::AccSelfClause &accSelfClause = selfClause->v;
} else {
addSelfAttr = true;
}
+ } else if (const auto *accClauseList =
+ std::get_if<Fortran::parser::AccObjectList>(
+ &accSelfClause.u)) {
+ // TODO: This would be better done in a canonicalization step.
+ if (accClauseList->v.size() == 1) {
+ const auto &accObject = accClauseList->v.front();
+ if (const auto *designator =
+ std::get_if<Fortran::parser::Designator>(&accObject.u)) {
+ if (const auto *name = getDesignatorNameIfDataRef(*designator)) {
+ auto cond = converter.getSymbolAddress(*name->symbol);
+ selfCond = firOpBuilder.createConvert(
+ currentLocation, firOpBuilder.getI1Type(), cond);
+ }
+ }
+ }
}
} else if (const auto *copyClause =
std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) {
addOperands(operands, operandSegments, privateOperands);
addOperands(operands, operandSegments, firstprivateOperands);
- auto parallelOp = createRegionOp<mlir::acc::ParallelOp, mlir::acc::YieldOp>(
- firOpBuilder, currentLocation, operands, operandSegments);
+ mlir::acc::ParallelOp parallelOp =
+ createRegionOp<mlir::acc::ParallelOp, mlir::acc::YieldOp>(
+ firOpBuilder, currentLocation, operands, operandSegments);
if (addAsyncAttr)
parallelOp->setAttr(mlir::acc::ParallelOp::getAsyncAttrName(),
if (addSelfAttr)
parallelOp->setAttr(mlir::acc::ParallelOp::getSelfAttrName(),
firOpBuilder.getUnitAttr());
+
+ return parallelOp;
+}
+
+static void
+genACCParallelOp(Fortran::lower::AbstractConverter &converter,
+ const Fortran::parser::AccClauseList &accClauseList) {
+ createParallelOp(converter, accClauseList);
}
static void genACCDataOp(Fortran::lower::AbstractConverter &converter,
}
static void
+genACCParallelLoopOps(Fortran::lower::AbstractConverter &converter,
+ const Fortran::parser::AccClauseList &accClauseList) {
+ createParallelOp(converter, accClauseList);
+ createLoopOp(converter, accClauseList);
+}
+
+static void
+genACC(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::pft::Evaluation &eval,
+ const Fortran::parser::OpenACCCombinedConstruct &combinedConstruct) {
+ const auto &beginCombinedDirective =
+ std::get<Fortran::parser::AccBeginCombinedDirective>(combinedConstruct.t);
+ const auto &combinedDirective =
+ std::get<Fortran::parser::AccCombinedDirective>(beginCombinedDirective.t);
+ const auto &accClauseList =
+ std::get<Fortran::parser::AccClauseList>(beginCombinedDirective.t);
+
+ if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
+ TODO(converter.getCurrentLocation(),
+ "OpenACC Kernels Loop construct not lowered yet!");
+ } else if (combinedDirective.v == llvm::acc::ACCD_parallel_loop) {
+ genACCParallelLoopOps(converter, accClauseList);
+ } else if (combinedDirective.v == llvm::acc::ACCD_serial_loop) {
+ TODO(converter.getCurrentLocation(),
+ "OpenACC Serial Loop construct not lowered yet!");
+ } else {
+ llvm::report_fatal_error("Unknown combined construct encountered");
+ }
+}
+
+static void
genACCEnterDataOp(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::AccClauseList &accClauseList) {
mlir::Value ifCond, async, waitDevnum;
},
[&](const Fortran::parser::OpenACCCombinedConstruct
&combinedConstruct) {
- TODO(converter.getCurrentLocation(),
- "OpenACC Combined construct not lowered yet!");
+ genACC(converter, eval, combinedConstruct);
},
[&](const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {
genACC(converter, eval, loopConstruct);
--- /dev/null
+! This test checks lowering of OpenACC parallel loop combined directive.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+subroutine acc_parallel_loop
+ integer :: i, j
+
+ integer :: async = 1
+ integer :: wait1 = 1
+ integer :: wait2 = 2
+ integer :: numGangs = 1
+ integer :: numWorkers = 10
+ integer :: vectorLength = 128
+ logical :: ifCondition = .TRUE.
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b, c
+ real, dimension(n, n) :: d, e
+ real, pointer :: f, g
+
+ integer :: gangNum = 8
+ integer :: gangStatic = 8
+ integer :: vectorNum = 128
+ integer, parameter :: tileSize = 2
+
+!CHECK: [[A:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ea"}
+!CHECK: [[B:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Eb"}
+!CHECK: [[C:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ec"}
+!CHECK: [[F:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "f", uniq_name = "{{.*}}Ef"}
+!CHECK: [[G:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "g", uniq_name = "{{.*}}Eg"}
+!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref<!fir.logical<4>>
+
+ !$acc parallel loop
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop async
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+ !$acc end parallel loop
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {asyncAttr}
+
+ !$acc parallel loop async(1)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[ASYNC1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel async([[ASYNC1]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop async(async)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel async([[ASYNC2]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop wait
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {waitAttr}
+
+ !$acc parallel loop wait(1)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[WAIT1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel wait([[WAIT1]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop wait(1, 2)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[WAIT2:%.*]] = arith.constant 1 : i32
+!CHECK: [[WAIT3:%.*]] = arith.constant 2 : i32
+!CHECK: acc.parallel wait([[WAIT2]]: i32, [[WAIT3]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop wait(wait1, wait2)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel wait([[WAIT4]]: i32, [[WAIT5]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop num_gangs(1)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[NUMGANGS1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel num_gangs([[NUMGANGS1]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop num_gangs(numGangs)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel num_gangs([[NUMGANGS2]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop num_workers(10)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
+!CHECK: acc.parallel num_workers([[NUMWORKERS1]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop num_workers(numWorkers)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel num_workers([[NUMWORKERS2]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop vector_length(128)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
+!CHECK: acc.parallel vector_length([[VECTORLENGTH1]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop vector_length(vectorLength)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel vector_length([[VECTORLENGTH2]]: i32) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop if(.TRUE.)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[IF1:%.*]] = arith.constant true
+!CHECK: acc.parallel if([[IF1]]) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop if(ifCondition)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
+!CHECK: [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
+!CHECK: acc.parallel if([[IF2]]) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop self(.TRUE.)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[SELF1:%.*]] = arith.constant true
+!CHECK: acc.parallel self([[SELF1]]) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop self
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {selfAttr}
+
+ !$acc parallel loop self(ifCondition)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref<!fir.logical<4>>) -> i1
+!CHECK: acc.parallel self([[SELF2]]) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop copy(a, b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel copy([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop copy(a) copy(b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel copy([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop copyin(a) copyin(readonly: b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel copyin([[A]]: !fir.ref<!fir.array<10xf32>>) copyin_readonly([[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop copyout(a) copyout(zero: b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel copyout([[A]]: !fir.ref<!fir.array<10xf32>>) copyout_zero([[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop create(b) create(zero: a)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel create([[B]]: !fir.ref<!fir.array<10xf32>>) create_zero([[A]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop no_create(a, b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel no_create([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop present(a, b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel present([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop deviceptr(a) deviceptr(b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel deviceptr([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop attach(f, g)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel attach([[F]]: !fir.ref<!fir.box<!fir.ptr<f32>>>, [[G]]: !fir.ref<!fir.box<!fir.ptr<f32>>>) {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop private(a) firstprivate(b)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel private([[A]]: !fir.ref<!fir.array<10xf32>>) firstprivate([[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: acc.loop private([[A]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop seq
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {seq}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop auto
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {auto}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop independent
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {independent}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop gang
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop gang {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop gang(num: 8)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32
+!CHECK-NEXT: acc.loop gang(num=[[GANGNUM1]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop gang(num: gangNum)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK-NEXT: acc.loop gang(num=[[GANGNUM2]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop gang(num: gangNum, static: gangStatic)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop gang(num=%{{.*}}: i32, static=%{{.*}}: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop vector
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+!CHECK: acc.parallel {
+!CHECK: acc.loop vector {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop vector(128)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[CONSTANT128:%.*]] = arith.constant 128 : i32
+!CHECK: acc.loop vector([[CONSTANT128]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop vector(vectorLength)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.loop vector([[VECTORLENGTH]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop worker
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop worker {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop worker(128)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32
+!CHECK: acc.loop worker([[WORKER128]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop collapse(2)
+ DO i = 1, n
+ DO j = 1, n
+ d(i, j) = e(i, j)
+ END DO
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {collapse = 2 : i64}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop
+ DO i = 1, n
+ !$acc loop
+ DO j = 1, n
+ d(i, j) = e(i, j)
+ END DO
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.loop {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop tile(2)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[TILESIZE:%.*]] = arith.constant 2 : i32
+!CHECK: acc.loop tile([[TILESIZE]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop tile(*)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[TILESIZEM1:%.*]] = arith.constant -1 : i32
+!CHECK: acc.loop tile([[TILESIZEM1]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop tile(2, 2)
+ DO i = 1, n
+ DO j = 1, n
+ d(i, j) = e(i, j)
+ END DO
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[TILESIZE1:%.*]] = arith.constant 2 : i32
+!CHECK: [[TILESIZE2:%.*]] = arith.constant 2 : i32
+!CHECK: acc.loop tile([[TILESIZE1]]: i32, [[TILESIZE2]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop tile(tileSize)
+ DO i = 1, n
+ a(i) = b(i)
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop tile(%{{.*}}: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop tile(tileSize, tileSize)
+ DO i = 1, n
+ DO j = 1, n
+ d(i, j) = e(i, j)
+ END DO
+ END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop tile(%{{.*}}: i32, %{{.*}}: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+end subroutine acc_parallel_loop
--- /dev/null
+! This test checks the lowering of the OpenACC parallel directive.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+subroutine acc_parallel
+ ! Exercises the OpenACC parallel construct with each clause form in turn; every
+ ! construct is followed by the output patterns expected from the lowering.
+ ! NOTE(review): i and j are declared but not referenced in this subroutine.
+ integer :: i, j
+
+ ! Scalars used as variable (non-literal) clause arguments further down.
+ integer :: async = 1
+ integer :: wait1 = 1
+ integer :: wait2 = 2
+ integer :: numGangs = 1
+ integer :: numWorkers = 10
+ integer :: vectorLength = 128
+ logical :: ifCondition = .TRUE.
+ real, dimension(10, 10) :: a, b, c
+ real, pointer :: d, e
+
+ ! Capture the allocas for a, b, c, d, e and the address of ifCondition so the
+ ! patterns below can refer to them by name.
+!CHECK: [[A:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ea"}
+!CHECK: [[B:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Eb"}
+!CHECK: [[C:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ec"}
+!CHECK: [[D:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "d", uniq_name = "{{.*}}Ed"}
+!CHECK: [[E:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "e", uniq_name = "{{.*}}Ee"}
+!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref<!fir.logical<4>>
+
+ ! No clause: the op is expected with no operands, and the end-of-line anchor
+ ! after the closing brace rules out a trailing attribute dictionary.
+ !$acc parallel
+ !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! async without an argument is expected as the asyncAttr attribute.
+ !$acc parallel async
+ !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {asyncAttr}
+
+ ! async with a literal argument is expected as an i32 constant operand.
+ !$acc parallel async(1)
+ !$acc end parallel
+
+!CHECK: [[ASYNC1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel async([[ASYNC1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! async with a variable argument is expected as a loaded i32 operand.
+ !$acc parallel async(async)
+ !$acc end parallel
+
+!CHECK: [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel async([[ASYNC2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! wait without an argument is expected as the waitAttr attribute.
+ !$acc parallel wait
+ !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {waitAttr}
+
+ ! wait with one literal argument.
+ !$acc parallel wait(1)
+ !$acc end parallel
+
+!CHECK: [[WAIT1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel wait([[WAIT1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! wait with two literal arguments; both are expected in source order.
+ !$acc parallel wait(1, 2)
+ !$acc end parallel
+
+!CHECK: [[WAIT2:%.*]] = arith.constant 1 : i32
+!CHECK: [[WAIT3:%.*]] = arith.constant 2 : i32
+!CHECK: acc.parallel wait([[WAIT2]]: i32, [[WAIT3]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! wait with two variable arguments; each is expected as a loaded i32 value.
+ !$acc parallel wait(wait1, wait2)
+ !$acc end parallel
+
+!CHECK: [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel wait([[WAIT4]]: i32, [[WAIT5]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! num_gangs, num_workers and vector_length are each tested twice: once with a
+ ! literal (expected as arith.constant) and once with a variable (expected as
+ ! fir.load).
+ !$acc parallel num_gangs(1)
+ !$acc end parallel
+
+!CHECK: [[NUMGANGS1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel num_gangs([[NUMGANGS1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel num_gangs(numGangs)
+ !$acc end parallel
+
+!CHECK: [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel num_gangs([[NUMGANGS2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel num_workers(10)
+ !$acc end parallel
+
+!CHECK: [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
+!CHECK: acc.parallel num_workers([[NUMWORKERS1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel num_workers(numWorkers)
+ !$acc end parallel
+
+!CHECK: [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel num_workers([[NUMWORKERS2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel vector_length(128)
+ !$acc end parallel
+
+!CHECK: [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
+!CHECK: acc.parallel vector_length([[VECTORLENGTH1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel vector_length(vectorLength)
+ !$acc end parallel
+
+!CHECK: [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel vector_length([[VECTORLENGTH2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! if with a literal condition is expected as a constant true operand.
+ !$acc parallel if(.TRUE.)
+ !$acc end parallel
+
+!CHECK: [[IF1:%.*]] = arith.constant true
+!CHECK: acc.parallel if([[IF1]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! if with a logical variable: the value is expected to be loaded and then
+ ! converted from !fir.logical<4> to i1.
+ !$acc parallel if(ifCondition)
+ !$acc end parallel
+
+!CHECK: [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
+!CHECK: [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
+!CHECK: acc.parallel if([[IF2]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! self with a literal condition is expected as a constant true operand.
+ !$acc parallel self(.TRUE.)
+ !$acc end parallel
+
+!CHECK: [[SELF1:%.*]] = arith.constant true
+!CHECK: acc.parallel self([[SELF1]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! self without an argument is expected as the selfAttr attribute.
+ !$acc parallel self
+ !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {selfAttr}
+
+ ! self with a logical variable.
+ ! NOTE(review): the expected convert takes the address captured above (a
+ ! !fir.ref) straight to i1, whereas the if(ifCondition) case loads the value
+ ! first - confirm that this difference is intended.
+ !$acc parallel self(ifCondition)
+ !$acc end parallel
+
+!CHECK: [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref<!fir.logical<4>>) -> i1
+!CHECK: acc.parallel self([[SELF2]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! Data clauses. A single clause with a list of variables...
+ !$acc parallel copy(a, b, c)
+ !$acc end parallel
+
+!CHECK: acc.parallel copy([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! ...and the same variables spread over repeated clauses are expected to give
+ ! the same single operand list.
+ !$acc parallel copy(a) copy(b) copy(c)
+ !$acc end parallel
+
+!CHECK: acc.parallel copy([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! The readonly modifier is expected in a separate copyin_readonly group.
+ !$acc parallel copyin(a) copyin(readonly: b, c)
+ !$acc end parallel
+
+!CHECK: acc.parallel copyin([[A]]: !fir.ref<!fir.array<10x10xf32>>) copyin_readonly([[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! The zero modifier is expected in a separate copyout_zero group; the plain
+ ! copyout operands a and c are grouped together although b sits between them
+ ! in the source.
+ !$acc parallel copyout(a) copyout(zero: b) copyout(c)
+ !$acc end parallel
+
+!CHECK: acc.parallel copyout([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) copyout_zero([[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! create with and without the zero modifier.
+ !$acc parallel create(a, b) create(zero: c)
+ !$acc end parallel
+
+!CHECK: acc.parallel create([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>) create_zero([[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! The expected output lists create_zero before no_create even though the
+ ! source writes no_create first.
+ !$acc parallel no_create(a, b) create(zero: c)
+ !$acc end parallel
+
+!CHECK: acc.parallel create_zero([[C]]: !fir.ref<!fir.array<10x10xf32>>) no_create([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel present(a, b, c)
+ !$acc end parallel
+
+!CHECK: acc.parallel present([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel deviceptr(a) deviceptr(c)
+ !$acc end parallel
+
+!CHECK: acc.parallel deviceptr([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! attach takes the pointer variables; the operands are expected to be the
+ ! references to their box descriptors.
+ !$acc parallel attach(d, e)
+ !$acc end parallel
+
+!CHECK: acc.parallel attach([[D]]: !fir.ref<!fir.box<!fir.ptr<f32>>>, [[E]]: !fir.ref<!fir.box<!fir.ptr<f32>>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+ ! The operands of both private clauses are expected in one private group,
+ ! with firstprivate printed as its own group after it.
+ !$acc parallel private(a) firstprivate(b) private(c)
+ !$acc end parallel
+
+!CHECK: acc.parallel private([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) firstprivate([[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+end subroutine acc_parallel