From 0446bfcc5ca206701b511796ed1c8316daa2d169 Mon Sep 17 00:00:00 2001 From: Jean Perier Date: Fri, 30 Jun 2023 08:48:42 +0200 Subject: [PATCH] [flang][hlfir] Codegen of hlfir.region_assign where LHS conflicts When the analysis of hlfir.region_assign determines that the LHS region evaluation may be impacted by the assignment effects, all LHS must be fully evaluated and saved before any assignment is done. This patch adds TemporaryStorage variants to save addresses, including vector subscripted entity addresses whose shape must be saved. It uses the DescriptorStack runtime to deal with complex cases inside forall. For the sake of simplicity, this is also used for vector subscripted LHS outside of foralls (each element address is saved as a descriptor on this stack. This is a bit suboptimal, but it is a safe start that will work with all kinds of types (polymorphic, PDTs...) without further work). Another approach would be to save only the values that are conflicting in the LHS computation, but this would require a much more complex analysis of the LHS region DAG. 
Differential Revision: https://reviews.llvm.org/D154057 --- .../Optimizer/Builder/Runtime/TemporaryStack.h | 11 ++ .../flang/Optimizer/Builder/TemporaryStorage.h | 88 ++++++++- .../Optimizer/Builder/Runtime/TemporaryStack.cpp | 49 +++++ flang/lib/Optimizer/Builder/TemporaryStorage.cpp | 124 ++++++++++++ .../Transforms/LowerHLFIROrderedAssignments.cpp | 149 ++++++++++++-- .../order_assignments/lhs-conflicts-codegen.fir | 216 +++++++++++++++++++++ 6 files changed, 618 insertions(+), 19 deletions(-) create mode 100644 flang/test/HLFIR/order_assignments/lhs-conflicts-codegen.fir diff --git a/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h b/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h index b35d6d6..085d2ad 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/TemporaryStack.h @@ -30,5 +30,16 @@ void genValueAt(mlir::Location loc, fir::FirOpBuilder &builder, void genDestroyValueStack(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value opaquePtr); +mlir::Value genCreateDescriptorStack(mlir::Location loc, + fir::FirOpBuilder &builder); + +void genPushDescriptor(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value opaquePtr, mlir::Value boxValue); +void genDescriptorAt(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value opaquePtr, mlir::Value i, + mlir::Value retValueBox); + +void genDestroyDescriptorStack(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value opaquePtr); } // namespace fir::runtime #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_TEMPORARYSTACK_H diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h index 51b8405..5f2e1c4 100644 --- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h +++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h @@ -120,6 +120,30 @@ public: hlfir::AssociateOp copy; }; +/// Structure to keep track of a 
simple mlir::Value. This is useful +/// when a value does not need an in memory copy because it is +/// already saved in an SSA value that will be accessible at the fetching +/// point. +class SSARegister { +public: + SSARegister(){}; + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value) { + ssaRegister = value; + } + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder){}; + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) { + return ssaRegister; + } + void destroy(mlir::Location loc, fir::FirOpBuilder &builder) {} + bool canBeFetchedAfterPush() const { return true; } + +public: + /// Temporary storage for the copy. + mlir::Value ssaRegister; +}; + /// Data structure to stack any kind of values with the same static type and /// rank. Each value may have different type parameters, bounds, and dynamic /// type. Fetching value N will return a value with the same dynamic type, @@ -150,6 +174,61 @@ private: mlir::Value retValueBox; }; +/// Data structure to stack any kind of variables with the same static type and +/// rank. Each variable may have different type parameters, bounds, and dynamic +/// type. Fetching variable N will return a variable with the same address, +/// dynamic type, bounds, and type parameters as the Nth variable that was +/// pushed. It is implemented using runtime. +class AnyVariableStack { +public: + AnyVariableStack(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type valueStaticType); + + void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value value); + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder); + mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder); + void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + bool canBeFetchedAfterPush() const { return true; } + +private: + /// Keep the original variable type. + mlir::Type variableStaticType; + /// Runtime cookie created by the runtime. 
It is a pointer to an opaque + /// runtime data structure that manages the stack. + mlir::Value opaquePtr; + /// Counter to keep track of the fetching position. + Counter counter; + /// Pointer box passed to the runtime when fetching the values. + mlir::Value retValueBox; +}; + +class TemporaryStorage; + +/// Data structure to stack vector subscripted entity shape and +/// element addresses. AnyVariableStack allows saving vector subscripted +/// entities element addresses, but when saving several vector subscripted +/// entities on a stack, and if the context does not allow retrieving the +/// vector subscript entities shapes, these shapes must be saved too. +class AnyVectorSubscriptStack : public AnyVariableStack { +public: + AnyVectorSubscriptStack(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type valueStaticType, + bool shapeCanBeSavedAsRegister, int rank); + void pushShape(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value shape); + void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder); + mlir::Value fetchShape(mlir::Location loc, fir::FirOpBuilder &builder); + void destroy(mlir::Location loc, fir::FirOpBuilder &builder); + bool canBeFetchedAfterPush() const { return true; } + +private: + std::unique_ptr shapeTemp; + // If the shape is saved inside a descriptor (as extents), + // keep track of the descriptor type. + std::optional boxType; +}; + /// Generic wrapper over the different sorts of temporary storages. 
class TemporaryStorage { public: @@ -178,8 +257,15 @@ public: impl); } + template + T &cast() { + return std::get(impl); + } + private: - std::variant impl; + std::variant + impl; }; } // namespace fir::factory #endif // FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H diff --git a/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp b/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp index f184e40..732152c 100644 --- a/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/TemporaryStack.cpp @@ -56,3 +56,52 @@ void fir::runtime::genDestroyValueStack(mlir::Location loc, auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr); builder.create(loc, func, args); } + +mlir::Value fir::runtime::genCreateDescriptorStack(mlir::Location loc, + fir::FirOpBuilder &builder) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, + builder); + mlir::FunctionType funcType = func.getFunctionType(); + mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); + mlir::Value sourceLine = + fir::factory::locationToLineNo(builder, loc, funcType.getInput(1)); + auto args = fir::runtime::createArguments(builder, loc, funcType, sourceFile, + sourceLine); + return builder.create(loc, func, args).getResult(0); +} + +void fir::runtime::genPushDescriptor(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value opaquePtr, + mlir::Value boxDescriptor) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, builder); + mlir::FunctionType funcType = func.getFunctionType(); + auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr, + boxDescriptor); + builder.create(loc, func, args); +} + +void fir::runtime::genDescriptorAt(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value opaquePtr, mlir::Value i, + mlir::Value retDescriptorBox) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, builder); + mlir::FunctionType funcType = 
func.getFunctionType(); + auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr, + i, retDescriptorBox); + builder.create(loc, func, args); +} + +void fir::runtime::genDestroyDescriptorStack(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value opaquePtr) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, + builder); + mlir::FunctionType funcType = func.getFunctionType(); + auto args = fir::runtime::createArguments(builder, loc, funcType, opaquePtr); + builder.create(loc, func, args); +} diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp index a108e06..dbc285c 100644 --- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp +++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp @@ -231,3 +231,127 @@ void fir::factory::AnyValueStack::destroy(mlir::Location loc, fir::FirOpBuilder &builder) { fir::runtime::genDestroyValueStack(loc, builder, opaquePtr); } + +//===----------------------------------------------------------------------===// +// fir::factory::AnyVariableStack implementation. 
+//===----------------------------------------------------------------------===// + +fir::factory::AnyVariableStack::AnyVariableStack(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Type variableStaticType) + : variableStaticType{variableStaticType}, + counter{loc, builder, + builder.createIntegerConstant(loc, builder.getI64Type(), 0), + /*stackThroughLoops=*/true} { + opaquePtr = fir::runtime::genCreateDescriptorStack(loc, builder); + mlir::Type storageType = + hlfir::getFortranElementOrSequenceType(variableStaticType); + mlir::Type ptrType = fir::PointerType::get(storageType); + mlir::Type boxType; + if (hlfir::isPolymorphicType(variableStaticType)) + boxType = fir::ClassType::get(ptrType); + else + boxType = fir::BoxType::get(ptrType); + retValueBox = builder.createTemporary(loc, boxType); +} + +void fir::factory::AnyVariableStack::pushValue(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value variable) { + hlfir::Entity entity{variable}; + mlir::Type storageElementType = + hlfir::getFortranElementType(retValueBox.getType()); + auto [box, maybeCleanUp] = + hlfir::convertToBox(loc, builder, entity, storageElementType); + fir::runtime::genPushDescriptor(loc, builder, opaquePtr, fir::getBase(box)); + if (maybeCleanUp) + (*maybeCleanUp)(); +} + +void fir::factory::AnyVariableStack::resetFetchPosition( + mlir::Location loc, fir::FirOpBuilder &builder) { + counter.reset(loc, builder); +} + +mlir::Value fir::factory::AnyVariableStack::fetch(mlir::Location loc, + fir::FirOpBuilder &builder) { + mlir::Value indexValue = counter.getAndIncrementIndex(loc, builder); + fir::runtime::genDescriptorAt(loc, builder, opaquePtr, indexValue, + retValueBox); + hlfir::Entity retBox{builder.create(loc, retValueBox)}; + // The runtime always tracks variable as address, but the form of the variable + // that was saved may be different (raw address, fir.boxchar), ensure + // the returned variable has the same form of the one that was saved. 
+ if (mlir::isa(variableStaticType)) + return builder.createConvert(loc, variableStaticType, retBox); + if (mlir::isa(variableStaticType)) + return hlfir::genVariableBoxChar(loc, builder, retBox); + mlir::Value rawAddr = genVariableRawAddress(loc, builder, retBox); + return builder.createConvert(loc, variableStaticType, rawAddr); +} + +void fir::factory::AnyVariableStack::destroy(mlir::Location loc, + fir::FirOpBuilder &builder) { + fir::runtime::genDestroyDescriptorStack(loc, builder, opaquePtr); +} + +//===----------------------------------------------------------------------===// +// fir::factory::AnyVectorSubscriptStack implementation. +//===----------------------------------------------------------------------===// + +fir::factory::AnyVectorSubscriptStack::AnyVectorSubscriptStack( + mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Type variableStaticType, bool shapeCanBeSavedAsRegister, int rank) + : AnyVariableStack{loc, builder, variableStaticType} { + if (shapeCanBeSavedAsRegister) { + shapeTemp = + std::unique_ptr(new TemporaryStorage{SSARegister{}}); + return; + } + // The shape will be tracked as the dimension inside a descriptor because + // that is the easiest from a lowering point of view, and this is an + // edge case situation that will probably not very well be exercised. + mlir::Type type = + fir::BoxType::get(builder.getVarLenSeqTy(builder.getI32Type(), rank)); + boxType = type; + shapeTemp = std::unique_ptr( + new TemporaryStorage{AnyVariableStack{loc, builder, type}}); +} + +void fir::factory::AnyVectorSubscriptStack::pushShape( + mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shape) { + if (boxType) { + // The shape is saved as a dimensions inside a descriptors. 
+ mlir::Type refType = fir::ReferenceType::get( + hlfir::getFortranElementOrSequenceType(*boxType)); + mlir::Value null = builder.createNullConstant(loc, refType); + mlir::Value descriptor = + builder.create(loc, *boxType, null, shape); + shapeTemp->pushValue(loc, builder, descriptor); + return; + } + // Otherwise, simply keep track of the fir.shape itself, it is invariant. + shapeTemp->cast().pushValue(loc, builder, shape); +} + +void fir::factory::AnyVectorSubscriptStack::resetFetchPosition( + mlir::Location loc, fir::FirOpBuilder &builder) { + static_cast(this)->resetFetchPosition(loc, builder); + shapeTemp->resetFetchPosition(loc, builder); +} + +mlir::Value +fir::factory::AnyVectorSubscriptStack::fetchShape(mlir::Location loc, + fir::FirOpBuilder &builder) { + if (boxType) { + hlfir::Entity descriptor{shapeTemp->fetch(loc, builder)}; + return hlfir::genShape(loc, builder, descriptor); + } + return shapeTemp->cast().fetch(loc, builder); +} + +void fir::factory::AnyVectorSubscriptStack::destroy( + mlir::Location loc, fir::FirOpBuilder &builder) { + static_cast(this)->destroy(loc, builder); + shapeTemp->destroy(loc, builder); +} diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp index 9662828..1fecbc0 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -181,14 +181,16 @@ private: std::optional elementalCleanup; mlir::Region *nonElementalCleanup = nullptr; std::optional vectorSubscriptLoopNest; + std::optional vectorSubscriptShape; }; /// Generate the left-hand side. If the left-hand side is vector /// subscripted (hlfir.elemental_addr), this will create a loop nest /// (unless it was already created by a WHERE mask) and return the /// element address. 
- LhsValueAndCleanUp generateYieldedLHS(mlir::Location loc, - mlir::Region &lhsRegion); + LhsValueAndCleanUp + generateYieldedLHS(mlir::Location loc, mlir::Region &lhsRegion, + std::optional loweredRhs = std::nullopt); /// If \p maybeYield is present and has a clean-up, generate the clean-up /// at the current insertion point (by cloning). @@ -212,6 +214,8 @@ private: /// Save a value for subsequent runs. void generateSaveEntity(hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun); + void saveLeftHandSide(hlfir::SaveEntity savedEntity, + hlfir::RegionAssignOp regionAssignOp); /// Get a value if it was saved in this run or a previous run. Returns /// nullopt if it has not been saved. @@ -421,9 +425,9 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { std::optional elementalLoopNest; auto [rhsValue, oldRhsYield] = generateYieldedEntity(regionAssignOp.getRhsRegion()); - LhsValueAndCleanUp loweredLhs = - generateYieldedLHS(loc, regionAssignOp.getLhsRegion()); hlfir::Entity rhsEntity{rhsValue}; + LhsValueAndCleanUp loweredLhs = + generateYieldedLHS(loc, regionAssignOp.getLhsRegion(), rhsEntity); hlfir::Entity lhsEntity{loweredLhs.lhs}; if (loweredLhs.vectorSubscriptLoopNest) rhsEntity = hlfir::getElementAt( @@ -692,17 +696,53 @@ mlir::Value OrderedAssignmentRewriter::generateYieldedScalarValue( } OrderedAssignmentRewriter::LhsValueAndCleanUp -OrderedAssignmentRewriter::generateYieldedLHS(mlir::Location loc, - mlir::Region &lhsRegion) { +OrderedAssignmentRewriter::generateYieldedLHS( + mlir::Location loc, mlir::Region &lhsRegion, + std::optional loweredRhs) { LhsValueAndCleanUp loweredLhs; hlfir::ElementalAddrOp elementalAddrLhs = mlir::dyn_cast(lhsRegion.back().back()); + if (auto temp = savedEntities.find(&lhsRegion); temp != savedEntities.end()) { + // The LHS address was computed and saved in a previous run. Fetch it. 
+ doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); }); + if (elementalAddrLhs && !whereLoopNest) { + // Vector subscripted designator address are saved element by element. + // If no "elemental" loops have been created yet, the shape of the + // RHS, if it is an array can be used, or the shape of the vector + // subscripted designator must be retrieved to generate the "elemental" + // loop nest. + if (loweredRhs && loweredRhs->isArray()) { + // The RHS shape can be used to create the elemental loops and avoid + // saving the LHS shape. + loweredLhs.vectorSubscriptShape = + hlfir::genShape(loc, builder, *loweredRhs); + } else { + // If the shape cannot be retrieved from the RHS, it must have been + // saved. Get it from the temporary. + auto &vectorTmp = + temp->second.cast(); + loweredLhs.vectorSubscriptShape = vectorTmp.fetchShape(loc, builder); + } + loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest( + loc, builder, loweredLhs.vectorSubscriptShape.value()); + builder.setInsertionPointToStart( + loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody()); + } + loweredLhs.lhs = temp->second.fetch(loc, builder); + return loweredLhs; + } + // The LHS has not yet been evaluated and saved. Evaluate it now. if (elementalAddrLhs && !whereLoopNest) { + // This is a vector subscripted entity. The address of elements must + // be returned. If no "elemental" loops have been created for a WHERE, + // create them now based on the vector subscripted designator shape. 
for (auto &op : lhsRegion.front().without_terminator()) (void)builder.clone(op, mapper); - mlir::Value newShape = mapper.lookupOrDefault(elementalAddrLhs.getShape()); - loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest( - loc, builder, newShape, !elementalAddrLhs.isOrdered()); + loweredLhs.vectorSubscriptShape = + mapper.lookupOrDefault(elementalAddrLhs.getShape()); + loweredLhs.vectorSubscriptLoopNest = + hlfir::genLoopNest(loc, builder, *loweredLhs.vectorSubscriptShape, + !elementalAddrLhs.isOrdered()); builder.setInsertionPointToStart( loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody()); mapper.map(elementalAddrLhs.getIndices(), @@ -713,6 +753,8 @@ OrderedAssignmentRewriter::generateYieldedLHS(mlir::Location loc, loweredLhs.lhs = mapper.lookupOrDefault(loweredLhs.elementalCleanup->getEntity()); } else { + // This is a designator without vector subscripts. Generate it as + // it is done for other entities. auto [lhs, yield] = generateYieldedEntity(lhsRegion); loweredLhs.lhs = lhs; if (yield && !yield->getCleanup().empty()) @@ -932,9 +974,12 @@ void MaskedArrayExpr::generateNoneElementalCleanupIfAny( } } -static bool isLeftHandSide(mlir::Region ®ion) { +static hlfir::RegionAssignOp +getAssignIfLeftHandSideRegion(mlir::Region ®ion) { auto assign = mlir::dyn_cast(region.getParentOp()); - return assign && (&assign.getLhsRegion() == ®ion); + if (assign && (&assign.getLhsRegion() == ®ion)) + return assign; + return nullptr; } bool OrderedAssignmentRewriter::currentLoopNestIterationNumberCanBeComputed( @@ -993,18 +1038,20 @@ getTempName(hlfir::OrderedAssignmentTreeOpInterface root) { void OrderedAssignmentRewriter::generateSaveEntity( hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun) { mlir::Region ®ion = *savedEntity.yieldRegion; - mlir::Location loc = region.getParentOp()->getLoc(); - if (!mlir::isa(region.back().back())) - TODO(loc, "creating temporary storage for vector subscripted LHS"); + if (hlfir::RegionAssignOp regionAssignOp = + 
getAssignIfLeftHandSideRegion(region)) { + // Need to save the address, not the values. + assert(!willUseSavedEntityInSameRun && + "lhs cannot be used in the loop nest where it is saved"); + return saveLeftHandSide(savedEntity, regionAssignOp); + } + mlir::Location loc = region.getParentOp()->getLoc(); // Evaluate the region inside the loop nest (if any). auto [clonedValue, oldYield] = generateYieldedEntity(region); hlfir::Entity entity{clonedValue}; - if (isLeftHandSide(region)) // Need to save the address, not the values. - TODO(loc, "creating temporary storage for LHS"); - else - entity = hlfir::loadTrivialScalar(loc, builder, entity); + entity = hlfir::loadTrivialScalar(loc, builder, entity); mlir::Type entityType = entity.getType(); llvm::StringRef tempName = getTempName(root); @@ -1069,6 +1116,72 @@ void OrderedAssignmentRewriter::generateSaveEntity( } } +static bool rhsIsArray(hlfir::RegionAssignOp regionAssignOp) { + auto yieldOp = mlir::dyn_cast( + regionAssignOp.getRhsRegion().back().back()); + return yieldOp && hlfir::Entity{yieldOp.getEntity()}.isArray(); +} + +void OrderedAssignmentRewriter::saveLeftHandSide( + hlfir::SaveEntity savedEntity, hlfir::RegionAssignOp regionAssignOp) { + mlir::Region ®ion = *savedEntity.yieldRegion; + mlir::Location loc = region.getParentOp()->getLoc(); + LhsValueAndCleanUp loweredLhs = generateYieldedLHS(loc, region); + fir::factory::TemporaryStorage *temp = nullptr; + if (loweredLhs.vectorSubscriptLoopNest) + constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerLoop); + if (loweredLhs.vectorSubscriptLoopNest && !rhsIsArray(regionAssignOp)) { + // Vector subscripted entity for which the shape must also be saved on top + // of the element addresses (e.g. the shape may change in each forall + // iteration and is needed to create the elemental loops). 
+ mlir::Value shape = loweredLhs.vectorSubscriptShape.value(); + int rank = mlir::cast(shape.getType()).getRank(); + const bool shapeIsInvariant = + constructStack.empty() || + dominanceInfo.properlyDominates(shape, constructStack[0]); + doBeforeLoopNest([&] { + // Outside of any forall/where/elemental loops, create a temporary that + // will both be able to save the vector subscripted designator shape(s) + // and element addresses. + temp = + insertSavedEntity(region, fir::factory::AnyVectorSubscriptStack{ + loc, builder, loweredLhs.lhs.getType(), + shapeIsInvariant, rank}); + }); + // Save shape before the elemental loop nest created by the vector + // subscripted LHS. + auto &vectorTmp = temp->cast(); + auto insertionPoint = builder.saveInsertionPoint(); + builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerLoop); + vectorTmp.pushShape(loc, builder, shape); + builder.restoreInsertionPoint(insertionPoint); + } else { + // Otherwise, only save the LHS address. + // If the LHS address dominates the constructs, its SSA value can + // simply be tracked and there is no need to save the address in memory. + // Otherwise, the addresses are stored at each iteration in memory with + // a descriptor stack. + if (constructStack.empty() || + dominanceInfo.properlyDominates(loweredLhs.lhs, constructStack[0])) + doBeforeLoopNest([&] { + temp = insertSavedEntity(region, fir::factory::SSARegister{}); + }); + else + doBeforeLoopNest([&] { + temp = insertSavedEntity( + region, fir::factory::AnyVariableStack{loc, builder, + loweredLhs.lhs.getType()}); + }); + } + temp->pushValue(loc, builder, loweredLhs.lhs); + generateCleanupIfAny(loweredLhs.elementalCleanup); + if (loweredLhs.vectorSubscriptLoopNest) { + constructStack.pop_back(); + builder.setInsertionPointAfter( + loweredLhs.vectorSubscriptLoopNest->outerLoop); + } +} + /// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given /// a schedule. 
static void lower(hlfir::OrderedAssignmentTreeOpInterface root, diff --git a/flang/test/HLFIR/order_assignments/lhs-conflicts-codegen.fir b/flang/test/HLFIR/order_assignments/lhs-conflicts-codegen.fir new file mode 100644 index 0000000..45ceb51 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/lhs-conflicts-codegen.fir @@ -0,0 +1,216 @@ +// Test code generation of hlfir.region_assign when the LHS computed +// address must be saved before the assignment is evaluated. Because +// the assignment would modify the LHS evaluation. +// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +// Test simplified IR for: +// +// x(x(1):x(2)) = l +// +// Verify that, although a conflict is detected, the LHS is not saved +// on a descriptor stack: it is already in a register that can be used +// since there is no forall. + +func.func @save_box_in_ssa_register(%arg0: !fir.box>, %arg1: !fir.box>>) { + %c2 = arith.constant 2 : index + %c1 = arith.constant 1 : index + %0:2 = hlfir.declare %arg1 {uniq_name = "l"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.region_assign { + hlfir.yield %0#0 : !fir.box>> + } to { + %2 = hlfir.designate %1#0 (%c1) : (!fir.box>, index) -> !fir.ref + %3 = fir.load %2 : !fir.ref + %4 = hlfir.designate %1#0 (%c2) : (!fir.box>, index) -> !fir.ref + %5 = fir.load %4 : !fir.ref + %6 = arith.subi %5, %3 : i64 + %7 = fir.convert %6 : (i64) -> index + %8 = fir.shape %7 : (index) -> !fir.shape<1> + %9 = hlfir.designate %1#0 (%3:%5:%c1) shape %8 : (!fir.box>, i64, i64, index, !fir.shape<1>) -> !fir.box> + hlfir.yield %9 : !fir.box> + } user_defined_assign (%arg2: !fir.ref>) to (%arg3: !fir.ref) { + %2 = fir.load %arg2 : !fir.ref> + fir.call @logical_to_real(%arg3, %2) : (!fir.ref, !fir.logical<4>) -> () + } + return +} +// CHECK-LABEL: func.func @save_box_in_ssa_register( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: 
!fir.box>>) { +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_5]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.box>, i64, i64, index, !fir.shape<1>) -> !fir.box> +// CHECK: fir.do_loop %[[VAL_20:.*]] = {{.*}} { +// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_20]]) : (!fir.box>, index) -> !fir.ref +// CHECK: fir.call @logical_to_real(%[[VAL_21]], %{{.*}}) : (!fir.ref, !fir.logical<4>) -> () +// CHECK: } +// CHECK: return +// CHECK: } + +// Test simplified IR for: +// +// ! x = [0,1,2,4] -> [4,2,1,1] +// forall (i=1:3) x(x(i)+1:x(i+1)) = x(4-i) +// +// Verify that the LHS are all computed and saved on a stack before +// any assignment is made. +// +func.func @save_box_in_stack(%arg0: !fir.box>) { + %c1 = arith.constant 1 : index + %c1_i32 = arith.constant 1 : i32 + %c4_i64 = arith.constant 4 : i64 + %c3_i64 = arith.constant 3 : i64 + %c1_i64 = arith.constant 1 : i64 + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.forall lb { + hlfir.yield %c1_i64 : i64 + } ub { + hlfir.yield %c3_i64 : i64 + } (%arg1: i64) { + hlfir.region_assign { + %1 = arith.subi %c4_i64, %arg1 : i64 + %2 = hlfir.designate %0#0 (%1) : (!fir.box>, i64) -> !fir.ref + %3 = fir.load %2 : !fir.ref + hlfir.yield %3 : i32 + } to { + %1 = hlfir.designate %0#0 (%arg1) : (!fir.box>, i64) -> !fir.ref + %2 = fir.load %1 : !fir.ref + %3 = arith.addi %2, %c1_i32 : i32 + %4 = arith.addi %arg1, %c1_i64 : i64 + %5 = hlfir.designate %0#0 (%4) : (!fir.box>, i64) -> !fir.ref + %6 = fir.load %5 : !fir.ref + %7 = arith.subi %6, %3 : i32 + %8 = fir.convert %7 : (i32) -> index + %9 = fir.shape %8 : (index) -> !fir.shape<1> + %10 = hlfir.designate %0#0 (%3:%6:%c1) shape %9 : (!fir.box>, i32, i32, index, !fir.shape<1>) -> !fir.box> + hlfir.yield %10 : !fir.box> + } + } + return +} +// CHECK-LABEL: func.func @save_box_in_stack( +// CHECK-SAME: 
%[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_2:.*]] = fir.alloca i64 +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_30:.*]] = fir.call @_FortranACreateDescriptorStack(%{{.*}}, %{{.*}}) : (!fir.ref, i32) -> !fir.llvm_ptr +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_48:.*]] = hlfir.designate %[[VAL_9]]#0 {{.*}} : (!fir.box>, i32, i32, index, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_48]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_50:.*]] = fir.call @_FortranAPushDescriptor(%[[VAL_30]], %[[VAL_49]]) : (!fir.llvm_ptr, !fir.box) -> none +// CHECK: } +// CHECK: fir.store %{{.*}} to %[[VAL_2]] : !fir.ref +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_60:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_61:.*]] = arith.addi %[[VAL_60]], %{{.*}} : i64 +// CHECK: fir.store %[[VAL_61]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_62:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_63:.*]] = fir.call @_FortranADescriptorAt(%[[VAL_30]], %[[VAL_60]], %[[VAL_62]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> none +// CHECK: %[[VAL_64:.*]] = fir.load %[[VAL_1]] : !fir.ref>>> +// CHECK: %[[VAL_65:.*]] = fir.convert %[[VAL_64]] : (!fir.box>>) -> !fir.box> +// CHECK: hlfir.assign %{{.*}} to %[[VAL_65]] : i32, !fir.box> +// CHECK: } +// CHECK: fir.call @_FortranADestroyDescriptorStack(%[[VAL_30]]) : (!fir.llvm_ptr) -> none + +// Test simplified IR for: +// +// integer(8) :: x(*) +// forall (integer::i=1:10) x(x(foo(x, i):bar(x, i))) = x(11-i) +// +// The shape of the vector subscripted designator must be saved at each +// iteration. 
+// +func.func @test_vector_subscript_overlap(%arg0: !fir.ref>) { + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %c11 = arith.constant 11 : index + %0 = fir.undefined index + %1 = fir.shape %0 : (index) -> !fir.shape<1> + %2:2 = hlfir.declare %arg0(%1) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) + hlfir.forall lb { + hlfir.yield %c1 : index + } ub { + hlfir.yield %c10 : index + } (%arg1: index) { + hlfir.region_assign { + %3 = arith.subi %c11, %arg1 : index + %4 = hlfir.designate %2#0 (%3) : (!fir.box>, index) -> !fir.ref + %5 = fir.load %4 : !fir.ref + hlfir.yield %5 : i64 + } to { + %3 = fir.call @foo(%2#1, %arg1) : (!fir.ref>, index) -> index + %4 = fir.call @bar(%2#1, %arg1) : (!fir.ref>, index) -> index + %5 = arith.subi %4, %3 : index + %6 = fir.shape %5 : (index) -> !fir.shape<1> + %7 = hlfir.designate %2#0 (%3:%4:%c1) shape %6 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.elemental_addr %6 : !fir.shape<1> { + ^bb0(%arg2: index): + %8 = hlfir.designate %7 (%arg2) : (!fir.box>, index) -> !fir.ref + %9 = fir.load %8 : !fir.ref + %10 = hlfir.designate %2#0 (%9) : (!fir.box>, i64) -> !fir.ref + hlfir.yield %10 : !fir.ref + } + } + } + return +} +// CHECK-LABEL: func.func @test_vector_subscript_overlap( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_2:.*]] = fir.alloca i64 +// CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box> +// CHECK: %[[VAL_4:.*]] = fir.alloca i64 +// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_0]](%{{.*}}) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +// CHECK: %[[VAL_30:.*]] = fir.call @_FortranACreateDescriptorStack(%{{.*}}, %{{.*}}) : (!fir.ref, i32) -> !fir.llvm_ptr +// CHECK: %[[VAL_37:.*]] = fir.call @_FortranACreateDescriptorStack(%{{.*}}, %{{.*}}) : (!fir.ref, i32) -> !fir.llvm_ptr +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_45:.*]] = fir.call @foo +// CHECK: 
%[[VAL_46:.*]] = fir.call @bar +// CHECK: %[[VAL_47:.*]] = arith.subi %[[VAL_46]], %[[VAL_45]] : index +// CHECK: %[[VAL_48:.*]] = fir.shape %[[VAL_47]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_51:.*]] = fir.zero_bits !fir.ref> +// CHECK: %[[VAL_52:.*]] = fir.embox %[[VAL_51]](%[[VAL_48]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_55:.*]] = fir.convert %[[VAL_52]] : (!fir.box>) -> !fir.box +// Save the vector subscripted designator shape. +// CHECK: %[[VAL_56:.*]] = fir.call @_FortranAPushDescriptor({{.*}}, {{.*}}) : (!fir.llvm_ptr, !fir.box) -> none +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_60:.*]] = hlfir.designate %[[VAL_11]]#0 (%{{.*}}) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_61:.*]] = fir.embox %[[VAL_60]] : (!fir.ref) -> !fir.box +// CHECK: %[[VAL_62:.*]] = fir.convert %[[VAL_61]] : (!fir.box) -> !fir.box +// Save the vector subscripted designator element address. +// CHECK: %[[VAL_63:.*]] = fir.call @_FortranAPushDescriptor(%[[VAL_30]], %[[VAL_62]]) : (!fir.llvm_ptr, !fir.box) -> none +// CHECK: } +// CHECK: } +// CHECK: fir.store %{{.*}} to %[[VAL_4]] : !fir.ref +// CHECK: fir.store %{{.*}} to %[[VAL_2]] : !fir.ref +// CHECK: fir.do_loop {{.*}} { +// CHECK: %[[VAL_69:.*]] = fir.load %{{.*}} : !fir.ref +// CHECK: %[[VAL_70:.*]] = fir.load %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_71:.*]] = arith.addi %[[VAL_70]], %{{.*}} : i64 +// CHECK: fir.store %[[VAL_71]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_72:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>>>) -> !fir.ref> +// Fetch the vector subscripted designator shape to create the elemental loop. 
+// CHECK: %[[VAL_73:.*]] = fir.call @_FortranADescriptorAt(%[[VAL_37]], %[[VAL_70]], %[[VAL_72]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> none +// CHECK: %[[VAL_74:.*]] = fir.load %[[VAL_1]] : !fir.ref>>> +// CHECK: %[[VAL_75:.*]] = fir.convert %[[VAL_74]] : (!fir.box>>) -> !fir.box> +// CHECK: %[[VAL_76:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_77:.*]]:3 = fir.box_dims %[[VAL_75]], %[[VAL_76]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_79:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_80:.*]] = %[[VAL_79]] to %[[VAL_77]]#1 step %[[VAL_79]] { +// CHECK: %[[VAL_81:.*]] = fir.load %[[VAL_4]] : !fir.ref +// CHECK: %[[VAL_82:.*]] = arith.addi %[[VAL_81]], %{{.*}} : i64 +// CHECK: fir.store %[[VAL_82]] to %[[VAL_4]] : !fir.ref +// CHECK: %[[VAL_83:.*]] = fir.convert %[[VAL_3]] : (!fir.ref>>) -> !fir.ref> +// Fetch the vector subscripted designator element address. +// CHECK: %[[VAL_84:.*]] = fir.call @_FortranADescriptorAt(%[[VAL_30]], %[[VAL_81]], %[[VAL_83]]) : (!fir.llvm_ptr, i64, !fir.ref>) -> none +// CHECK: %[[VAL_85:.*]] = fir.load %[[VAL_3]] : !fir.ref>> +// CHECK: %[[VAL_86:.*]] = fir.box_addr %[[VAL_85]] : (!fir.box>) -> !fir.ptr +// CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_86]] : (!fir.ptr) -> !fir.ref +// CHECK: hlfir.assign %{{.*}} to %[[VAL_87]] : i64, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_88:.*]] = fir.call @_FortranADestroyDescriptorStack(%[[VAL_30]]) : (!fir.llvm_ptr) -> none +// CHECK: %[[VAL_89:.*]] = fir.call @_FortranADestroyDescriptorStack(%[[VAL_37]]) : (!fir.llvm_ptr) -> none + +func.func private @logical_to_real(!fir.ref, !fir.logical<4>) +func.func private @foo(!fir.ref>, index) -> index +func.func private @bar(!fir.ref>, index) -> index -- 2.7.4