--- /dev/null
+//===-- Character.h -- lowering of characters -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
+#define FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
+
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+
+namespace fir::factory {
+
+/// Helper to facilitate lowering of CHARACTER in FIR.
+class CharacterExprHelper {
+public:
+ /// Constructor.
+ explicit CharacterExprHelper(FirOpBuilder &builder, mlir::Location loc)
+ : builder{builder}, loc{loc} {}
+ CharacterExprHelper(const CharacterExprHelper &) = delete;
+
+ /// Copy the \p count first characters of \p src into \p dest.
+ /// \p count can have any integer type.
+ void createCopy(const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
+ mlir::Value count);
+
+ /// Set characters of \p str at position [\p lower, \p upper) to blanks.
+ /// \p lower and \upper bounds are zero based.
+ /// If \p upper <= \p lower, no padding is done.
+ /// \p upper and \p lower can have any integer type.
+ void createPadding(const fir::CharBoxValue &str, mlir::Value lower,
+ mlir::Value upper);
+
+ /// Create str(lb:ub), lower bounds must always be specified, upper
+ /// bound is optional.
+ fir::CharBoxValue createSubstring(const fir::CharBoxValue &str,
+ llvm::ArrayRef<mlir::Value> bounds);
+
+ /// Return blank character of given \p type !fir.char<kind>
+ mlir::Value createBlankConstant(fir::CharacterType type);
+
+ /// Lower \p lhs = \p rhs where \p lhs and \p rhs are scalar characters.
+ /// It handles cases where \p lhs and \p rhs may overlap.
+ void createAssign(const fir::ExtendedValue &lhs,
+ const fir::ExtendedValue &rhs);
+
+ /// Create lhs // rhs in temp obtained with fir.alloca
+ fir::CharBoxValue createConcatenate(const fir::CharBoxValue &lhs,
+ const fir::CharBoxValue &rhs);
+
+ /// LEN_TRIM intrinsic.
+ mlir::Value createLenTrim(const fir::CharBoxValue &str);
+
+ /// Embox \p addr and \p len and return fir.boxchar.
+ /// Take care of type conversions before emboxing.
+ /// \p len is converted to the integer type for character lengths if needed.
+ mlir::Value createEmboxChar(mlir::Value addr, mlir::Value len);
+ /// Create a fir.boxchar for \p str. If \p str is not in memory, a temp is
+ /// allocated to create the fir.boxchar.
+ mlir::Value createEmbox(const fir::CharBoxValue &str);
+ /// Embox a string array. Note that the size/shape of the array is not
+ /// retrievable from the resulting mlir::Value.
+ mlir::Value createEmbox(const fir::CharArrayBoxValue &str);
+
+ /// Convert character array to a scalar by reducing the extents into the
+ /// length. Will fail if call on non reference like base.
+ fir::CharBoxValue toScalarCharacter(const fir::CharArrayBoxValue &);
+
+ /// Unbox \p boxchar into (fir.ref<fir.char<kind>>, character length type).
+ std::pair<mlir::Value, mlir::Value> createUnboxChar(mlir::Value boxChar);
+
+ /// Allocate a temp of fir::CharacterType type and length len.
+ /// Returns related fir.ref<fir.array<? x fir.char<kind>>>.
+ fir::CharBoxValue createCharacterTemp(mlir::Type type, mlir::Value len);
+
+ /// Allocate a temp of compile time constant length.
+ /// Returns related fir.ref<fir.array<len x fir.char<kind>>>.
+ fir::CharBoxValue createCharacterTemp(mlir::Type type, int len);
+
+ /// Create a temporary with the same kind, length, and value as source.
+ fir::CharBoxValue createTempFrom(const fir::ExtendedValue &source);
+
+ /// Return true if \p type is a character literal type (is
+ /// `fir.array<len x fir.char<kind>>`).;
+ static bool isCharacterLiteral(mlir::Type type);
+
+ /// Return true if \p type is one of the following type
+ /// - fir.boxchar<kind>
+ /// - fir.ref<fir.char<kind,len>>
+ /// - fir.char<kind,len>
+ static bool isCharacterScalar(mlir::Type type);
+
+ /// Does this extended value base type is fir.char<kind,len>
+ /// where len is not the unknown extent ?
+ static bool hasConstantLengthInType(const fir::ExtendedValue &);
+
+ /// Extract the kind of a character type
+ static fir::KindTy getCharacterKind(mlir::Type type);
+
+ /// Extract the kind of a character or array of character type.
+ static fir::KindTy getCharacterOrSequenceKind(mlir::Type type);
+
+ /// Determine the base character type
+ static fir::CharacterType getCharacterType(mlir::Type type);
+ static fir::CharacterType getCharacterType(const fir::CharBoxValue &box);
+ static fir::CharacterType getCharacterType(mlir::Value str);
+
+ /// Create an extended value from a value of type:
+ /// - fir.boxchar<kind>
+ /// - fir.ref<fir.char<kind,len>>
+ /// - fir.char<kind,len>
+ /// or the array versions:
+ /// - fir.ref<fir.array<n x...x fir.char<kind,len>>>
+ /// - fir.array<n x...x fir.char<kind,len>>
+ ///
+ /// Does the heavy lifting of converting the value \p character (along with an
+ /// optional \p len value) to an extended value. If \p len is null, a length
+ /// value is extracted from \p character (or its type). This will produce an
+ /// error if it's not possible. The returned value is a CharBoxValue if \p
+ /// character is a scalar, otherwise it is a CharArrayBoxValue.
+ fir::ExtendedValue toExtendedValue(mlir::Value character,
+ mlir::Value len = {});
+
+ /// Is `type` a sequence (array) of CHARACTER type? Return true for any of the
+ /// following cases:
+ /// - !fir.array<dim x ... x !fir.char<kind, len>>
+ /// - !fir.ref<T> where T is either of the first case
+ /// - !fir.box<T> where T is either of the first case
+ ///
+ /// In certain contexts, Fortran allows an array of CHARACTERs to be treated
+ /// as if it were one longer CHARACTER scalar, each element append to the
+ /// previous.
+ static bool isArray(mlir::Type type);
+
+ /// Temporary helper to help migrating towards properties of
+ /// ExtendedValue containing characters.
+ /// Mainly, this ensure that characters are always CharArrayBoxValue,
+ /// CharBoxValue, or BoxValue and that the base address is not a boxchar.
+ /// Return the argument if this is not a character.
+ /// TODO: Create and propagate ExtendedValue according to properties listed
+ /// above instead of fixing it when needed.
+ fir::ExtendedValue cleanUpCharacterExtendedValue(const fir::ExtendedValue &);
+
+ /// Create fir.char<kind> singleton from \p code integer value.
+ mlir::Value createSingletonFromCode(mlir::Value code, int kind);
+ /// Returns integer value held in a character singleton.
+ mlir::Value extractCodeFromSingleton(mlir::Value singleton);
+
+ /// Create a value for the length of a character based on its memory reference
+ /// that may be a boxchar, box or !fir.[ptr|ref|heap]<fir.char<kind, len>>. If
+ /// the memref is a simple address and the length is not constant in type, the
+ /// returned length will be empty.
+ mlir::Value getLength(mlir::Value memref);
+
+ /// Compute length given a fir.box describing a character entity.
+ /// It adjusts the length from the number of bytes per the descriptor
+ /// to the number of characters per the Fortran KIND.
+ mlir::Value readLengthFromBox(mlir::Value box);
+
+private:
+ /// FIXME: the implementation also needs a clean-up now that
+ /// CharBoxValue are better propagated.
+ fir::CharBoxValue materializeValue(mlir::Value str);
+ mlir::Value getCharBoxBuffer(const fir::CharBoxValue &box);
+ mlir::Value createElementAddr(mlir::Value buffer, mlir::Value index);
+ mlir::Value createLoadCharAt(mlir::Value buff, mlir::Value index);
+ void createStoreCharAt(mlir::Value str, mlir::Value index, mlir::Value c);
+ void createLengthOneAssign(const fir::CharBoxValue &lhs,
+ const fir::CharBoxValue &rhs);
+ void createAssign(const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs);
+ mlir::Value createBlankConstantCode(fir::CharacterType type);
+
+ FirOpBuilder &builder;
+ mlir::Location loc;
+};
+
+// FIXME: Move these to Optimizer
+mlir::FuncOp getLlvmMemcpy(FirOpBuilder &builder);
+mlir::FuncOp getLlvmMemmove(FirOpBuilder &builder);
+mlir::FuncOp getLlvmMemset(FirOpBuilder &builder);
+mlir::FuncOp getRealloc(FirOpBuilder &builder);
+
+} // namespace fir::factory
+
+#endif // FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
--- /dev/null
+//===-- Character.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/Character.h"
+#include "flang/Lower/Todo.h"
+#include "flang/Optimizer/Builder/DoLoopHelper.h"
+#include "llvm/Support/Debug.h"
+#include <optional>
+
+#define DEBUG_TYPE "flang-lower-character"
+
+//===----------------------------------------------------------------------===//
+// CharacterExprHelper implementation
+//===----------------------------------------------------------------------===//
+
+/// Unwrap base fir.char<kind,len> type.
+static fir::CharacterType recoverCharacterType(mlir::Type type) {
+ if (auto boxType = type.dyn_cast<fir::BoxCharType>())
+ return boxType.getEleTy();
+ while (true) {
+ type = fir::unwrapRefType(type);
+ if (auto boxTy = type.dyn_cast<fir::BoxType>())
+ type = boxTy.getEleTy();
+ else
+ break;
+ }
+ return fir::unwrapSequenceType(type).cast<fir::CharacterType>();
+}
+
+/// Get fir.char<kind> type with the same kind as inside str.
+fir::CharacterType
+fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
+ assert(isCharacterScalar(type) && "expected scalar character");
+ return recoverCharacterType(type);
+}
+
+fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
+ const fir::CharBoxValue &box) {
+ return getCharacterType(box.getBuffer().getType());
+}
+
+fir::CharacterType
+fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
+ return getCharacterType(str.getType());
+}
+
+/// Determine the static size of the character. Returns the computed size, not
+/// an IR Value.
+static std::optional<fir::CharacterType::LenType>
+getCompileTimeLength(const fir::CharBoxValue &box) {
+ auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
+ if (len == fir::CharacterType::unknownLen())
+ return {};
+ return len;
+}
+
+/// Detect the precondition that the value `str` does not reside in memory. Such
+/// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
+LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
+ return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType());
+}
+
+/// Unwrap integer constant from mlir::Value.
+static llvm::Optional<std::int64_t> getIntIfConstant(mlir::Value value) {
+ if (auto *definingOp = value.getDefiningOp())
+ if (auto cst = mlir::dyn_cast<mlir::ConstantOp>(definingOp))
+ if (auto intAttr = cst.getValue().dyn_cast<mlir::IntegerAttr>())
+ return intAttr.getInt();
+ return {};
+}
+
+/// This is called only if `str` does not reside in memory. Such a bare string
+/// value will be converted into a memory-based temporary and an extended
+/// boxchar value returned.
+fir::CharBoxValue
+fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
+ assert(needToMaterialize(str));
+ auto ty = str.getType();
+ assert(isCharacterScalar(ty) && "expected scalar character");
+ auto charTy = ty.dyn_cast<fir::CharacterType>();
+ if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
+ LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
+ llvm_unreachable("must be a !fir.char<N> type");
+ }
+ auto len = builder.createIntegerConstant(
+ loc, builder.getCharacterLengthType(), charTy.getLen());
+ auto temp = builder.create<fir::AllocaOp>(loc, charTy);
+ builder.create<fir::StoreOp>(loc, str, temp);
+ LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
+ << ", " << len << ")\n");
+ return {temp, len};
+}
+
+fir::ExtendedValue
+fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
+ mlir::Value len) {
+ auto lenType = builder.getCharacterLengthType();
+ auto type = character.getType();
+ auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
+ auto resultLen = len;
+ llvm::SmallVector<mlir::Value> extents;
+
+ if (auto eleType = fir::dyn_cast_ptrEleTy(type))
+ type = eleType;
+
+ if (auto arrayType = type.dyn_cast<fir::SequenceType>()) {
+ type = arrayType.getEleTy();
+ auto indexType = builder.getIndexType();
+ for (auto extent : arrayType.getShape()) {
+ if (extent == fir::SequenceType::getUnknownExtent())
+ break;
+ extents.emplace_back(
+ builder.createIntegerConstant(loc, indexType, extent));
+ }
+ // Last extent might be missing in case of assumed-size. If more extents
+ // could not be deduced from type, that's an error (a fir.box should
+ // have been used in the interface).
+ if (extents.size() + 1 < arrayType.getShape().size())
+ mlir::emitError(loc, "cannot retrieve array extents from type");
+ }
+
+ if (auto charTy = type.dyn_cast<fir::CharacterType>()) {
+ if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
+ resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
+ } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) {
+ auto refType = builder.getRefType(boxCharType.getEleTy());
+ // If the embox is accessible, use its operand to avoid filling
+ // the generated fir with embox/unbox.
+ mlir::Value boxCharLen;
+ if (auto *definingOp = character.getDefiningOp()) {
+ if (auto box = dyn_cast<fir::EmboxCharOp>(definingOp)) {
+ base = box.memref();
+ boxCharLen = box.len();
+ }
+ }
+ if (!boxCharLen) {
+ auto unboxed =
+ builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
+ base = builder.createConvert(loc, refType, unboxed.getResult(0));
+ boxCharLen = unboxed.getResult(1);
+ }
+ if (!resultLen) {
+ resultLen = boxCharLen;
+ }
+ } else if (type.isa<fir::BoxType>()) {
+ mlir::emitError(loc, "descriptor or derived type not yet handled");
+ } else {
+ llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
+ }
+
+ if (!base) {
+ if (auto load =
+ mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
+ base = load.getOperand();
+ } else {
+ return materializeValue(fir::getBase(character));
+ }
+ }
+ if (!resultLen)
+ llvm::report_fatal_error("no dynamic length found for character");
+ if (!extents.empty())
+ return fir::CharArrayBoxValue{base, resultLen, extents};
+ return fir::CharBoxValue{base, resultLen};
+}
+
+static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
+ return fir::CharacterType::getSingleton(ctxt, kind);
+}
+
+mlir::Value
+fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
+ // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
+ // type)
+ auto charTy = recoverCharacterType(box.getBuffer().getType());
+ auto boxCharType =
+ fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
+ auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
+ mlir::Value buff = box.getBuffer();
+ // fir.boxchar requires a memory reference. Allocate temp if the character is
+ // not in memory.
+ if (!fir::isa_ref_type(buff.getType())) {
+ auto temp = builder.createTemporary(loc, buff.getType());
+ builder.create<fir::StoreOp>(loc, buff, temp);
+ buff = temp;
+ }
+ buff = builder.createConvert(loc, refType, buff);
+ // Convert in case the provided length is not of the integer type that must
+ // be used in boxchar.
+ auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
+ box.getLen());
+ return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
+}
+
+fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
+ const fir::CharArrayBoxValue &box) {
+ if (box.getBuffer().getType().isa<fir::PointerType>())
+ TODO(loc, "concatenating non contiguous character array into a scalar");
+
+ // TODO: add a fast path multiplying new length at compile time if the info is
+ // in the array type.
+ auto lenType = builder.getCharacterLengthType();
+ auto len = builder.createConvert(loc, lenType, box.getLen());
+ for (auto extent : box.getExtents())
+ len = builder.create<arith::MulIOp>(
+ loc, len, builder.createConvert(loc, lenType, extent));
+
+ // TODO: typeLen can be improved in compiled constant cases
+ // TODO: allow bare fir.array<> (no ref) conversion here ?
+ auto typeLen = fir::CharacterType::unknownLen();
+ auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
+ auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
+ auto type = fir::ReferenceType::get(charTy);
+ auto buffer = builder.createConvert(loc, type, box.getBuffer());
+ return {buffer, len};
+}
+
+mlir::Value fir::factory::CharacterExprHelper::createEmbox(
+ const fir::CharArrayBoxValue &box) {
+ // Use same embox as for scalar. It's losing the actual data size information
+ // (We do not multiply the length by the array size), but that is what Fortran
+ // call interfaces using boxchar expect.
+ return createEmbox(static_cast<const fir::CharBoxValue &>(box));
+}
+
+/// Get the address of the element at position \p index of the scalar character
+/// \p buffer.
+/// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
+/// unknown. \p index must have any integer type, and is zero based. The return
+/// value is a singleton address (!fir.ref<!fir.char<kind>>)
+mlir::Value
+fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
+ mlir::Value index) {
+ // The only way to address an element of a fir.ref<char<kind, len>> is to cast
+ // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
+ auto bufferType = buffer.getType();
+ assert(fir::isa_ref_type(bufferType));
+ assert(isCharacterScalar(bufferType));
+ auto charTy = recoverCharacterType(bufferType);
+ auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
+ auto singleRefTy = builder.getRefType(singleTy);
+ auto extent = fir::SequenceType::getUnknownExtent();
+ if (charTy.getLen() != fir::CharacterType::unknownLen())
+ extent = charTy.getLen();
+ auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
+
+ auto coor = builder.createConvert(loc, coorTy, buffer);
+ auto i = builder.createConvert(loc, builder.getIndexType(), index);
+ return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
+}
+
+/// Load a character out of `buff` from offset `index`.
+/// `buff` must be a reference to memory.
+mlir::Value
+fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
+ mlir::Value index) {
+ LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
+ << buff.getType() << " at: " << index << '\n');
+ return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
+}
+
+/// Store the singleton character `c` to `str` at offset `index`.
+/// `str` must be a reference to memory.
+void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
+ mlir::Value index,
+ mlir::Value c) {
+ LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
+ << " type: " << str.getType() << " at: " << index
+ << '\n');
+ auto addr = createElementAddr(str, index);
+ builder.create<fir::StoreOp>(loc, c, addr);
+}
+
+// FIXME: this temp is useless... either fir.coordinate_of needs to
+// work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
+// character should never be loaded.
+// If this is a fir.array<>, allocate and store the value so that
+// fir.cooridnate_of can be use on the value.
+mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
+ const fir::CharBoxValue &box) {
+ auto buff = box.getBuffer();
+ if (fir::isa_char(buff.getType())) {
+ auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
+ builder.create<fir::StoreOp>(loc, buff, newBuff);
+ return newBuff;
+ }
+ return buff;
+}
+
+/// Get the LLVM intrinsic for `memcpy`. Use the 64 bit version.
+mlir::FuncOp fir::factory::getLlvmMemcpy(fir::FirOpBuilder &builder) {
+ auto ptrTy = builder.getRefType(builder.getIntegerType(8));
+ llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
+ builder.getI1Type()};
+ auto memcpyTy =
+ mlir::FunctionType::get(builder.getContext(), args, llvm::None);
+ return builder.addNamedFunction(builder.getUnknownLoc(),
+ "llvm.memcpy.p0i8.p0i8.i64", memcpyTy);
+}
+
+/// Get the LLVM intrinsic for `memmove`. Use the 64 bit version.
+mlir::FuncOp fir::factory::getLlvmMemmove(fir::FirOpBuilder &builder) {
+ auto ptrTy = builder.getRefType(builder.getIntegerType(8));
+ llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
+ builder.getI1Type()};
+ auto memmoveTy =
+ mlir::FunctionType::get(builder.getContext(), args, llvm::None);
+ return builder.addNamedFunction(builder.getUnknownLoc(),
+ "llvm.memmove.p0i8.p0i8.i64", memmoveTy);
+}
+
+/// Get the LLVM intrinsic for `memset`. Use the 64 bit version.
+mlir::FuncOp fir::factory::getLlvmMemset(fir::FirOpBuilder &builder) {
+ auto ptrTy = builder.getRefType(builder.getIntegerType(8));
+ llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
+ builder.getI1Type()};
+ auto memsetTy =
+ mlir::FunctionType::get(builder.getContext(), args, llvm::None);
+ return builder.addNamedFunction(builder.getUnknownLoc(),
+ "llvm.memset.p0i8.p0i8.i64", memsetTy);
+}
+
+/// Get the standard `realloc` function.
+mlir::FuncOp fir::factory::getRealloc(fir::FirOpBuilder &builder) {
+ auto ptrTy = builder.getRefType(builder.getIntegerType(8));
+ llvm::SmallVector<mlir::Type> args = {ptrTy, builder.getI64Type()};
+ auto reallocTy = mlir::FunctionType::get(builder.getContext(), args, {ptrTy});
+ return builder.addNamedFunction(builder.getUnknownLoc(), "realloc",
+ reallocTy);
+}
+
+/// Create a loop to copy `count` characters from `src` to `dest`. Note that the
+/// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
+void fir::factory::CharacterExprHelper::createCopy(
+ const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
+ mlir::Value count) {
+ auto fromBuff = getCharBoxBuffer(src);
+ auto toBuff = getCharBoxBuffer(dest);
+ LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
+ llvm::dbgs() << " to: "; dest.dump();
+ llvm::dbgs() << " count: " << count << '\n');
+ auto kind = getCharacterKind(src.getBuffer().getType());
+ // If the src and dest are the same KIND, then use memmove to move the bits.
+ // We don't have to worry about overlapping ranges with memmove.
+ if (getCharacterKind(dest.getBuffer().getType()) == kind) {
+ auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
+ auto i64Ty = builder.getI64Type();
+ auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
+ auto castCount = builder.createConvert(loc, i64Ty, count);
+ auto totalBytes = builder.create<arith::MulIOp>(loc, kindBytes, castCount);
+ auto notVolatile = builder.createBool(loc, false);
+ auto memmv = getLlvmMemmove(builder);
+ auto argTys = memmv.getType().getInputs();
+ auto toPtr = builder.createConvert(loc, argTys[0], toBuff);
+ auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff);
+ builder.create<fir::CallOp>(
+ loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile});
+ return;
+ }
+
+ // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
+ builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
+ dest.getBuffer());
+}
+
+void fir::factory::CharacterExprHelper::createPadding(
+ const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
+ auto blank = createBlankConstant(getCharacterType(str));
+ // Always create the loop, if upper < lower, no iteration will be
+ // executed.
+ auto toBuff = getCharBoxBuffer(str);
+ fir::factory::DoLoopHelper{builder, loc}.createLoop(
+ lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
+ createStoreCharAt(toBuff, index, blank);
+ });
+}
+
+fir::CharBoxValue
+fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
+ mlir::Value len) {
+ auto kind = recoverCharacterType(type).getFKind();
+ auto typeLen = fir::CharacterType::unknownLen();
+ // If len is a constant, reflect the length in the type.
+ if (auto cstLen = getIntIfConstant(len))
+ typeLen = *cstLen;
+ auto *ctxt = builder.getContext();
+ auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
+ llvm::SmallVector<mlir::Value> lenParams;
+ if (typeLen == fir::CharacterType::unknownLen())
+ lenParams.push_back(len);
+ auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
+ /*shape=*/llvm::None, lenParams);
+ return {ref, len};
+}
+
+fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
+ const fir::ExtendedValue &source) {
+ const auto *charBox = source.getCharBox();
+ if (!charBox)
+ fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
+ auto len = charBox->getLen();
+ auto sourceTy = charBox->getBuffer().getType();
+ auto temp = createCharacterTemp(sourceTy, len);
+ if (fir::isa_ref_type(sourceTy)) {
+ createCopy(temp, *charBox, len);
+ } else {
+ auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
+ temp.getBuffer());
+ builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
+ }
+ return temp;
+}
+
+// Simple length one character assignment without loops.
+void fir::factory::CharacterExprHelper::createLengthOneAssign(
+ const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
+ auto addr = lhs.getBuffer();
+ mlir::Value val = builder.create<fir::LoadOp>(loc, rhs.getBuffer());
+ auto addrTy = builder.getRefType(val.getType());
+ addr = builder.createConvert(loc, addrTy, addr);
+ builder.create<fir::StoreOp>(loc, val, addr);
+}
+
+/// Returns the minimum of integer mlir::Value \p a and \b.
+mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value a, mlir::Value b) {
+ auto cmp =
+ builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, a, b);
+ return builder.create<mlir::SelectOp>(loc, cmp, a, b);
+}
+
+void fir::factory::CharacterExprHelper::createAssign(
+ const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
+ auto rhsCstLen = getCompileTimeLength(rhs);
+ auto lhsCstLen = getCompileTimeLength(lhs);
+ bool compileTimeSameLength =
+ lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen;
+
+ if (compileTimeSameLength && *lhsCstLen == 1) {
+ createLengthOneAssign(lhs, rhs);
+ return;
+ }
+
+ // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
+ // if needed.
+ auto copyCount = lhs.getLen();
+ auto idxTy = builder.getIndexType();
+ if (!compileTimeSameLength) {
+ auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
+ auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
+ copyCount = genMin(builder, loc, lhsLen, rhsLen);
+ }
+
+ // Actual copy
+ createCopy(lhs, rhs, copyCount);
+
+ // Pad if needed.
+ if (!compileTimeSameLength) {
+ auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
+ auto maxPadding = builder.create<arith::SubIOp>(loc, lhs.getLen(), one);
+ createPadding(lhs, copyCount, maxPadding);
+ }
+}
+
+fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
+ const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
+ auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
+ lhs.getLen());
+ auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
+ rhs.getLen());
+ mlir::Value len = builder.create<arith::AddIOp>(loc, lhsLen, rhsLen);
+ auto temp = createCharacterTemp(getCharacterType(rhs), len);
+ createCopy(temp, lhs, lhsLen);
+ auto one = builder.createIntegerConstant(loc, len.getType(), 1);
+ auto upperBound = builder.create<arith::SubIOp>(loc, len, one);
+ auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
+ auto fromBuff = getCharBoxBuffer(rhs);
+ auto toBuff = getCharBoxBuffer(temp);
+ fir::factory::DoLoopHelper{builder, loc}.createLoop(
+ lhsLenIdx, upperBound, one,
+ [&](fir::FirOpBuilder &bldr, mlir::Value index) {
+ auto rhsIndex = bldr.create<arith::SubIOp>(loc, index, lhsLenIdx);
+ auto charVal = createLoadCharAt(fromBuff, rhsIndex);
+ createStoreCharAt(toBuff, index, charVal);
+ });
+ return temp;
+}
+
+fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
+ const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
+ // Constant need to be materialize in memory to use fir.coordinate_of.
+ auto nbounds = bounds.size();
+ if (nbounds < 1 || nbounds > 2) {
+ mlir::emitError(loc, "Incorrect number of bounds in substring");
+ return {mlir::Value{}, mlir::Value{}};
+ }
+ mlir::SmallVector<mlir::Value> castBounds;
+ // Convert bounds to length type to do safe arithmetic on it.
+ for (auto bound : bounds)
+ castBounds.push_back(
+ builder.createConvert(loc, builder.getCharacterLengthType(), bound));
+ auto lowerBound = castBounds[0];
+ // FIR CoordinateOp is zero based but Fortran substring are one based.
+ auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
+ auto offset = builder.create<arith::SubIOp>(loc, lowerBound, one).getResult();
+ auto addr = createElementAddr(box.getBuffer(), offset);
+ auto kind = getCharacterKind(box.getBuffer().getType());
+ auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
+ auto resultType = builder.getRefType(charTy);
+ auto substringRef = builder.createConvert(loc, resultType, addr);
+
+ // Compute the length.
+ mlir::Value substringLen;
+ if (nbounds < 2) {
+ substringLen =
+ builder.create<arith::SubIOp>(loc, box.getLen(), castBounds[0]);
+ } else {
+ substringLen =
+ builder.create<arith::SubIOp>(loc, castBounds[1], castBounds[0]);
+ }
+ substringLen = builder.create<arith::AddIOp>(loc, substringLen, one);
+
+ // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
+ auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
+ auto cdt = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+ substringLen, zero);
+ substringLen = builder.create<mlir::SelectOp>(loc, cdt, zero, substringLen);
+
+ return {substringRef, substringLen};
+}
+
+mlir::Value
+fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
+ // Note: Runtime for LEN_TRIM should also be available at some
+ // point. For now use an inlined implementation.
+ auto indexType = builder.getIndexType();
+ auto len = builder.createConvert(loc, indexType, str.getLen());
+ auto one = builder.createIntegerConstant(loc, indexType, 1);
+ auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
+ auto zero = builder.createIntegerConstant(loc, indexType, 0);
+ auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
+ auto blank = createBlankConstantCode(getCharacterType(str));
+ mlir::Value lastChar = builder.create<arith::SubIOp>(loc, len, one);
+
+ auto iterWhile =
+ builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
+ /*returnFinalCount=*/false, lastChar);
+ auto insPt = builder.saveInsertionPoint();
+ builder.setInsertionPointToStart(iterWhile.getBody());
+ auto index = iterWhile.getInductionVar();
+ // Look for first non-blank from the right of the character.
+ auto fromBuff = getCharBoxBuffer(str);
+ auto elemAddr = createElementAddr(fromBuff, index);
+ auto codeAddr =
+ builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
+ auto c = builder.create<fir::LoadOp>(loc, codeAddr);
+ auto isBlank =
+ builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, blank, c);
+ llvm::SmallVector<mlir::Value> results = {isBlank, index};
+ builder.create<fir::ResultOp>(loc, results);
+ builder.restoreInsertionPoint(insPt);
+ // Compute length after iteration (zero if all blanks)
+ mlir::Value newLen =
+ builder.create<arith::AddIOp>(loc, iterWhile.getResult(1), one);
+ auto result =
+ builder.create<mlir::SelectOp>(loc, iterWhile.getResult(0), zero, newLen);
+ return builder.createConvert(loc, builder.getCharacterLengthType(), result);
+}
+
+fir::CharBoxValue
+fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
+ int len) {
+ assert(len >= 0 && "expected positive length");
+ auto kind = recoverCharacterType(type).getFKind();
+ auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
+ auto addr = builder.create<fir::AllocaOp>(loc, charType);
+ auto mlirLen =
+ builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
+ return {addr, mlirLen};
+}
+
+// Returns integer with code for blank. The integer has the same
+// size as the character. Blank has ascii space code for all kinds.
+mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
+ fir::CharacterType type) {
+ auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
+ auto intType = builder.getIntegerType(bits);
+ return builder.createIntegerConstant(loc, intType, ' ');
+}
+
+mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
+ fir::CharacterType type) {
+ return createSingletonFromCode(createBlankConstantCode(type),
+ type.getFKind());
+}
+
+void fir::factory::CharacterExprHelper::createAssign(
+ const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
+ if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
+ if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
+ createAssign(*to, *str);
+ return;
+ }
+ }
+ TODO(loc, "character array assignment");
+ // Note that it is not sure the array aspect should be handled
+ // by this utility.
+}
+
+mlir::Value
+fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
+ mlir::Value len) {
+ return createEmbox(fir::CharBoxValue{addr, len});
+}
+
+std::pair<mlir::Value, mlir::Value>
+fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
+ using T = std::pair<mlir::Value, mlir::Value>;
+ return toExtendedValue(boxChar).match(
+ [](const fir::CharBoxValue &b) -> T {
+ return {b.getBuffer(), b.getLen()};
+ },
+ [](const fir::CharArrayBoxValue &b) -> T {
+ return {b.getBuffer(), b.getLen()};
+ },
+ [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
+}
+
+bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
+ if (auto seqType = type.dyn_cast<fir::SequenceType>())
+ return (seqType.getShape().size() == 1) &&
+ fir::isa_char(seqType.getEleTy());
+ return false;
+}
+
+bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
+ if (type.isa<fir::BoxCharType>())
+ return true;
+ type = fir::unwrapRefType(type);
+ if (auto boxTy = type.dyn_cast<fir::BoxType>())
+ type = boxTy.getEleTy();
+ type = fir::unwrapRefType(type);
+ return !type.isa<fir::SequenceType>() && fir::isa_char(type);
+}
+
+fir::KindTy
+fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
+ assert(isCharacterScalar(type) && "expected scalar character");
+ return recoverCharacterType(type).getFKind();
+}
+
+fir::KindTy
+fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
+ return recoverCharacterType(type).getFKind();
+}
+
+bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
+ return !isCharacterScalar(type);
+}
+
+bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
+ const fir::ExtendedValue &exv) {
+ auto charTy = recoverCharacterType(fir::getBase(exv).getType());
+ return charTy.hasConstantLen();
+}
+
+mlir::Value
+fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
+ int kind) {
+ auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
+ auto bits = builder.getKindMap().getCharacterBitsize(kind);
+ auto intType = builder.getIntegerType(bits);
+ auto cast = builder.createConvert(loc, intType, code);
+ auto undef = builder.create<fir::UndefOp>(loc, charType);
+ auto zero = builder.createIntegerConstant(loc, builder.getIndexType(), 0);
+ return builder.create<fir::InsertValueOp>(loc, charType, undef, cast, zero);
+}
+
+mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
+ mlir::Value singleton) {
+ auto type = getCharacterType(singleton);
+ assert(type.getLen() == 1);
+ auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
+ auto intType = builder.getIntegerType(bits);
+ auto zero = builder.createIntegerConstant(loc, builder.getIndexType(), 0);
+ return builder.create<fir::ExtractValueOp>(loc, intType, singleton, zero);
+}
+
+mlir::Value
+fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
+ auto lenTy = builder.getCharacterLengthType();
+ auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
+ auto charTy = recoverCharacterType(box.getType());
+ auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
+ auto width = bits / 8;
+ if (width > 1) {
+ auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
+ return builder.create<arith::DivSIOp>(loc, size, widthVal);
+ }
+ return size;
+}
+
+mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
+ auto memrefType = memref.getType();
+ auto charType = recoverCharacterType(memrefType);
+ assert(charType && "must be a character type");
+ if (charType.hasConstantLen())
+ return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
+ charType.getLen());
+ if (memrefType.isa<fir::BoxType>())
+ return readLengthFromBox(memref);
+ if (memrefType.isa<fir::BoxCharType>())
+ return createUnboxChar(memref).second;
+
+ // Length cannot be deduced from memref.
+ return {};
+}