def LLVM_UMinOp : LLVM_BinarySameArgsIntrOpI<"umin">;
class LLVM_MemcpyIntrOpBase<string name> :
- LLVM_ZeroResultIntrOp<name, [0, 1, 2], [], /*requiresAccessGroup=*/1,
- /*requiresAliasAnalysis=*/1> {
+ LLVM_ZeroResultIntrOp<name, [0, 1, 2],
+ [DeclareOpInterfaceMethods<PromotableMemOpInterface>,
+ DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>,
+ DeclareOpInterfaceMethods<SafeMemorySlotAccessOpInterface>],
+ /*requiresAccessGroup=*/1, /*requiresAliasAnalysis=*/1> {
dag args = (ins Arg<LLVM_AnyPointer,"",[MemWrite]>:$dst,
Arg<LLVM_AnyPointer,"",[MemRead]>:$src,
AnySignlessInteger:$len, I1Attr:$isVolatile);
def LLVM_MemmoveOp : LLVM_MemcpyIntrOpBase<"memmove">;
def LLVM_MemcpyInlineOp :
- LLVM_ZeroResultIntrOp<"memcpy.inline", [0, 1], [],
- /*requiresAccessGroup=*/1,
- /*requiresAliasAnalysis=*/1> {
+ LLVM_ZeroResultIntrOp<"memcpy.inline", [0, 1],
+ [DeclareOpInterfaceMethods<PromotableMemOpInterface>,
+ DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>,
+ DeclareOpInterfaceMethods<SafeMemorySlotAccessOpInterface>],
+ /*requiresAccessGroup=*/1, /*requiresAliasAnalysis=*/1> {
dag args = (ins Arg<LLVM_AnyPointer,"",[MemWrite]>:$dst,
Arg<LLVM_AnyPointer,"",[MemRead]>:$src,
APIntAttr:$len, I1Attr:$isVolatile);
#include "mlir/IR/ValueRange.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
using namespace mlir;
}
//===----------------------------------------------------------------------===//
-// Interfaces for memset
+// Utilities for memory intrinsics
//===----------------------------------------------------------------------===//
-/// Returns the length of the given memset in bytes if it can be known at
-/// compile-time on a best-effort basis, nothing otherwise.
-static std::optional<uint64_t> getStaticMemsetLen(LLVM::MemsetOp op) {
- IntegerAttr::ValueType memsetLen;
- if (!matchPattern(op.getLen(), m_ConstantInt(&memsetLen)))
- return {};
+namespace {
- if (memsetLen.getBitWidth() > 64)
+/// Returns the length of the given memory intrinsic in bytes if it can be known
+/// at compile-time on a best-effort basis, nothing otherwise.
+template <class MemIntr>
+std::optional<uint64_t> getStaticMemIntrLen(MemIntr op) {
+ APInt memIntrLen;
+ if (!matchPattern(op.getLen(), m_ConstantInt(&memIntrLen)))
+ return {};
+ if (memIntrLen.getBitWidth() > 64)
return {};
+ return memIntrLen.getZExtValue();
+}
- return memsetLen.getZExtValue();
+/// Returns the length of the given memory intrinsic in bytes if it can be known
+/// at compile-time on a best-effort basis, nothing otherwise.
+/// Because MemcpyInlineOp has its length encoded as an attribute, this requires
+/// specialized handling: the length is read directly from the attribute instead
+/// of being matched as a constant-producing SSA value.
+template <>
+std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemcpyInlineOp op) {
+  APInt memIntrLen = op.getLen();
+  // Lengths wider than 64 bits cannot be represented in the uint64_t result.
+  if (memIntrLen.getBitWidth() > 64)
+    return {};
+  return memIntrLen.getZExtValue();
 }
-/// Returns whether one can be sure the memset does not write outside of the
-/// bounds of the given slot, on a best-effort basis.
-static bool definitelyWritesOnlyWithinSlot(LLVM::MemsetOp op,
- const MemorySlot &slot,
+} // namespace
+
+/// Returns whether one can be sure the memory intrinsic does not write outside
+/// of the bounds of the given slot, on a best-effort basis.
+template <class MemIntr>
+static bool definitelyWritesOnlyWithinSlot(MemIntr op, const MemorySlot &slot,
DataLayout &dataLayout) {
if (!isa<LLVM::LLVMPointerType>(slot.ptr.getType()) ||
op.getDst() != slot.ptr)
return false;
- std::optional<uint64_t> memsetLen = getStaticMemsetLen(op);
- return memsetLen && *memsetLen <= dataLayout.getTypeSize(slot.elemType);
+ std::optional<uint64_t> memIntrLen = getStaticMemIntrLen(op);
+ return memIntrLen && *memIntrLen <= dataLayout.getTypeSize(slot.elemType);
+}
+
+/// Checks whether all subslot indices of the given destructurable slot are
+/// IntegerAttrs of type i32. The rewiring logic below creates GEPs with
+/// i32-typed constant indices, so this is used to check GEPs can index
+/// into them.
+static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
+  Type i32 = IntegerType::get(slot.ptr.getContext(), 32);
+  return llvm::all_of(llvm::make_first_range(slot.elementPtrs),
+                      [&](Attribute index) {
+                        auto intIndex = dyn_cast<IntegerAttr>(index);
+                        return intIndex && intIndex.getType() == i32;
+                      });
 }
+//===----------------------------------------------------------------------===//
+// Interfaces for memset
+//===----------------------------------------------------------------------===//
+
bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; }
bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
assert(intType.getWidth() % 8 == 0);
- // Build the memset integer by repeatedly shifting the value and or-ing
- // it with the previous value.
+ // Build the memset integer by repeatedly shifting the value and
+ // or-ing it with the previous value.
uint64_t coveredBits = 8;
Value currentValue =
rewriter.create<LLVM::ZExtOp>(getLoc(), intType, getVal());
return false;
DataLayout layout = DataLayout::closest(*this);
- return getStaticMemsetLen(*this) == layout.getTypeSize(slot.elemType);
+ return getStaticMemIntrLen(*this) == layout.getTypeSize(slot.elemType);
}
DeletionKind LLVM::MemsetOp::removeBlockingUses(
.getSubelementIndexMap())
return false;
+ if (!areAllIndicesI32(slot))
+ return false;
+
DataLayout dataLayout = DataLayout::closest(*this);
return definitelyWritesOnlyWithinSlot(*this, slot, dataLayout);
}
}
//===----------------------------------------------------------------------===//
+// Interfaces for memcpy/memmove
+//===----------------------------------------------------------------------===//
+
+/// Returns whether the memcpy-like operation reads from the given slot, i.e.
+/// whether the slot pointer is the intrinsic's source operand.
+template <class MemcpyLike>
+static bool memcpyLoadsFrom(MemcpyLike op, const MemorySlot &slot) {
+  return op.getSrc() == slot.ptr;
+}
+
+/// Returns whether the memcpy-like operation writes to the given slot, i.e.
+/// whether the slot pointer is the intrinsic's destination operand.
+template <class MemcpyLike>
+static bool memcpyStoresTo(MemcpyLike op, const MemorySlot &slot) {
+  return op.getDst() == slot.ptr;
+}
+
+/// Materializes the value a memcpy-like operation stores into the slot by
+/// loading the slot's element type from the intrinsic's source pointer. This
+/// is only meaningful for full-slot copies (enforced by memcpyCanUsesBeRemoved).
+template <class MemcpyLike>
+static Value memcpyGetStored(MemcpyLike op, const MemorySlot &slot,
+                             RewriterBase &rewriter) {
+  return rewriter.create<LLVM::LoadOp>(op.getLoc(), slot.elemType, op.getSrc());
+}
+
+/// Returns whether the uses of a memcpy-like operation on the given slot can
+/// be removed, i.e. whether the copy can be replaced by an equivalent
+/// load/store pair. Only non-volatile copies of exactly the slot's full size
+/// between distinct pointers qualify.
+template <class MemcpyLike>
+static bool
+memcpyCanUsesBeRemoved(MemcpyLike op, const MemorySlot &slot,
+                       const SmallPtrSetImpl<OpOperand *> &blockingUses,
+                       SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  // If source and destination are the same, memcpy behavior is undefined and
+  // memmove is a no-op. Because there is no memory change happening here,
+  // simplifying such operations is left to canonicalization.
+  if (op.getDst() == op.getSrc())
+    return false;
+
+  // Volatile accesses must be preserved as-is.
+  if (op.getIsVolatile())
+    return false;
+
+  // Only full-slot copies can be turned into a single load/store of the
+  // slot's element type; partial copies cannot be promoted here.
+  DataLayout layout = DataLayout::closest(op);
+  return getStaticMemIntrLen(op) == layout.getTypeSize(slot.elemType);
+}
+
+/// Removes the memcpy-like operation once its uses are provably removable: if
+/// the slot was the copy's source, the reaching definition is stored to the
+/// copy's destination first; in all cases the intrinsic itself is deleted.
+template <class MemcpyLike>
+static DeletionKind
+memcpyRemoveBlockingUses(MemcpyLike op, const MemorySlot &slot,
+                         const SmallPtrSetImpl<OpOperand *> &blockingUses,
+                         RewriterBase &rewriter, Value reachingDefinition) {
+  if (op.loadsFrom(slot))
+    rewriter.create<LLVM::StoreOp>(op.getLoc(), reachingDefinition,
+                                   op.getDst());
+  return DeletionKind::Delete;
+}
+
+/// Ensures the memcpy-like operation only accesses memory within the bounds of
+/// the given slot. Succeeds exactly when the copy provably writes only within
+/// the slot.
+template <class MemcpyLike>
+static LogicalResult
+memcpyEnsureOnlySafeAccesses(MemcpyLike op, const MemorySlot &slot,
+                             SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  DataLayout dataLayout = DataLayout::closest(op);
+  // While rewiring memcpy-like intrinsics only supports full copies, partial
+  // copies are still safe accesses so it is enough to only check for writes
+  // within bounds.
+  return success(definitelyWritesOnlyWithinSlot(op, slot, dataLayout));
+}
+
+/// Returns whether the memcpy-like operation can be rewired to operate on the
+/// subslots of a destructured slot. Requires a non-volatile, statically-sized
+/// full-slot copy of a type whose subelements are indexable with i32 GEPs.
+/// Indices read from the slot (when it is the copy's source) are recorded in
+/// `usedIndices` so the corresponding subslots are kept alive.
+template <class MemcpyLike>
+static bool memcpyCanRewire(MemcpyLike op, const DestructurableMemorySlot &slot,
+                            SmallPtrSetImpl<Attribute> &usedIndices,
+                            SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  // Volatile copies must be preserved as-is and cannot be split up.
+  if (op.getIsVolatile())
+    return false;
+
+  // The slot's type must expose a subelement index map to be destructurable.
+  if (!slot.elemType.cast<DestructurableTypeInterface>()
+           .getSubelementIndexMap())
+    return false;
+
+  // The rewiring creates i32-typed constant GEP indices, so all subslot
+  // indices must be i32.
+  if (!areAllIndicesI32(slot))
+    return false;
+
+  // Only full copies are supported.
+  DataLayout dataLayout = DataLayout::closest(op);
+  if (getStaticMemIntrLen(op) != dataLayout.getTypeSize(slot.elemType))
+    return false;
+
+  // A full copy out of the slot reads every subelement, so every index is
+  // used.
+  if (op.getSrc() == slot.ptr)
+    for (Attribute index : llvm::make_first_range(slot.elementPtrs))
+      usedIndices.insert(index);
+
+  return true;
+}
+
+namespace {
+
+/// Creates a new memcpy-like operation of the same kind as `toReplace`,
+/// copying `toCpy`-sized data between `dst` and `src`. The copy length is
+/// materialized as a constant of the same type as the original length operand.
+template <class MemcpyLike>
+void createMemcpyLikeToReplace(RewriterBase &rewriter, const DataLayout &layout,
+                               MemcpyLike toReplace, Value dst, Value src,
+                               Type toCpy, bool isVolatile) {
+  Value memcpySize = rewriter.create<LLVM::ConstantOp>(
+      toReplace.getLoc(), IntegerAttr::get(toReplace.getLen().getType(),
+                                           layout.getTypeSize(toCpy)));
+  rewriter.create<MemcpyLike>(toReplace.getLoc(), dst, src, memcpySize,
+                              isVolatile);
+}
+
+/// Specialization for MemcpyInlineOp, whose length is an attribute rather than
+/// an SSA operand and therefore must be rebuilt as an IntegerAttr of the same
+/// bit width as the original length.
+template <>
+void createMemcpyLikeToReplace(RewriterBase &rewriter, const DataLayout &layout,
+                               LLVM::MemcpyInlineOp toReplace, Value dst,
+                               Value src, Type toCpy, bool isVolatile) {
+  Type lenType = IntegerType::get(toReplace->getContext(),
+                                  toReplace.getLen().getBitWidth());
+  rewriter.create<LLVM::MemcpyInlineOp>(
+      toReplace.getLoc(), dst, src,
+      IntegerAttr::get(lenType, layout.getTypeSize(toCpy)), isVolatile);
+}
+
+} // namespace
+
+/// Rewires a memcpy-like operation. Only copies to or from the full slot are
+/// supported. The single full-slot copy is replaced by one smaller
+/// memcpy-like operation per live subslot, each paired with a GEP computing
+/// the matching subelement address in the non-slot pointer.
+template <class MemcpyLike>
+static DeletionKind memcpyRewire(MemcpyLike op,
+                                 const DestructurableMemorySlot &slot,
+                                 DenseMap<Attribute, MemorySlot> &subslots,
+                                 RewriterBase &rewriter) {
+  // No live subslot means no data needs to be copied at all.
+  if (subslots.empty())
+    return DeletionKind::Delete;
+
+  DataLayout layout = DataLayout::closest(op);
+
+  // memcpyCanUsesBeRemoved rejected self-copies, so the slot is exactly one
+  // of the two pointers.
+  assert((slot.ptr == op.getDst()) != (slot.ptr == op.getSrc()));
+  bool isDst = slot.ptr == op.getDst();
+
+#ifndef NDEBUG
+  size_t slotsTreated = 0;
+#endif
+
+  // It was previously checked that index types are consistent, so this type can
+  // be fetched now.
+  Type indexType = cast<IntegerAttr>(subslots.begin()->first).getType();
+  for (size_t i = 0, e = slot.elementPtrs.size(); i != e; i++) {
+    Attribute index = IntegerAttr::get(indexType, i);
+    // Skip subelements that are not used and thus have no subslot.
+    if (!subslots.contains(index))
+      continue;
+    const MemorySlot &subslot = subslots.at(index);
+
+#ifndef NDEBUG
+    slotsTreated++;
+#endif
+
+    // First get a pointer to the equivalent of this subslot from the source
+    // pointer.
+    SmallVector<LLVM::GEPArg> gepIndices{
+        0, static_cast<int32_t>(
+               cast<IntegerAttr>(index).getValue().getZExtValue())};
+    Value subslotPtrInOther = rewriter.create<LLVM::GEPOp>(
+        op.getLoc(), LLVM::LLVMPointerType::get(op.getContext()), slot.elemType,
+        isDst ? op.getSrc() : op.getDst(), gepIndices);
+
+    // Then create a new memcpy out of this source pointer.
+    createMemcpyLikeToReplace(rewriter, layout, op,
+                              isDst ? subslot.ptr : subslotPtrInOther,
+                              isDst ? subslotPtrInOther : subslot.ptr,
+                              subslot.elemType, op.getIsVolatile());
+  }
+
+  // Every live subslot must have been handled by the loop above.
+  assert(subslots.size() == slotsTreated);
+
+  return DeletionKind::Delete;
+}
+
+// MemcpyOp: thin forwarders dispatching the memory-slot interface methods to
+// the shared memcpy-like template implementations above.
+
+bool LLVM::MemcpyOp::loadsFrom(const MemorySlot &slot) {
+  return memcpyLoadsFrom(*this, slot);
+}
+
+bool LLVM::MemcpyOp::storesTo(const MemorySlot &slot) {
+  return memcpyStoresTo(*this, slot);
+}
+
+Value LLVM::MemcpyOp::getStored(const MemorySlot &slot,
+                                RewriterBase &rewriter) {
+  return memcpyGetStored(*this, slot, rewriter);
+}
+
+bool LLVM::MemcpyOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return memcpyCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses);
+}
+
+DeletionKind LLVM::MemcpyOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    RewriterBase &rewriter, Value reachingDefinition) {
+  return memcpyRemoveBlockingUses(*this, slot, blockingUses, rewriter,
+                                  reachingDefinition);
+}
+
+LogicalResult LLVM::MemcpyOp::ensureOnlySafeAccesses(
+    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  return memcpyEnsureOnlySafeAccesses(*this, slot, mustBeSafelyUsed);
+}
+
+bool LLVM::MemcpyOp::canRewire(const DestructurableMemorySlot &slot,
+                               SmallPtrSetImpl<Attribute> &usedIndices,
+                               SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  return memcpyCanRewire(*this, slot, usedIndices, mustBeSafelyUsed);
+}
+
+DeletionKind LLVM::MemcpyOp::rewire(const DestructurableMemorySlot &slot,
+                                    DenseMap<Attribute, MemorySlot> &subslots,
+                                    RewriterBase &rewriter) {
+  return memcpyRewire(*this, slot, subslots, rewriter);
+}
+
+// MemcpyInlineOp: thin forwarders to the shared memcpy-like implementations.
+// The attribute-encoded length is handled by the getStaticMemIntrLen and
+// createMemcpyLikeToReplace specializations.
+
+bool LLVM::MemcpyInlineOp::loadsFrom(const MemorySlot &slot) {
+  return memcpyLoadsFrom(*this, slot);
+}
+
+bool LLVM::MemcpyInlineOp::storesTo(const MemorySlot &slot) {
+  return memcpyStoresTo(*this, slot);
+}
+
+Value LLVM::MemcpyInlineOp::getStored(const MemorySlot &slot,
+                                      RewriterBase &rewriter) {
+  return memcpyGetStored(*this, slot, rewriter);
+}
+
+bool LLVM::MemcpyInlineOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return memcpyCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses);
+}
+
+DeletionKind LLVM::MemcpyInlineOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    RewriterBase &rewriter, Value reachingDefinition) {
+  return memcpyRemoveBlockingUses(*this, slot, blockingUses, rewriter,
+                                  reachingDefinition);
+}
+
+LogicalResult LLVM::MemcpyInlineOp::ensureOnlySafeAccesses(
+    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  return memcpyEnsureOnlySafeAccesses(*this, slot, mustBeSafelyUsed);
+}
+
+bool LLVM::MemcpyInlineOp::canRewire(
+    const DestructurableMemorySlot &slot,
+    SmallPtrSetImpl<Attribute> &usedIndices,
+    SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  return memcpyCanRewire(*this, slot, usedIndices, mustBeSafelyUsed);
+}
+
+DeletionKind
+LLVM::MemcpyInlineOp::rewire(const DestructurableMemorySlot &slot,
+                             DenseMap<Attribute, MemorySlot> &subslots,
+                             RewriterBase &rewriter) {
+  return memcpyRewire(*this, slot, subslots, rewriter);
+}
+
+// MemmoveOp: thin forwarders to the shared memcpy-like implementations.
+// Memmove shares all promotion/rewiring logic with memcpy since self-copies
+// (where the overlap semantics differ) are rejected up front.
+
+bool LLVM::MemmoveOp::loadsFrom(const MemorySlot &slot) {
+  return memcpyLoadsFrom(*this, slot);
+}
+
+bool LLVM::MemmoveOp::storesTo(const MemorySlot &slot) {
+  return memcpyStoresTo(*this, slot);
+}
+
+Value LLVM::MemmoveOp::getStored(const MemorySlot &slot,
+                                 RewriterBase &rewriter) {
+  return memcpyGetStored(*this, slot, rewriter);
+}
+
+bool LLVM::MemmoveOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return memcpyCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses);
+}
+
+DeletionKind LLVM::MemmoveOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    RewriterBase &rewriter, Value reachingDefinition) {
+  return memcpyRemoveBlockingUses(*this, slot, blockingUses, rewriter,
+                                  reachingDefinition);
+}
+
+LogicalResult LLVM::MemmoveOp::ensureOnlySafeAccesses(
+    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  return memcpyEnsureOnlySafeAccesses(*this, slot, mustBeSafelyUsed);
+}
+
+bool LLVM::MemmoveOp::canRewire(const DestructurableMemorySlot &slot,
+                               SmallPtrSetImpl<Attribute> &usedIndices,
+                               SmallVectorImpl<MemorySlot> &mustBeSafelyUsed) {
+  return memcpyCanRewire(*this, slot, usedIndices, mustBeSafelyUsed);
+}
+
+DeletionKind LLVM::MemmoveOp::rewire(const DestructurableMemorySlot &slot,
+                                    DenseMap<Attribute, MemorySlot> &subslots,
+                                    RewriterBase &rewriter) {
+  return memcpyRewire(*this, slot, subslots, rewriter);
+}
+
+//===----------------------------------------------------------------------===//
// Interfaces for destructurable types
//===----------------------------------------------------------------------===//
%2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i10
llvm.return %2 : i10
}
+
+// The tests below exercise mem2reg promotion of memcpy/memmove/memcpy.inline:
+// full-slot, non-volatile copies between distinct pointers are replaced with
+// load/store pairs, while self-copies, partial copies, and volatile copies
+// must be left untouched.
+
+// -----
+
+// Full-slot memcpy into an alloca is replaced by a direct load of the source.
+// CHECK-LABEL: llvm.func @basic_memcpy
+// CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
+llvm.func @basic_memcpy(%source: !llvm.ptr) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  %is_volatile = llvm.mlir.constant(false) : i1
+  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
+  "llvm.intr.memcpy"(%1, %source, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  // CHECK-NOT: "llvm.intr.memcpy"
+  // CHECK: %[[LOADED:.*]] = llvm.load %[[SOURCE]] : !llvm.ptr -> i32
+  // CHECK-NOT: "llvm.intr.memcpy"
+  %2 = llvm.load %1 : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[LOADED]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
+// Full-slot memcpy out of an alloca becomes a store of the reaching value.
+// CHECK-LABEL: llvm.func @basic_memcpy_dest
+// CHECK-SAME: (%[[DESTINATION:.*]]: !llvm.ptr)
+llvm.func @basic_memcpy_dest(%destination: !llvm.ptr) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[DATA:.*]] = llvm.mlir.constant(42 : i32) : i32
+  %data = llvm.mlir.constant(42 : i32) : i32
+  %is_volatile = llvm.mlir.constant(false) : i1
+  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
+
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  llvm.store %data, %1 : i32, !llvm.ptr
+  "llvm.intr.memcpy"(%destination, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  // CHECK-NOT: "llvm.intr.memcpy"
+  // CHECK: llvm.store %[[DATA]], %[[DESTINATION]] : i32, !llvm.ptr
+  // CHECK-NOT: "llvm.intr.memcpy"
+
+  %2 = llvm.load %1 : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[DATA]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
+// A copy between two promotable allocas folds to the stored constant.
+// CHECK-LABEL: llvm.func @double_memcpy
+llvm.func @double_memcpy() -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NEXT: %[[DATA:.*]] = llvm.mlir.constant(42 : i32) : i32
+  %data = llvm.mlir.constant(42 : i32) : i32
+  %is_volatile = llvm.mlir.constant(false) : i1
+  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
+
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  %2 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  llvm.store %data, %1 : i32, !llvm.ptr
+  "llvm.intr.memcpy"(%2, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+
+  %res = llvm.load %2 : !llvm.ptr -> i32
+  // CHECK-NEXT: llvm.return %[[DATA]] : i32
+  llvm.return %res : i32
+}
+
+// -----
+
+// Self-copies (src == dst) must not be promoted (memcpy would be UB, memmove a
+// no-op); the intrinsic is left in place.
+// CHECK-LABEL: llvm.func @ignore_self_memcpy
+llvm.func @ignore_self_memcpy() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %is_volatile = llvm.mlir.constant(false) : i1
+  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
+
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[ALLOCA]]
+  "llvm.intr.memcpy"(%1, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+
+  %res = llvm.load %1 : !llvm.ptr -> i32
+  llvm.return %res : i32
+}
+
+// -----
+
+// A copy shorter than the slot (2 of 4 bytes) cannot be turned into a single
+// load/store and must be preserved.
+// CHECK-LABEL: llvm.func @ignore_partial_memcpy
+// CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
+llvm.func @ignore_partial_memcpy(%source: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %is_volatile = llvm.mlir.constant(false) : i1
+  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(2 : i32) : i32
+  %memcpy_len = llvm.mlir.constant(2 : i32) : i32
+
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SOURCE]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  "llvm.intr.memcpy"(%1, %source, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+
+  %res = llvm.load %1 : !llvm.ptr -> i32
+  llvm.return %res : i32
+}
+
+// -----
+
+// Volatile copies must never be promoted away.
+// CHECK-LABEL: llvm.func @ignore_volatile_memcpy
+// CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
+llvm.func @ignore_volatile_memcpy(%source: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %is_volatile = llvm.mlir.constant(false) : i1
+  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
+
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SOURCE]], %[[MEMCPY_LEN]]) <{isVolatile = true}>
+  "llvm.intr.memcpy"(%1, %source, %memcpy_len) <{isVolatile = true}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+
+  %res = llvm.load %1 : !llvm.ptr -> i32
+  llvm.return %res : i32
+}
+
+// -----
+
+// Memmove is promoted identically to memcpy.
+// CHECK-LABEL: llvm.func @basic_memmove
+// CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
+llvm.func @basic_memmove(%source: !llvm.ptr) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  %is_volatile = llvm.mlir.constant(false) : i1
+  %memmove_len = llvm.mlir.constant(4 : i32) : i32
+  "llvm.intr.memmove"(%1, %source, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  // CHECK-NOT: "llvm.intr.memmove"
+  // CHECK: %[[LOADED:.*]] = llvm.load %[[SOURCE]] : !llvm.ptr -> i32
+  // CHECK-NOT: "llvm.intr.memmove"
+  %2 = llvm.load %1 : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[LOADED]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
+// Memcpy.inline (attribute-encoded length) is promoted identically to memcpy.
+// CHECK-LABEL: llvm.func @basic_memcpy_inline
+// CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
+llvm.func @basic_memcpy_inline(%source: !llvm.ptr) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  %is_volatile = llvm.mlir.constant(false) : i1
+  "llvm.intr.memcpy.inline"(%1, %source) <{isVolatile = false, len = 4 : i32}> : (!llvm.ptr, !llvm.ptr) -> ()
+  // CHECK-NOT: "llvm.intr.memcpy.inline"
+  // CHECK: %[[LOADED:.*]] = llvm.load %[[SOURCE]] : !llvm.ptr -> i32
+  // CHECK-NOT: "llvm.intr.memcpy.inline"
+  %2 = llvm.load %1 : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[LOADED]] : i32
+  llvm.return %2 : i32
+}
%7 = llvm.add %3, %6 : i32
llvm.return %7 : i32
}
+
+// The tests below exercise SROA rewiring of memcpy-like intrinsics on
+// destructurable slots: a full-slot copy is split into one small copy per
+// subslot that is actually used, with GEPs addressing the matching
+// subelement of the non-slot pointer. Partial and volatile copies block the
+// destructuring.
+
+// -----
+
+// Copy into the slot: only the used subslot survives, so the 40-byte copy
+// shrinks to a single 4-byte copy from the matching source subelement.
+// CHECK-LABEL: llvm.func @memcpy_dest
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
+  %memcpy_len = llvm.mlir.constant(40 : i32) : i32
+  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// Copy out of the slot: a full copy reads every subelement, so every subslot
+// stays alive and one small copy is emitted per index.
+// CHECK-LABEL: llvm.func @memcpy_src
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
+  %memcpy_len = llvm.mlir.constant(16 : i32) : i32
+  // Unfortunately because of FileCheck limitations it is not possible to check which slot gets read from.
+  // We can only check that the amount of operations and allocated slots is correct, which should be sufficient
+  // as unused slots are not generated.
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  "llvm.intr.memcpy"(%other_array, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// Copy between two destructurable allocas collapses to one subslot copy.
+// CHECK-LABEL: llvm.func @memcpy_double
+llvm.func @memcpy_double() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-COUNT-2: = llvm.alloca %[[ALLOCA_LEN]] x i32
+  %1 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
+  %2 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
+  %memcpy_len = llvm.mlir.constant(4 : i32) : i32
+  // CHECK-NOT: "llvm.intr.memcpy"
+  // CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  // CHECK-NOT: "llvm.intr.memcpy"
+  "llvm.intr.memcpy"(%1, %2, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  %3 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<1 x i32>
+  %4 = llvm.load %3 : !llvm.ptr -> i32
+  llvm.return %4 : i32
+}
+
+// -----
+
+// A partial copy (21 of 40 bytes) blocks destructuring; nothing changes.
+// CHECK-LABEL: llvm.func @memcpy_no_partial
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memcpy_no_partial(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
+  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(21 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
+  %memcpy_len = llvm.mlir.constant(21 : i32) : i32
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[OTHER_ARRAY]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
+  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// A volatile copy blocks destructuring; nothing changes.
+// CHECK-LABEL: llvm.func @memcpy_no_volatile
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memcpy_no_volatile(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
+  // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(40 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
+  %memcpy_len = llvm.mlir.constant(40 : i32) : i32
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[OTHER_ARRAY]], %[[MEMCPY_LEN]]) <{isVolatile = true}>
+  "llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = true}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// Memmove into the slot: same destructuring behavior as memcpy.
+// CHECK-LABEL: llvm.func @memmove_dest
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memmove_dest(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  // CHECK-DAG: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
+  %memmove_len = llvm.mlir.constant(40 : i32) : i32
+  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  // CHECK: "llvm.intr.memmove"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+  "llvm.intr.memmove"(%1, %other_array, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// Memmove out of the slot: one memmove per subslot, as for memcpy.
+// CHECK-LABEL: llvm.func @memmove_src
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memmove_src(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  // CHECK-DAG: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
+  %memmove_len = llvm.mlir.constant(16 : i32) : i32
+  // Unfortunately because of FileCheck limitations it is not possible to check which slot gets read from.
+  // We can only check that the amount of operations and allocated slots is correct, which should be sufficient
+  // as unused slots are not generated.
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+  "llvm.intr.memmove"(%other_array, %1, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// Memcpy.inline into the slot: the new length is emitted as an attribute.
+// CHECK-LABEL: llvm.func @memcpy_inline_dest
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memcpy_inline_dest(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
+  // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  // CHECK: "llvm.intr.memcpy.inline"(%[[ALLOCA]], %[[SLOT_IN_OTHER]]) <{isVolatile = false, len = 4 : i32}>
+  "llvm.intr.memcpy.inline"(%1, %other_array) <{isVolatile = false, len = 40 : i32}> : (!llvm.ptr, !llvm.ptr) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// Memcpy.inline out of the slot: one inline copy per subslot, each with an
+// attribute-encoded 4-byte length.
+// CHECK-LABEL: llvm.func @memcpy_inline_src
+// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
+llvm.func @memcpy_inline_src(%other_array: !llvm.ptr) -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  // CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
+  // Unfortunately because of FileCheck limitations it is not possible to check which slot gets read from.
+  // We can only check that the amount of operations and allocated slots is correct, which should be sufficient
+  // as unused slots are not generated.
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
+  // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  // CHECK-DAG: "llvm.intr.memcpy.inline"(%[[SLOT_IN_OTHER]], %{{.*}}) <{isVolatile = false, len = 4 : i32}>
+  "llvm.intr.memcpy.inline"(%other_array, %1) <{isVolatile = false, len = 16 : i32}> : (!llvm.ptr, !llvm.ptr) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}