This is a fix for https://bugs.llvm.org/show_bug.cgi?id=39282. Compared to D90104, this version is based on part of the full restrict patches (D68484) and uses the `@llvm.experimental.noalias.scope.decl` intrinsic to track the location where !noalias and !alias.scope scopes have been introduced. This allows us to only duplicate the scopes that are really needed.
Notes:
- it also includes changes and tests from D90104
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D92887
return nullptr;
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
+ StringRef getName() const { // Scope name, or an empty StringRef if none.
+ if (Node->getNumOperands() > 2) // The name, when present, is operand 2.
+ if (MDString *N = dyn_cast_or_null<MDString>(Node->getOperand(2)))
+ return N->getString();
+ return StringRef(); // No third operand, or it is not an MDString.
+ }
};
/// Typed iterator through MDNode operands.
Function *Callee, int64_t entryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
+/// Scan 'BBs' for 'llvm.experimental.noalias.scope.decl' intrinsics and append
+/// the scope operand of each one to 'NoAliasDeclScopes'. These scopes are
+/// candidates for duplication when cloning.
+void identifyNoAliasScopesToClone(
+ ArrayRef<BasicBlock *> BBs,
+ SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes);
+
+/// Duplicate every scope in the specified list of noalias decl scopes.
+/// A clone is named '<original name>:<Ext>', or just 'Ext' if it had no name.
+/// Afterwards, ClonedMVScopes maps each original MetadataAsValue scope list
+/// onto a MetadataAsValue wrapping the list of cloned scopes.
+/// ClonedScopes maps each original scope MDNode onto its clone; entries are
+/// only inserted, so mappings that are already present are kept.
+/// Be aware that the cloned scopes are still part of the original scope domain.
+void cloneNoAliasScopes(
+ ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+ DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+ StringRef Ext, LLVMContext &Context);
+
+/// Rewrite the MetadataAsValue operands and the !noalias / !alias.scope
+/// metadata of the specified instruction according to the provided mappings.
+/// Normally used after cloning an instruction whose scopes were cloned too.
+void adaptNoAliasScopes(
+ llvm::Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ const DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+ LLVMContext &Context);
+
+/// Clone the specified noalias decl scopes, then adapt every instruction in
+/// the NewBlocks basic blocks to the cloned versions. Does nothing when
+/// NoAliasDeclScopes is empty. 'Ext' is added to the duplicate scope names.
+void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+ ArrayRef<BasicBlock *> NewBlocks,
+ LLVMContext &Context, StringRef Ext);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
using namespace llvm;
+#define DEBUG_TYPE "clone-function"
+
/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
return NewBB;
}
+/// Clone each scope and record old -> new in both maps; see Cloning.h.
+void llvm::cloneNoAliasScopes(
+ ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+ DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+ StringRef Ext, LLVMContext &Context) {
+ MDBuilder MDB(Context);
+ // Each MV must wrap an MDNode; its MDNode operands are the scopes to clone.
+ for (auto *MV : NoAliasDeclScopes) {
+ SmallVector<Metadata *, 4> ScopeList;
+ for (auto &MDOperand : cast<MDNode>(MV->getMetadata())->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) { // Other operands are skipped.
+ AliasScopeNode SNANode(MD);
+ // Name of the clone: '<original name>:<Ext>', or just Ext when unnamed.
+ std::string Name;
+ auto ScopeName = SNANode.getName();
+ if (!ScopeName.empty())
+ Name = (Twine(ScopeName) + ":" + Ext).str();
+ else
+ Name = std::string(Ext);
+ // The clone is created in the same domain as the original scope.
+ MDNode *NewScope = MDB.createAnonymousAliasScope(
+ const_cast<MDNode *>(SNANode.getDomain()), Name);
+ ClonedScopes.insert(std::make_pair(MD, NewScope));
+ ScopeList.push_back(NewScope);
+ }
+ }
+ MDNode *NewScopeList = MDNode::get(Context, ScopeList); // Clones only.
+ ClonedMVScopes.insert(
+ std::make_pair(MV, MetadataAsValue::get(Context, NewScopeList)));
+ }
+}
+/// Point I's scope operands and metadata at the clones; see Cloning.h.
+void llvm::adaptNoAliasScopes(
+ Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ const DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+ LLVMContext &Context) {
+ // Replace every MetadataAsValue operand that has a cloned counterpart.
+ for (Use &U : I->operands())
+ if (MetadataAsValue *MV = dyn_cast<MetadataAsValue>(U))
+ if (auto *NewMV = ClonedMVScopes.lookup(MV))
+ U.set(NewMV);
+ // Rebuild the MD_ID scope list of I when at least one scope was cloned.
+ auto replaceWhenNeeded = [&](unsigned MD_ID) {
+ if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) {
+ bool NeedsReplacement = false;
+ SmallVector<Metadata *, 8> NewScopeList;
+ for (auto &MDOp : CSNoAlias->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { // Other operands are dropped.
+ if (auto *NewMD = ClonedScopes.lookup(MD)) {
+ NewScopeList.push_back(NewMD);
+ NeedsReplacement = true;
+ continue;
+ }
+ NewScopeList.push_back(MD); // Not cloned: keep the original scope.
+ }
+ }
+ if (NeedsReplacement) // Otherwise the existing metadata is left untouched.
+ I->setMetadata(MD_ID, MDNode::get(Context, NewScopeList));
+ }
+ };
+ replaceWhenNeeded(LLVMContext::MD_noalias);
+ replaceWhenNeeded(LLVMContext::MD_alias_scope);
+}
+/// Clone the scopes, then adapt all instructions in NewBlocks; see Cloning.h.
+void llvm::cloneAndAdaptNoAliasScopes(
+ ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+ ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty()) // Nothing to clone.
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ DenseMap<MetadataAsValue *, MetadataAsValue *> ClonedMVScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+ // Create the clones, filling both maps.
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext,
+ Context);
+ // Adapt each instruction in the new blocks that uses a cloned scope.
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context);
+}
+/// Collect the scope operand of each noalias.scope.decl in BBs; see Cloning.h.
+void llvm::identifyNoAliasScopesToClone(
+ ArrayRef<BasicBlock *> BBs,
+ SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes) {
+ for (BasicBlock *BB : BBs)
+ for (Instruction &I : *BB)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) // Scope decls only.
+ NoAliasDeclScopes.push_back(cast<MetadataAsValue>(
+ Decl->getOperand(Intrinsic::NoAliasScopeDeclScopeArg)));
+}
<< DIL->getFilename() << " Line: " << DIL->getLine());
}
+ // Identify what noalias metadata is inside the loop: if it is inside the
+ // loop, the associated metadata must be cloned for each iteration.
+ SmallVector<MetadataAsValue *, 6> LoopLocalNoAliasDeclScopes;
+ identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
for (unsigned It = 1; It != ULO.Count; ++It) {
SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
AC->registerAssumption(II);
}
}
+
+ {
+ // Identify what other metadata depends on the cloned version. After
+ // cloning, replace the metadata with the corrected version for both
+ // memory instructions and noalias intrinsics.
+ std::string ext = (Twine("It") + Twine(It)).str();
+ cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+ Header->getContext(), ext);
+ }
}
// Loop over the PHI nodes in the original block, setting incoming values.
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-unroll -unroll-count=4 < %s | FileCheck %s
+; The scope is declared inside the loop body, so each unrolled copy must get its own scope.
+define void @test_inside(i32* %addr1, i32* %addr2) {
+; CHECK-LABEL: @test_inside(
+; CHECK-NEXT: start:
+; CHECK-NEXT: br label [[BODY:%.*]]
+; CHECK: body:
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0)
+; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0
+; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0
+; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !3)
+; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3
+; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !3
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !5
+; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !5
+; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !7)
+; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !7
+; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !7
+; CHECK-NEXT: ret void
+;
+start:
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %start ], [ %i2, %body ]
+ %j = and i32 %i, 1
+ %addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j
+ %addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j
+
+ call void @llvm.experimental.noalias.scope.decl(metadata !2)
+ %x = load i32, i32* %addr1i, !alias.scope !2
+ store i32 %x, i32* %addr2i, !noalias !2
+
+ %i2 = add i32 %i, 1
+ %cmp = icmp slt i32 %i2, 4
+ br i1 %cmp, label %body, label %end
+
+end:
+ ret void
+}
+; The scope is declared outside the loop, so all unrolled copies keep sharing it.
+define void @test_outside(i32* %addr1, i32* %addr2) {
+; CHECK-LABEL: @test_outside(
+; CHECK-NEXT: start:
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0)
+; CHECK-NEXT: br label [[BODY:%.*]]
+; CHECK: body:
+; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0
+; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0
+; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !0
+; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !0
+; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !0
+; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !0
+; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !0
+; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !0
+; CHECK-NEXT: ret void
+;
+start:
+ call void @llvm.experimental.noalias.scope.decl(metadata !2)
+ br label %body
+
+body:
+ %i = phi i32 [ 0, %start ], [ %i2, %body ]
+ %j = and i32 %i, 1
+ %addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j
+ %addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j
+
+ %x = load i32, i32* %addr1i, !alias.scope !2
+ store i32 %x, i32* %addr2i, !noalias !2
+
+ %i2 = add i32 %i, 1
+ %cmp = icmp slt i32 %i2, 4
+ br i1 %cmp, label %body, label %end
+
+end:
+ ret void
+}
+
+declare void @llvm.experimental.noalias.scope.decl(metadata)
+
+!0 = distinct !{!0}
+!1 = distinct !{!1, !0}
+!2 = !{!1}
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2, !"It1"}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2, !"It2"}
+; CHECK: !7 = !{!8}
+; CHECK: !8 = distinct !{!8, !2, !"It3"}
; Consider that %addr1 = %addr2 + 1, in which case %addr2i and %addr1i are
; noalias within one iteration, but may alias across iterations.
-; TODO: This is a micompile.
define void @pr39282(i32* %addr1, i32* %addr2) {
; CHECK-LABEL: @pr39282(
; CHECK-NEXT: start:
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0:metadata !.*]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3:metadata !.*]])
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i64 1
-; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2:%.*]], i64 1
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
-; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3, !noalias !0
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
-; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
-; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i64 1
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5
+; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: [[X_I_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !11, !noalias !9
+; CHECK-NEXT: store i32 [[X_I_2]], i32* [[ADDR2]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: [[X_I_3:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13
+; CHECK-NEXT: store i32 [[X_I_3]], i32* [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15
; CHECK-NEXT: ret void
;
start: