/// the only relevant load gets deleted.)
///
/// \param Load The load we want to replace.
-/// \param ScanBB The basic block to scan. FIXME: This is redundant.
+/// \param ScanBB The basic block to scan.
/// \param [in,out] ScanFrom The location to start scanning from. When this
/// function returns, it points at the last instruction scanned.
/// \param MaxInstsToScan The maximum number of instructions to scan. If this
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
AliasAnalysis *AA = nullptr,
- AAMDNodes *AATags = nullptr,
bool *IsLoadCSE = nullptr);
}
/// Metadata not listed as known via KnownIDs is removed
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs);
+/// Combine the metadata of two instructions so that K can replace J. This
+/// specifically handles the case of CSE-like transformations.
+///
+/// Unknown metadata is removed.
+void combineMetadataForCSE(Instruction *K, const Instruction *J);
+
/// Replace each use of 'From' with 'To' if that use is dominated by
/// the given edge. Returns the number of replacements made.
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
"to scan backward from a given instruction, when searching for "
"available loaded value"));
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
+ BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA, AAMDNodes *AATags,
- bool *IsLoadCSE) {
+ AliasAnalysis *AA, bool *IsLoadCSE) {
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
if (LI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- LI->getAAMetadata(*AATags);
if (IsLoadCSE)
*IsLoadCSE = true;
return LI;
if (SI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- SI->getAAMetadata(*AATags);
+ if (IsLoadCSE)
+ *IsLoadCSE = false;
return SI->getOperand(0);
}
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
BasicBlock::iterator BBI(LI);
- AAMDNodes AATags;
bool IsLoadCSE = false;
if (Value *AvailableVal =
FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
- DefMaxInstsToScan, AA, &AATags, &IsLoadCSE)) {
+ DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE) {
LoadInst *NLI = cast<LoadInst>(AvailableVal);
unsigned KnownIDs[] = {
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
BasicBlock::iterator BBIt(LI);
-
+ bool IsLoadCSE;
if (Value *AvailableVal =
- FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan)) {
+ FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan, nullptr, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
+ if (IsLoadCSE) {
+ LoadInst *NLI = cast<LoadInst>(AvailableVal);
+ combineMetadataForCSE(NLI, LI);
+ };
+
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
BasicBlock *OneUnavailablePred = nullptr;
+ SmallVector<LoadInst*, 8> CSELoads;
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- AAMDNodes ThisAATags;
Value *PredAvailable = FindAvailableLoadedValue(LI, PredBB, BBIt,
DefMaxInstsToScan,
- nullptr, &ThisAATags);
+ nullptr,
+ &IsLoadCSE);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
- // If AA tags disagree or are not present, forget about them.
- if (AATags != ThisAATags) AATags = AAMDNodes();
+ if (IsLoadCSE)
+ CSELoads.push_back(cast<LoadInst>(PredAvailable));
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
PN->addIncoming(PredV, I->first);
}
+ for (LoadInst *PredLI : CSELoads) {
+ combineMetadataForCSE(PredLI, LI);
+ }
+
LI->replaceAllUsesWith(PN);
LI->eraseFromParent();
K->setMetadata(LLVMContext::MD_invariant_group, JMD);
}
+void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
+ combineMetadata(K, J, KnownIDs);
+}
+
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
DominatorTree &DT,
const BasicBlockEdge &Root) {
ret i32 %res.0
}
+; Make sure we merge the aliasing metadata. (If we don't, we have a load
+; with the wrong metadata, so the branch gets incorrectly eliminated.)
+define void @test8(i32*, i32*, i32*) {
+; CHECK-LABEL: @test8(
+; CHECK: %a = load i32, i32* %0, !range !4
+; CHECK-NEXT: store i32 %a
+; CHECK: br i1 %c
+ %a = load i32, i32* %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10
+ %b = load i32, i32* %0, !range !5
+ store i32 %a, i32* %1
+ %c = icmp eq i32 %b, 8
+ br i1 %c, label %ret1, label %ret2
+
+ret1:
+ ret void
+
+ret2:
+ %xxx = tail call i32 (...) @f1() nounwind
+ ret void
+}
+
+; Make sure we merge/PRE aliasing metadata correctly. That means that
+; we need to remove metadata from the existing load, and add appropriate
+; metadata to the newly inserted load.
+define void @test9(i32*, i32*, i32*, i1 %c) {
+; CHECK-LABEL: @test9(
+ br i1 %c, label %d1, label %d2
+
+; CHECK: d1:
+; CHECK-NEXT: %a = load i32, i32* %0{{$}}
+d1:
+ %a = load i32, i32* %0, !range !4, !alias.scope !9, !noalias !10
+ br label %d3
+
+; CHECK: d2:
+; CHECK-NEXT: %xxxx = tail call i32 (...) @f1()
+; CHECK-NEXT: %b.pr = load i32, i32* %0, !tbaa !0{{$}}
+d2:
+ %xxxx = tail call i32 (...) @f1() nounwind
+ br label %d3
+
+d3:
+ %p = phi i32 [ 1, %d2 ], [ %a, %d1 ]
+ %b = load i32, i32* %0, !tbaa !0
+ store i32 %p, i32* %1
+ %c2 = icmp eq i32 %b, 8
+ br i1 %c2, label %ret1, label %ret2
+
+ret1:
+ ret void
+
+ret2:
+ %xxx = tail call i32 (...) @f1() nounwind
+ ret void
+}
+
!0 = !{!3, !3, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA", null}
!3 = !{!"int", !1}
+!4 = !{ i32 0, i32 1 }
+!5 = !{ i32 8, i32 10 }
+!6 = !{!6}
+!7 = !{!7, !6}
+!8 = !{!8, !6}
+!9 = !{!7}
+!10 = !{!8}