/// alias sets.
bool containsPointer(Value *P, uint64_t Size, const AAMDNodes &AAInfo) const;
+ /// Return true if the specified instruction "may" (or must) alias one of the
+ /// members in any of the sets.
+ bool containsUnknown(Instruction *I) const;
+
/// getAliasAnalysis - Return the underlying alias analysis object used by
/// this tracker.
AliasAnalysis &getAliasAnalysis() const { return AA; }
return false;
}
-
+bool AliasSetTracker::containsUnknown(Instruction *Inst) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesUnknownInst(Inst, AA))
+ return true;
+ return false;
+}
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Pass.h"
#include "llvm/IR/DataLayout.h"
class LoadCombine : public BasicBlockPass {
LLVMContext *C;
const DataLayout *DL;
+ AliasAnalysis *AA;
public:
LoadCombine()
: BasicBlockPass(ID),
- C(nullptr), DL(nullptr) {
+ C(nullptr), DL(nullptr), AA(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
if (skipOptnoneFunction(BB) || !DL)
return false;
+ AA = &getAnalysis<AliasAnalysis>();
+
IRBuilder<true, TargetFolder>
TheBuilder(BB.getContext(), TargetFolder(DL));
Builder = &TheBuilder;
DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
+ AliasSetTracker AST(*AA);
bool Combined = false;
unsigned Index = 0;
for (auto &I : BB) {
- if (I.mayWriteToMemory() || I.mayThrow()) {
+ if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
if (combineLoads(LoadMap))
Combined = true;
LoadMap.clear();
+ AST.clear();
continue;
}
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!POP.Pointer)
continue;
LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
+ AST.add(LI);
}
if (combineLoads(LoadMap))
Combined = true;
void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
}
char LoadCombine::ID = 0;
return new LoadCombine();
}
-INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false,
- false)
+INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
+ false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
+ false, false)
+
--- /dev/null
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) {
+; CHECK-LABEL: @test1
+
+; CHECK: load i64*
+; CHECK: ret i64
+
+ %load1 = load i32* %a, align 4
+ %conv = zext i32 %load1 to i64
+ %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+ store i32 %load1, i32* %b, align 4
+ %load2 = load i32* %arrayidx1, align 4
+ %conv2 = zext i32 %load2 to i64
+ %shl = shl nuw i64 %conv2, 32
+ %add = or i64 %shl, %conv
+ ret i64 %add
+}
+
+define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) {
+; CHECK-LABEL: @test2
+
+; CHECK: load i32*
+; CHECK: load i32*
+; CHECK: ret i64
+
+ %load1 = load i32* %a, align 4
+ %conv = zext i32 %load1 to i64
+ %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+ store i32 %load1, i32* %b, align 4
+ %load2 = load i32* %arrayidx1, align 4
+ %conv2 = zext i32 %load2 to i64
+ %shl = shl nuw i64 %conv2, 32
+ %add = or i64 %shl, %conv
+ ret i64 %add
+}
+
--- /dev/null
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.assume(i1) nounwind
+
+; 'load' before the 'call' gets optimized:
+define i64 @test1(i32* nocapture readonly %a, i1 %b) {
+; CHECK-LABEL: @test1
+
+; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: tail call void @llvm.assume(i1 %b)
+; CHECK: ret i64
+
+ %load1 = load i32* %a, align 4
+ %conv = zext i32 %load1 to i64
+ %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+ %load2 = load i32* %arrayidx1, align 4
+ tail call void @llvm.assume(i1 %b)
+ %conv2 = zext i32 %load2 to i64
+ %shl = shl nuw i64 %conv2, 32
+ %add = or i64 %shl, %conv
+ ret i64 %add
+}
+
+; 'call' before the 'load' doesn't get optimized:
+define i64 @test2(i32* nocapture readonly %a, i1 %b) {
+; CHECK-LABEL: @test2
+
+; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: tail call void @llvm.assume(i1 %b)
+; CHECK: ret i64
+
+ %load1 = load i32* %a, align 4
+ %conv = zext i32 %load1 to i64
+ %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+ tail call void @llvm.assume(i1 %b)
+ %load2 = load i32* %arrayidx1, align 4
+ %conv2 = zext i32 %load2 to i64
+ %shl = shl nuw i64 %conv2, 32
+ %add = or i64 %shl, %conv
+ ret i64 %add
+}
+