From 840257a49c26c5c7d66ee17eeaaf5a43fad0b9da Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 3 Nov 2014 23:19:16 +0000 Subject: [PATCH] Use AA in LoadCombine LoadCombine can be smarter about aborting when a writing instruction is encountered. Instead of aborting upon encountering any writing instruction, use an AliasSetTracker and abort only when encountering a write that might alias with the loads that could potentially be combined. This was originally motivated by comments made (and a test case provided) by David Majnemer in response to PR21448. It turned out that LoadCombine was not responsible for that PR, but LoadCombine should also be improved so that unrelated stores (and @llvm.assume) don't interrupt load combining. llvm-svn: 221203 --- llvm/include/llvm/Analysis/AliasSetTracker.h | 4 ++ llvm/lib/Analysis/AliasSetTracker.cpp | 7 +++- llvm/lib/Transforms/Scalar/LoadCombine.cpp | 23 +++++++++-- .../test/Transforms/LoadCombine/load-combine-aa.ll | 39 +++++++++++++++++++ .../Transforms/LoadCombine/load-combine-assume.ll | 44 ++++++++++++++++++++++ 5 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/LoadCombine/load-combine-aa.ll create mode 100644 llvm/test/Transforms/LoadCombine/load-combine-assume.ll diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index 6dcd4a0..403a2b5 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -370,6 +370,10 @@ public: /// alias sets. bool containsPointer(Value *P, uint64_t Size, const AAMDNodes &AAInfo) const; + /// Return true if the specified instruction "may" (or must) alias one of the + /// members in any of the sets. + bool containsUnknown(Instruction *I) const; + /// getAliasAnalysis - Return the underlying alias analysis object used by /// this tracker. 
AliasAnalysis &getAliasAnalysis() const { return AA; } diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp index 843a541..9d941e5 100644 --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -242,7 +242,12 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, return false; } - +bool AliasSetTracker::containsUnknown(Instruction *Inst) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesUnknownInst(Inst, AA)) + return true; + return false; +} AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { AliasSet *FoundSet = nullptr; diff --git a/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/llvm/lib/Transforms/Scalar/LoadCombine.cpp index 648626a..11e4d76 100644 --- a/llvm/lib/Transforms/Scalar/LoadCombine.cpp +++ b/llvm/lib/Transforms/Scalar/LoadCombine.cpp @@ -15,6 +15,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Pass.h" #include "llvm/IR/DataLayout.h" @@ -51,11 +53,12 @@ struct LoadPOPPair { class LoadCombine : public BasicBlockPass { LLVMContext *C; const DataLayout *DL; + AliasAnalysis *AA; public: LoadCombine() : BasicBlockPass(ID), - C(nullptr), DL(nullptr) { + C(nullptr), DL(nullptr), AA(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } @@ -225,19 +228,23 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { if (skipOptnoneFunction(BB) || !DL) return false; + AA = &getAnalysis(); + IRBuilder TheBuilder(BB.getContext(), TargetFolder(DL)); Builder = &TheBuilder; DenseMap> LoadMap; + AliasSetTracker AST(*AA); bool Combined = false; unsigned Index = 0; for (auto &I : BB) { - if (I.mayWriteToMemory() || I.mayThrow()) { + if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) { if (combineLoads(LoadMap)) Combined = true; 
LoadMap.clear(); + AST.clear(); continue; } LoadInst *LI = dyn_cast(&I); @@ -250,6 +257,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { if (!POP.Pointer) continue; LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++)); + AST.add(LI); } if (combineLoads(LoadMap)) Combined = true; @@ -258,6 +266,9 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + + AU.addRequired(); + AU.addPreserved(); } char LoadCombine::ID = 0; @@ -266,5 +277,9 @@ BasicBlockPass *llvm::createLoadCombinePass() { return new LoadCombine(); } -INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false, - false) +INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads", + false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads", + false, false) + diff --git a/llvm/test/Transforms/LoadCombine/load-combine-aa.ll b/llvm/test/Transforms/LoadCombine/load-combine-aa.ll new file mode 100644 index 0000000..3542dce --- /dev/null +++ b/llvm/test/Transforms/LoadCombine/load-combine-aa.ll @@ -0,0 +1,39 @@ +; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) { +; CHECK-LABEL: @test1 + +; CHECK: load i64* +; CHECK: ret i64 + + %load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + store i32 %load1, i32* %b, align 4 + %load2 = load i32* %arrayidx1, align 4 + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + +define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) { +; CHECK-LABEL: @test2 + +; CHECK: load i32* +; CHECK: load i32* +; CHECK: ret i64 + + 
%load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + store i32 %load1, i32* %b, align 4 + %load2 = load i32* %arrayidx1, align 4 + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + diff --git a/llvm/test/Transforms/LoadCombine/load-combine-assume.ll b/llvm/test/Transforms/LoadCombine/load-combine-assume.ll new file mode 100644 index 0000000..94f6300 --- /dev/null +++ b/llvm/test/Transforms/LoadCombine/load-combine-assume.ll @@ -0,0 +1,44 @@ +; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.assume(i1) nounwind + +; 'load' before the 'call' gets optimized: +define i64 @test1(i32* nocapture readonly %a, i1 %b) { +; CHECK-LABEL: @test1 + +; CHECK-DAG: load i64* %1, align 4 +; CHECK-DAG: tail call void @llvm.assume(i1 %b) +; CHECK: ret i64 + + %load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + %load2 = load i32* %arrayidx1, align 4 + tail call void @llvm.assume(i1 %b) + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + +; 'call' before the 'load' also gets optimized: +define i64 @test2(i32* nocapture readonly %a, i1 %b) { +; CHECK-LABEL: @test2 + +; CHECK-DAG: load i64* %1, align 4 +; CHECK-DAG: tail call void @llvm.assume(i1 %b) +; CHECK: ret i64 + + %load1 = load i32* %a, align 4 + %conv = zext i32 %load1 to i64 + %arrayidx1 = getelementptr inbounds i32* %a, i64 1 + tail call void @llvm.assume(i1 %b) + %load2 = load i32* %arrayidx1, align 4 + %conv2 = zext i32 %load2 to i64 + %shl = shl nuw i64 %conv2, 32 + %add = or i64 %shl, %conv + ret i64 %add +} + -- 2.7.4