From 32351455f6fdd9293850f32c5e92ca29d6bf40a0 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Mon, 26 Jan 2015 18:39:52 +0000
Subject: [PATCH] Pass QueryInst down through non-local dependency calculation

This change is mostly motivated by exposing information about the
original query instruction to the actual scanning work in
getPointerDependencyFrom when used by GVN PRE. In a follow-up change, I
will use this to be more precise with regard to the semantics of
volatile instructions encountered in the scan of a basic block.

Worth noting is that this change (despite appearing quite simple) is
not semantically preserving. By providing more information to the
helper routine, we allow some optimizations to kick in that weren't
previously able to (when called from this code path). In particular,
the treatment of !invariant.load becomes more precise. In theory, we
might see a difference with an ordered/atomic instruction as well, but
I'm having a hard time actually finding a test case which shows that.

Test-wise, I've included new tests for !invariant.load which illustrate
this difference. I've also included some updated TBAA tests which
highlight that this change isn't needed for that optimization to kick
in - it's handled inside alias analysis itself.

Eventually, it would be nice to factor the !invariant.load handling
inside alias analysis as well.

Differential Revision: http://reviews.llvm.org/D6895

llvm-svn: 227110
---
 .../llvm/Analysis/MemoryDependenceAnalysis.h   |  6 ++--
 llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 21 +++++++-----
 llvm/test/Transforms/GVN/invariant-load.ll     | 38 +++++++++++++++++++++
 llvm/test/Transforms/GVN/tbaa.ll               | 39 +++++++++++++++++++++-
 4 files changed, 93 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 67fd70a..77610b3 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -428,13 +428,15 @@ namespace llvm {
     MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
                                            BasicBlock::iterator ScanIt,
                                            BasicBlock *BB);
-    bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+    bool getNonLocalPointerDepFromBB(Instruction *QueryInst,
+                                     const PHITransAddr &Pointer,
                                      const AliasAnalysis::Location &Loc,
                                      bool isLoad, BasicBlock *BB,
                                      SmallVectorImpl<NonLocalDepResult> &Result,
                                      DenseMap<BasicBlock*, Value*> &Visited,
                                      bool SkipFirstBlock = false);
-    MemDepResult GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+    MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst,
+                                         const AliasAnalysis::Location &Loc,
                                          bool isLoad, BasicBlock *BB,
                                          NonLocalDepInfo *Cache,
                                          unsigned NumSortedEntries);
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index c505aa4..0f3307c 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -924,7 +924,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
   // a block with multiple different pointers.  This can happen during PHI
   // translation.
   DenseMap<BasicBlock*, Value*> Visited;
-  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+  if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
                                    Result, Visited, true))
     return;
   Result.clear();
@@ -938,7 +938,8 @@ getNonLocalPointerDependency(Instruction *QueryInst,
 /// lookup (which may use dirty cache info if available).  If we do a lookup,
 /// add the result to the cache.
 MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+GetNonLocalInfoForBlock(Instruction *QueryInst,
+                        const AliasAnalysis::Location &Loc,
                         bool isLoad, BasicBlock *BB,
                         NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
 
@@ -979,7 +980,8 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
   }
 
   // Scan the block for the dependency.
-  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
+                                              QueryInst);
 
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
@@ -1052,7 +1054,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
 /// not compute dependence information for some reason.  This should be treated
 /// as a clobber dependence on the first instruction in the predecessor block.
 bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+getNonLocalPointerDepFromBB(Instruction *QueryInst,
+                            const PHITransAddr &Pointer,
                             const AliasAnalysis::Location &Loc,
                             bool isLoad, BasicBlock *StartBB,
                             SmallVectorImpl<NonLocalDepResult> &Result,
@@ -1091,7 +1094,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     } else if (CacheInfo->Size > Loc.Size) {
       // This query's Size is less than the cached one. Conservatively restart
       // the query using the greater size.
-      return getNonLocalPointerDepFromBB(Pointer,
+      return getNonLocalPointerDepFromBB(QueryInst, Pointer,
                                          Loc.getWithNewSize(CacheInfo->Size),
                                          isLoad, StartBB, Result, Visited,
                                          SkipFirstBlock);
@@ -1111,7 +1114,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
       CacheInfo->NonLocalDeps.clear();
     }
     if (Loc.AATags)
-      return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
+      return getNonLocalPointerDepFromBB(QueryInst,
+                                         Pointer, Loc.getWithoutAATags(),
                                          isLoad, StartBB, Result, Visited,
                                          SkipFirstBlock);
   }
@@ -1214,7 +1218,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // Get the dependency info for Pointer in BB.  If we have cached
     // information, we will use it, otherwise we compute it.
     DEBUG(AssertSorted(*Cache, NumSortedEntries));
-    MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+    MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
+                                               Loc, isLoad, BB, Cache,
                                                NumSortedEntries);
 
     // If we got a Def or Clobber, add this to the list of results.
@@ -1348,7 +1353,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
       // result conflicted with the Visited list; we have to conservatively
       // assume it is unknown, but this also does not block PRE of the load.
       if (!CanTranslate ||
-          getNonLocalPointerDepFromBB(PredPointer,
+          getNonLocalPointerDepFromBB(QueryInst, PredPointer,
                                       Loc.getWithNewPtr(PredPtrVal),
                                       isLoad, Pred,
                                       Result, Visited)) {
diff --git a/llvm/test/Transforms/GVN/invariant-load.ll b/llvm/test/Transforms/GVN/invariant-load.ll
index 2f9f2a6..2a83c45 100644
--- a/llvm/test/Transforms/GVN/invariant-load.ll
+++ b/llvm/test/Transforms/GVN/invariant-load.ll
@@ -27,5 +27,43 @@
 entry:
   ret i32 %add
 }
 
+; With the invariant.load metadata, what would otherwise
+; be a case for PRE becomes a full redundancy.
+define i32 @test3(i1 %cnd, i32* %p, i32* %q) {
+; CHECK-LABEL: test3
+; CHECK-NOT: load
+entry:
+  %v1 = load i32* %p
+  br i1 %cnd, label %bb1, label %bb2
+
+bb1:
+  store i32 5, i32* %q
+  br label %bb2
+
+bb2:
+  %v2 = load i32* %p, !invariant.load !0
+  %res = sub i32 %v1, %v2
+  ret i32 %res
+}
+
+; This test is here to document a case which doesn't optimize
+; as well as it could.
+define i32 @test4(i1 %cnd, i32* %p, i32* %q) {
+; CHECK-LABEL: test4
+; %v2 is redundant, but GVN currently doesn't catch that.
+entry:
+  %v1 = load i32* %p, !invariant.load !0
+  br i1 %cnd, label %bb1, label %bb2
+
+bb1:
+  store i32 5, i32* %q
+  br label %bb2
+
+bb2:
+  %v2 = load i32* %p
+  %res = sub i32 %v1, %v2
+  ret i32 %res
+}
+
 !0 = !{ }
diff --git a/llvm/test/Transforms/GVN/tbaa.ll b/llvm/test/Transforms/GVN/tbaa.ll
index 1b3494c..71fbed41 100644
--- a/llvm/test/Transforms/GVN/tbaa.ll
+++ b/llvm/test/Transforms/GVN/tbaa.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
 
 define i32 @test1(i8* %p, i8* %q) {
 ; CHECK: @test1(i8* %p, i8* %q)
@@ -72,6 +72,37 @@ define i32 @test7(i8* %p, i8* %q) {
   ret i32 %c
 }
 
+
+define i32 @test8(i32* %p, i32* %q) {
+; CHECK-LABEL: test8
+; CHECK-NEXT: store i32 15, i32* %p
+; CHECK-NEXT: ret i32 0
+; Since we know the location is invariant, we can forward the
+; load across the potentially aliasing store.
+
+  %a = load i32* %q, !tbaa !10
+  store i32 15, i32* %p
+  %b = load i32* %q, !tbaa !10
+  %c = sub i32 %a, %b
+  ret i32 %c
+}
+define i32 @test9(i32* %p, i32* %q) {
+; CHECK-LABEL: test9
+; CHECK-NEXT: call void @clobber()
+; CHECK-NEXT: ret i32 0
+; Since we know the location is invariant, we can forward the
+; load across the potentially aliasing store (within the call).
+
+  %a = load i32* %q, !tbaa !10
+  call void @clobber()
+  %b = load i32* %q, !tbaa !10
+  %c = sub i32 %a, %b
+  ret i32 %c
+}
+
+
+declare void @clobber()
 declare i32 @foo(i8*) readonly
 
 ; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
@@ -89,3 +120,9 @@
 !6 = !{!"A", !2}
 !7 = !{!"B", !6}
 !8 = !{!"another root", null}
+
+
+;; A TBAA structure whose only point is to have a constant location.
+!9 = !{!"yet another root"}
+!10 = !{!"node", !9, i64 1}
+
-- 
2.7.4
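
Illustration (hand-written sketch, not part of the patch and not verified
opt output): the @test3 added above only asserts "CHECK-NOT: load", but the
expected effect of `opt -basicaa -gvn` with the precise !invariant.load scan
is that %v2 is proven fully redundant with %v1, `sub i32 %v1, %v1` folds to
zero, and the then-dead loads are erased — assuming GVN's usual
simplification and trivial dead-instruction cleanup, analogous to the
"ret i32 0" CHECK lines in the tbaa.ll tests:

define i32 @test3(i1 %cnd, i32* %p, i32* %q) {
entry:
  ; both loads of %p eliminated: %v2 was fully redundant with %v1,
  ; and %v1 became dead once the sub folded to a constant
  br i1 %cnd, label %bb1, label %bb2

bb1:
  store i32 5, i32* %q
  br label %bb2

bb2:
  ; sub i32 %v1, %v1 folded to 0
  ret i32 0
}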