From 422024a1b70be2745733d83aacca14a915e762c7 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 15 Mar 2018 18:12:27 +0000
Subject: [PATCH] [EarlyCSE] Don't hide earlier invariant.scopes

If we've already established an invariant scope with an earlier
generation, we don't want to hide it in the scoped hash table with one
with a later generation.

I noticed this when working on the invariant-load handling, but it also
applies to the invariant.start case as well.

Without this change, my previous patch for invariant-load regresses some
cases, so I'm pushing this without waiting for review. This is why you
don't make last minute tweaks to patches to catch "obvious cases" after
it's already been reviewed. Bad Philip!

llvm-svn: 327655
---
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp          | 11 ++++++++---
 llvm/test/Transforms/EarlyCSE/invariant-loads.ll | 21 +++++++++++++++++++++
 llvm/test/Transforms/EarlyCSE/invariant.start.ll | 12 ++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 3f70ee4..6f03023 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -799,7 +799,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
         continue;
       auto *CI = cast<CallInst>(Inst);
       MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
-      AvailableInvariants.insert(MemLoc, CurrentGeneration);
+      // Don't start a scope if we already have a better one pushed
+      if (!AvailableInvariants.count(MemLoc))
+        AvailableInvariants.insert(MemLoc, CurrentGeneration);
       continue;
     }
 
@@ -888,9 +890,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       if (MemInst.isInvariantLoad()) {
         // If we pass an invariant load, we know that memory location is
         // indefinitely constant from the moment of first dereferenceability.
-        // We conservatively treat the invariant_load as that moment.
+        // We conservatively treat the invariant_load as that moment.  If we
+        // pass an invariant load after already establishing a scope, don't
+        // restart it since we want to preserve the earliest point seen.
         auto MemLoc = MemoryLocation::get(Inst);
-        AvailableInvariants.insert(MemLoc, CurrentGeneration);
+        if (!AvailableInvariants.count(MemLoc))
+          AvailableInvariants.insert(MemLoc, CurrentGeneration);
       }
 
       // If we have an available version of this load, and if it is the right
diff --git a/llvm/test/Transforms/EarlyCSE/invariant-loads.ll b/llvm/test/Transforms/EarlyCSE/invariant-loads.ll
index 889e6cb..c3fa32d 100644
--- a/llvm/test/Transforms/EarlyCSE/invariant-loads.ll
+++ b/llvm/test/Transforms/EarlyCSE/invariant-loads.ll
@@ -135,3 +135,24 @@ define void @test_scope_start_without_load(i32* %p) {
   call void @clobber_and_use(i32 %v3)
   ret void
 }
+
+; If we already have an invariant scope, don't want to start a new one
+; with a potentially greater generation. This hides the earlier invariant
+; load
+define void @test_scope_restart(i32* %p) {
+; CHECK-LABEL: @test_scope_restart
+; CHECK: %v1 = load i32, i32* %p
+; CHECK: call void @clobber_and_use(i32 %v1)
+; CHECK: %add = add i32 %v1, %v1
+; CHECK: call void @clobber_and_use(i32 %add)
+; CHECK: call void @clobber_and_use(i32 %v1)
+; CHECK: ret void
+  %v1 = load i32, i32* %p, !invariant.load !{}
+  call void @clobber_and_use(i32 %v1)
+  %v2 = load i32, i32* %p, !invariant.load !{}
+  %add = add i32 %v1, %v2
+  call void @clobber_and_use(i32 %add)
+  %v3 = load i32, i32* %p
+  call void @clobber_and_use(i32 %v3)
+  ret void
+}
diff --git a/llvm/test/Transforms/EarlyCSE/invariant.start.ll b/llvm/test/Transforms/EarlyCSE/invariant.start.ll
index 5fabd0c..b5dc9a6 100644
--- a/llvm/test/Transforms/EarlyCSE/invariant.start.ll
+++ b/llvm/test/Transforms/EarlyCSE/invariant.start.ll
@@ -93,6 +93,18 @@ define i32 @test_before_clobber(i32* %p) {
   ret i32 %sub
 }
 
+define i32 @test_duplicate_scope(i32* %p) {
+; CHECK-LABEL: @test_duplicate_scope
+; CHECK: ret i32 0
+  %v1 = load i32, i32* %p
+  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
+  call void @clobber()
+  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
+  %v2 = load i32, i32* %p
+  %sub = sub i32 %v1, %v2
+  ret i32 %sub
+}
+
 define i32 @test_unanalzyable_load(i32* %p) {
 ; CHECK-LABEL: @test_unanalzyable_load
 ; CHECK: ret i32 0
-- 
2.7.4