From 139810449ba4c2203dbd9021b7ec4153bbfab269 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 14 Aug 2020 21:08:16 +0100 Subject: [PATCH] [DSE,MemorySSA] Account for ScanLimit == 0 on entry. Currently the code does not account for the fact that getDomMemoryDef can be called with ScanLimit == 0, if we reached the limit while processing an earlier access. Also tighten the check a bit more and bump the scan limit now that it is handled properly. In some cases, this brings a 2x speedup in terms of compile-time. --- .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 15 +++++-- .../MSSA/memoryssa-scan-limit.ll | 48 +++++++++++----------- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 261fb387..faa936e 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -107,7 +107,7 @@ static cl::opt cl::desc("Use the new MemorySSA-backed DSE.")); static cl::opt - MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, + MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 100)")); @@ -1743,7 +1743,12 @@ struct DSEState { Optional getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet, - bool DefVisibleToCallerAfterRet, int &ScanLimit) const { + bool DefVisibleToCallerAfterRet, unsigned &ScanLimit) const { + if (ScanLimit == 0) { + LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); + return None; + } + MemoryAccess *DomAccess; bool StepAgain; LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current @@ -1803,10 +1808,12 @@ struct DSEState { MemoryAccess *UseAccess = WorkList[I]; LLVM_DEBUG(dbgs() << " " << *UseAccess); - if (--ScanLimit == 0) { + // Bail out if the number of accesses to check exceeds the scan limit. + if (ScanLimit < (WorkList.size() - I)) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); return None; } + --ScanLimit; if (isa(UseAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n"); @@ -2154,7 +2161,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *KillingDef << " (" << *SI << ")\n"); - int ScanLimit = MemorySSAScanLimit; + unsigned ScanLimit = MemorySSAScanLimit; // Worklist of MemoryAccesses that may be killed by KillingDef. SetVector ToCheck; ToCheck.insert(KillingDef->getDefiningAccess()); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll index c2b2eba..5ee6fe1 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 -S | FileCheck --check-prefix=LIMIT-2 %s ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s -; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=4 -S | FileCheck --check-prefix=LIMIT-4 %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) { -; ; NO-LIMIT-LABEL: @test2( ; NO-LIMIT-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; NO-LIMIT: bb1: @@ -16,48 +15,49 @@ define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) { ; NO-LIMIT: bb2: ; NO-LIMIT-NEXT: br label [[BB3]] ; NO-LIMIT: bb3: -; NO-LIMIT-NEXT: store i32 0, i32* [[Q:%.*]] -; NO-LIMIT-NEXT: store i32 0, i32* [[R:%.*]] -; NO-LIMIT-NEXT: store i32 0, i32* [[P:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[Q:%.*]], align 4 +; NO-LIMIT-NEXT: store i32 0, i32* [[R:%.*]], align 4 +; NO-LIMIT-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; NO-LIMIT-NEXT: ret void ; ; LIMIT-0-LABEL: @test2( +; LIMIT-0-NEXT: store i32 1, i32* [[P:%.*]], align 4 ; LIMIT-0-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; LIMIT-0: bb1: ; LIMIT-0-NEXT: br label [[BB3:%.*]] ; LIMIT-0: bb2: ; LIMIT-0-NEXT: br label [[BB3]] ; LIMIT-0: bb3: -; LIMIT-0-NEXT: store i32 0, i32* [[Q:%.*]] -; LIMIT-0-NEXT: store i32 0, i32* [[R:%.*]] -; LIMIT-0-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[Q:%.*]], align 4 +; LIMIT-0-NEXT: store i32 0, i32* [[R:%.*]], align 4 +; LIMIT-0-NEXT: store i32 0, i32* [[P]], align 4 ; LIMIT-0-NEXT: ret void ; +; LIMIT-2-LABEL: @test2( +; LIMIT-2-NEXT: store i32 1, i32* [[P:%.*]], align 4 +; LIMIT-2-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-2: bb1: +; LIMIT-2-NEXT: br label [[BB3:%.*]] +; LIMIT-2: bb2: +; LIMIT-2-NEXT: br label [[BB3]] +; LIMIT-2: bb3: +; LIMIT-2-NEXT: store i32 0, i32* [[Q:%.*]], align 4 +; LIMIT-2-NEXT: store i32 0, i32* [[R:%.*]], align 4 +; LIMIT-2-NEXT: store i32 0, i32* [[P]], align 4 +; LIMIT-2-NEXT: ret void +; ; LIMIT-3-LABEL: @test2( -; LIMIT-3-NEXT: store i32 1, i32* [[P:%.*]] ; LIMIT-3-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; LIMIT-3: bb1: ; LIMIT-3-NEXT: br label [[BB3:%.*]] ; LIMIT-3: bb2: ; LIMIT-3-NEXT: br label [[BB3]] ; LIMIT-3: bb3: -; LIMIT-3-NEXT: store i32 0, i32* [[Q:%.*]] -; LIMIT-3-NEXT: store i32 0, i32* [[R:%.*]] -; LIMIT-3-NEXT: store i32 0, i32* [[P]] +; LIMIT-3-NEXT: store i32 0, i32* [[Q:%.*]], align 4 +; LIMIT-3-NEXT: store i32 0, i32* [[R:%.*]], align 4 +; LIMIT-3-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; LIMIT-3-NEXT: ret void ; -; LIMIT-4-LABEL: @test2( -; LIMIT-4-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] -; LIMIT-4: bb1: -; LIMIT-4-NEXT: br label [[BB3:%.*]] -; LIMIT-4: bb2: -; LIMIT-4-NEXT: br label [[BB3]] -; LIMIT-4: bb3: -; LIMIT-4-NEXT: store i32 0, i32* [[Q:%.*]] -; LIMIT-4-NEXT: store i32 0, i32* [[R:%.*]] -; LIMIT-4-NEXT: store i32 0, i32* [[P:%.*]] -; LIMIT-4-NEXT: ret void -; store i32 1, i32* %P br i1 true, label %bb1, label %bb2 bb1: -- 2.7.4