From 4a7da98bd9283e2e6aa635031d2f131493c8ad1a Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Thu, 23 May 2019 05:53:10 +0000 Subject: [PATCH] [GlobalOpt] recognize dead struct fields and propagate values Summary: Allow SRA of struct fields and removal of dead stores. This works by treating field accesses made through getElementPtr as possible pointer roots that can be cleaned up. We check that the variable can be SRA'd by recursively checking the sub-expressions with the new isSafeSubSROAGEP function. Basically, this allows the array in the following C code to be optimized out: struct Expr { int a[2]; int b; }; static struct Expr e; int foo (int i) { e.b = 2; e.a[i] = 1; return e.b; } Reviewers: greened, bkramer, nicholas, jmolloy Reviewed By: jmolloy Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D61911 llvm-svn: 361460 --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 27 ++++++++++++++++++---- .../Transforms/GlobalOpt/globalsra-multigep.ll | 11 +++++++-- llvm/test/Transforms/GlobalOpt/globalsra-struct.ll | 18 +++++++++++++++ 3 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/globalsra-struct.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index c4fb3ce..c4f268a 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -184,7 +184,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { /// This GV is a pointer root. Loop over all users of the global and clean up /// any that obviously don't assign the global a value that isn't dynamically /// allocated. -static bool CleanupPointerRootUsers(GlobalVariable *GV, +static bool CleanupPointerRootUsers(Value *V, const TargetLibraryInfo *TLI) { // A brief explanation of leak checkers. 
The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory @@ -202,7 +202,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, SmallVector, 32> Dead; // Constants can't be pointers to dynamically allocated memory. - for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); + for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E;) { User *U = *UI++; if (StoreInst *SI = dyn_cast(U)) { @@ -232,6 +232,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, Dead.push_back(std::make_pair(I, MTI)); } } else if (ConstantExpr *CE = dyn_cast(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Changed |= CleanupPointerRootUsers(CE, TLI); + } if (CE->use_empty()) { CE->destroyConstant(); Changed = true; @@ -241,7 +244,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, C->destroyConstant(); // This could have invalidated UI, start over from scratch. Dead.clear(); - CleanupPointerRootUsers(GV, TLI); + CleanupPointerRootUsers(V, TLI); return true; } } @@ -391,6 +394,22 @@ static bool isSafeSROAGEP(User *U) { [](User *UU) { return isSafeSROAElementUse(UU); }); } +/// Return true if the specified GEP is a safe user of a derived +/// expression from a global that we want to SROA. +static bool isSafeSubSROAGEP(User *U) { + + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some + // value of C. + if (U->getNumOperands() < 3 || !isa(U->getOperand(1)) || + !cast(U->getOperand(1))->isNullValue()) + return false; + + return llvm::all_of(U->users(), + [](User *UU) { return isSafeSROAElementUse(UU); }); +} + /// Return true if the specified instruction is a safe user of a derived /// expression from a global that we want to SROA. 
static bool isSafeSROAElementUse(Value *V) { @@ -409,7 +428,7 @@ static bool isSafeSROAElementUse(Value *V) { return SI->getOperand(0) != V; // Otherwise, it must be a GEP. Check it and its users are safe to SRA. - return isa(I) && isSafeSROAGEP(I); + return isa(I) && isSafeSubSROAGEP(I); } /// Look at all uses of the global and decide whether it is safe for us to diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll b/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll index 87a8486..c32a620 100644 --- a/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll +++ b/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll @@ -4,13 +4,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @g_data = internal unnamed_addr global <{ [8 x i16], [8 x i16] }> <{ [8 x i16] [i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16], [8 x i16] zeroinitializer }>, align 16 -; We cannot SRA here due to the second gep meaning the access to g_data may be to either element -; CHECK: @g_data = internal unnamed_addr constant <{ [8 x i16], [8 x i16] }> +; We normally cannot SRA here due to the second gep meaning the access to g_data may be to either element, +; unless the value is always zero. 
+; CHECK: @g_data.0 = internal unnamed_addr constant [8 x i16] [i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16], align 16 define i16 @test(i64 %a1) { entry: %g1 = getelementptr inbounds <{ [8 x i16], [8 x i16] }>, <{ [8 x i16], [8 x i16] }>* @g_data, i64 0, i32 0 %arrayidx.i = getelementptr inbounds [8 x i16], [8 x i16]* %g1, i64 0, i64 %a1 %r = load i16, i16* %arrayidx.i, align 2 + +; CHECK-NOT: getelementptr inbounds <{ [8 x i16], [8 x i16] }>, <{ [8 x i16], [8 x i16] }>* @g_data, i64 0, i32 0 +; CHECK: %arrayidx.i = getelementptr inbounds [8 x i16], [8 x i16]* @g_data.0, i64 0, i64 %a1 + ret i16 %r + + } diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-struct.ll b/llvm/test/Transforms/GlobalOpt/globalsra-struct.ll new file mode 100644 index 0000000..957fba8 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/globalsra-struct.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -globalopt -S | FileCheck %s + +%struct.Expr = type { [1 x i32], i32 } + +@e = internal global %struct.Expr zeroinitializer, align 4 +; CHECK-NOT: @e = internal global %struct.Expr zeroinitializer, align 4 + +define dso_local i32 @foo(i32 %i) { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* getelementptr inbounds (%struct.Expr, %struct.Expr* @e, i32 0, i32 0), i32 0, i32 %0 + store i32 57005, i32* %arrayidx, align 4 + %1 = load i32, i32* getelementptr inbounds (%struct.Expr, %struct.Expr* @e, i32 0, i32 1), align 4 + ret i32 %1 +; CHECK: ret i32 0 +} -- 2.7.4