From f65c88c42fdd0e46d16fe31737e6627c56de77c3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Jun 2022 16:02:28 +0200 Subject: [PATCH] [GlobalOpt] Fix memset handling in global ctor evaluation (PR55859) The global ctor evaluator currently handles by checking whether the memset memory is already zero, and skips it in that case. However, it only actually checks the first byte of the memory being set. This patch extends the code to check all bytes being set. This is done byte-by-byte to avoid converting undef values to zeros in larger reads. However, the handling is still not completely correct, because there might still be padding bytes (though probably this doesn't matter much in practice, as I'd expect global variable padding to be zero-initialized in practice). Mostly fixes https://github.com/llvm/llvm-project/issues/55859. Differential Revision: https://reviews.llvm.org/D128532 --- llvm/include/llvm/Transforms/Utils/Evaluator.h | 2 ++ llvm/lib/Transforms/Utils/Evaluator.cpp | 46 ++++++++++++++++++++------ llvm/test/Transforms/GlobalOpt/ctor-memset.ll | 33 +++++++++++++----- 3 files changed, 63 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h index 6a34ceb..2b83848 100644 --- a/llvm/include/llvm/Transforms/Utils/Evaluator.h +++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -138,6 +138,8 @@ private: SmallVectorImpl &Formals); Constant *ComputeLoadResult(Constant *P, Type *Ty); + Constant *ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset); /// As we compute SSA register values, we store their contents here. The back /// of the deque contains the current function and the stack contains the diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp index 70b5e0e..18abe38 100644 --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -217,10 +217,13 @@ Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) { P = cast(P->stripAndAccumulateConstantOffsets( DL, Offset, /* AllowNonInbounds */ true)); Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); - auto *GV = dyn_cast(P); - if (!GV) - return nullptr; + if (auto *GV = dyn_cast(P)) + return ComputeLoadResult(GV, Ty, Offset); + return nullptr; +} +Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty, + const APInt &Offset) { auto It = MutatedMemory.find(GV); if (It != MutatedMemory.end()) return It->second.read(Ty, Offset, DL); @@ -436,16 +439,39 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, << "intrinsic.\n"); return false; } + + auto *LenC = dyn_cast(getVal(MSI->getLength())); + if (!LenC) { + LLVM_DEBUG(dbgs() << "Memset with unknown length.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + auto *GV = dyn_cast(Ptr); + if (!GV) { + LLVM_DEBUG(dbgs() << "Memset with unknown base.\n"); + return false; + } + Constant *Val = getVal(MSI->getValue()); - Constant *DestVal = - ComputeLoadResult(getVal(Ptr), MSI->getValue()->getType()); - if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { - // This memset is a no-op. - LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); - ++CurInst; - continue; + APInt Len = LenC->getValue(); + while (Len != 0) { + Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset); + if (DestVal != Val) { + LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset " + << Offset << " of " << *GV << ".\n"); + return false; + } + ++Offset; + --Len; } + + LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; } if (II->isLifetimeStartOrEnd()) { diff --git a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll index 79ba25f..694b6fe 100644 --- a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll +++ b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -globalopt < %s | FileCheck %s -@llvm.global_ctors = appending global [8 x { i32, ptr, ptr }] [ +target datalayout = "p1:32:32" + +@llvm.global_ctors = appending global [9 x { i32, ptr, ptr }] [ { i32, ptr, ptr } { i32 65535, ptr @ctor0, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor1, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor2, ptr null }, @@ -9,11 +11,12 @@ { i32, ptr, ptr } { i32 65535, ptr @ctor4, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor5, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor6, ptr null }, - { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null } + { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }, + { i32, ptr, ptr } { i32 65535, ptr @ctor8, ptr null } ] ;. -; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor6, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }] +; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor3, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor4, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @ctor7, ptr null }] ; CHECK: @[[G0:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } zeroinitializer ; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 0, i32 0, i32 1 } ; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32, i32 } { i32 1, i32 0, i32 0 } @@ -22,11 +25,11 @@ ; CHECK: @[[G5:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i16, i32 } { i16 0, i32 1 } ; CHECK: @[[G6:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } { i32 -1, i32 -1 } ; CHECK: @[[G7:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr global { i32, i32 } { i32 -1, i32 1 } +; CHECK: @[[G8:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr addrspace(1) global { i32, i32 } zeroinitializer ;. ; memset of all-zero global @g0 = global { i32, i32 } zeroinitializer - define internal void @ctor0() { call void @llvm.memset.p0.i64(ptr @g0, i8 0, i64 8, i1 false) ret void @@ -52,6 +55,10 @@ define internal void @ctor2() { @g3 = global { i32, i32 } { i32 0, i32 1 } define internal void @ctor3() { +; CHECK-LABEL: @ctor3( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0.i64(ptr @g3, i8 0, i64 8, i1 false) ret void } @@ -60,11 +67,17 @@ define internal void @ctor3() { @g4 = global { i32, i32 } { i32 0, i32 undef } define internal void @ctor4() { +; CHECK-LABEL: @ctor4( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0.i64(ptr @g4, i8 0, i64 8, i1 false) ret void } ; memset including padding bytes +; FIXME: We still incorrectly optimize the memset away here, even though code +; might access the padding. @g5 = global { i16, i32 } { i16 0, i32 1 } define internal void @ctor5() { @@ -76,10 +89,6 @@ define internal void @ctor5() { @g6 = global { i32, i32 } { i32 -1, i32 -1 } define internal void @ctor6() { -; CHECK-LABEL: @ctor6( -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr @g6, i8 -1, i64 8, i1 false) -; CHECK-NEXT: ret void -; call void @llvm.memset.p0.i64(ptr @g6, i8 -1, i64 8, i1 false) ret void } @@ -96,6 +105,14 @@ define internal void @ctor7() { ret void } +; memset of zero value in differently-sized address space +@g8 = addrspace(1) global { i32, i32 } zeroinitializer + +define internal void @ctor8() { + call void @llvm.memset.p0.i64(ptr addrspacecast (ptr addrspace(1) @g8 to ptr), i8 0, i64 8, i1 false) + ret void +} + declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } -- 2.7.4