From 6baaad740a5abb4bfcff022a8114abb4eea66a2d Mon Sep 17 00:00:00 2001 From: Wende Tan Date: Tue, 10 May 2022 15:44:46 -0700 Subject: [PATCH] [Bitcode] Include indirect users of BlockAddresses in bitcode The original fix (commit 23ec5782c3cc) of https://github.com/llvm/llvm-project/issues/52787 only adds `Function`s that have `Instruction`s that directly use `BlockAddress`es into the bitcode (`FUNC_CODE_BLOCKADDR_USERS`). However, in either @rickyz's original reproducing code: ``` void f(long); __attribute__((noinline)) static void fun(long x) { f(x + 1); } void repro(void) { fun(({ label: (long)&&label; })); } ``` ``` ... define dso_local void @repro() #0 { entry: br label %label label: ; preds = %entry tail call fastcc void @fun() ret void } define internal fastcc void @fun() unnamed_addr #1 { entry: tail call void @f(i64 add (i64 ptrtoint (i8* blockaddress(@repro, %label) to i64), i64 1)) #3 ret void } ... ``` or the xfs and overlayfs in the Linux kernel, `BlockAddress`es (e.g., `i8* blockaddress(@repro, %label)`) may first compose `ConstantExpr`s (e.g., `i64 ptrtoint (i8* blockaddress(@repro, %label) to i64)`) and then used by `Instruction`s. This case is not handled by the original fix. This patch adds *indirect* users of `BlockAddress`es, i.e., the `Instruction`s using some `Constant`s which further use the `BlockAddress`es, into the bitcode as well, by doing depth-first searches. Fixes: https://github.com/llvm/llvm-project/issues/52787 Fixes: 23ec5782c3cc ("[Bitcode] materialize Functions early when BlockAddress taken") Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D124878 --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 26 ++++++++++---- llvm/test/Bitcode/blockaddress-aggregate-users.ll | 40 ++++++++++++++++++++++ llvm/test/Bitcode/blockaddress-expr-users.ll | 38 ++++++++++++++++++++ .../test/Bitcode/blockaddress-globalvalue-users.ll | 38 ++++++++++++++++++++ llvm/test/Bitcode/blockaddress-nested-users.ll | 38 ++++++++++++++++++++ 5 files changed, 174 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Bitcode/blockaddress-aggregate-users.ll create mode 100644 llvm/test/Bitcode/blockaddress-expr-users.ll create mode 100644 llvm/test/Bitcode/blockaddress-globalvalue-users.ll create mode 100644 llvm/test/Bitcode/blockaddress-nested-users.ll diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index dcfe12a..1288c91 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -3365,7 +3366,7 @@ void ModuleBitcodeWriter::writeFunction( bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; - SmallPtrSet BlockAddressUsers; + SmallSetVector BlockAddressUsers; // Finally, emit all the instructions, in order. for (const BasicBlock &BB : F) { @@ -3401,11 +3402,24 @@ void ModuleBitcodeWriter::writeFunction( } if (BlockAddress *BA = BlockAddress::lookup(&BB)) { - for (User *U : BA->users()) { - if (auto *I = dyn_cast(U)) { - Function *P = I->getParent()->getParent(); - if (P != &F) - BlockAddressUsers.insert(P); + SmallVector BlockAddressUsersStack { BA }; + SmallPtrSet BlockAddressUsersVisited { BA }; + + while (!BlockAddressUsersStack.empty()) { + Value *V = BlockAddressUsersStack.pop_back_val(); + + for (User *U : V->users()) { + if ((isa(U) || isa(U)) && + !BlockAddressUsersVisited.contains(U)) { + BlockAddressUsersStack.push_back(U); + BlockAddressUsersVisited.insert(U); + } + + if (auto *I = dyn_cast(U)) { + Function *P = I->getParent()->getParent(); + if (P != &F) + BlockAddressUsers.insert(P); + } } } } diff --git a/llvm/test/Bitcode/blockaddress-aggregate-users.ll b/llvm/test/Bitcode/blockaddress-aggregate-users.ll new file mode 100644 index 0000000..febfc6d --- /dev/null +++ b/llvm/test/Bitcode/blockaddress-aggregate-users.ll @@ -0,0 +1,40 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s +; RUN: llvm-dis %t.bc + +; There's a curious case where blockaddress constants may refer to functions +; outside of the function they're used in. There's a special bitcode function +; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case. + +; The intent of this test is two-fold: +; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn, +; @repro, since @fun and @fun2 both refer to @repro via blockaddress +; constants. +; 2. Ensure we can round-trip serializing+desearlizing such case. + +; CHECK: