From 5acd29eb4d9e411b3631c26babcd1d2655623f4a Mon Sep 17 00:00:00 2001 From: Martin Braenne Date: Thu, 23 Mar 2023 07:45:40 +0000 Subject: [PATCH] [clang][dataflow] Fix crash when RHS of `&&` or `||` calls `noreturn` func. The crash happened because the transfer fucntion for `&&` and `||` unconditionally tried to retrieve the value of the RHS. However, if the RHS is unreachable, there is no environment for it, and trying to retrieve the operand's value causes an assertion failure. See also the comments in the code for further details. Reviewed By: xazax.hun, ymandel, sgatev, gribozavr2 Differential Revision: https://reviews.llvm.org/D146514 --- .../Analysis/FlowSensitive/ControlFlowContext.h | 13 ++++- .../clang/Analysis/FlowSensitive/Transfer.h | 6 +- .../Analysis/FlowSensitive/ControlFlowContext.cpp | 29 +++++++++- clang/lib/Analysis/FlowSensitive/Transfer.cpp | 42 +++++++++----- .../FlowSensitive/TypeErasedDataflowAnalysis.cpp | 2 + .../Analysis/FlowSensitive/TestingSupport.h | 14 +++++ .../Analysis/FlowSensitive/TransferTest.cpp | 66 ++++++++++++++++++++++ 7 files changed, 153 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h index e641468..3495bdf 100644 --- a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h @@ -18,6 +18,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Error.h" #include @@ -47,18 +48,26 @@ public: return StmtToBlock; } + /// Returns whether `B` is reachable from the entry block. + bool isBlockReachable(const CFGBlock &B) const { + return BlockReachable[B.getBlockID()]; + } + private: // FIXME: Once the deprecated `build` method is removed, mark `D` as "must not // be null" and add an assertion. ControlFlowContext(const Decl *D, std::unique_ptr Cfg, - llvm::DenseMap StmtToBlock) + llvm::DenseMap StmtToBlock, + llvm::BitVector BlockReachable) : ContainingDecl(D), Cfg(std::move(Cfg)), - StmtToBlock(std::move(StmtToBlock)) {} + StmtToBlock(std::move(StmtToBlock)), + BlockReachable(std::move(BlockReachable)) {} /// The `Decl` containing the statement used to construct the CFG. const Decl *ContainingDecl; std::unique_ptr Cfg; llvm::DenseMap StmtToBlock; + llvm::BitVector BlockReachable; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h index 78a426e..db3d780 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h +++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h @@ -26,9 +26,9 @@ class StmtToEnvMap { public: virtual ~StmtToEnvMap() = default; - /// Returns the environment of the basic block that contains `S` or nullptr if - /// there isn't one. - /// FIXME: Ensure that the result can't be null and return a const reference. + /// Retrieves the environment of the basic block that contains `S`. + /// If `S` is reachable, returns a non-null pointer to the environment. + /// If `S` is not reachable, returns nullptr. virtual const Environment *getEnvironment(const Stmt &S) const = 0; }; diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index 2492b52..6699a0f 100644 --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -16,6 +16,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Error.h" #include @@ -44,6 +45,28 @@ buildStmtToBasicBlockMap(const CFG &Cfg) { return StmtToBlock; } +static llvm::BitVector findReachableBlocks(const CFG &Cfg) { + llvm::BitVector BlockReachable(Cfg.getNumBlockIDs(), false); + + llvm::SmallVector BlocksToVisit; + BlocksToVisit.push_back(&Cfg.getEntry()); + while (!BlocksToVisit.empty()) { + const CFGBlock *Block = BlocksToVisit.back(); + BlocksToVisit.pop_back(); + + if (BlockReachable[Block->getBlockID()]) + continue; + + BlockReachable[Block->getBlockID()] = true; + + for (const CFGBlock *Succ : Block->succs()) + if (Succ) + BlocksToVisit.push_back(Succ); + } + + return BlockReachable; +} + llvm::Expected ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { CFG::BuildOptions Options; @@ -64,7 +87,11 @@ ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { llvm::DenseMap StmtToBlock = buildStmtToBasicBlockMap(*Cfg); - return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock)); + + llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + + return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock), + std::move(BlockReachable)); } } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index e427f14..a1ed37d 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -162,15 +162,27 @@ public: } case BO_LAnd: case BO_LOr: { - BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS); - BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS); - auto &Loc = Env.createStorageLocation(*S); Env.setStorageLocation(*S, Loc); + + BoolValue *LHSVal = getLogicOperatorSubExprValue(*LHS); + // If the LHS was not reachable, this BinaryOperator would also not be + // reachable, and we would never get here. + assert(LHSVal != nullptr); + BoolValue *RHSVal = getLogicOperatorSubExprValue(*RHS); + if (RHSVal == nullptr) { + // If the RHS isn't reachable and we evaluate this BinaryOperator, + // then the value of the LHS must have triggered the short-circuit + // logic. This implies that the value of the entire expression must be + // equal to the value of the LHS. + Env.setValue(Loc, *LHSVal); + break; + } + if (S->getOpcode() == BO_LAnd) - Env.setValue(Loc, Env.makeAnd(LHSVal, RHSVal)); + Env.setValue(Loc, Env.makeAnd(*LHSVal, *RHSVal)); else - Env.setValue(Loc, Env.makeOr(LHSVal, RHSVal)); + Env.setValue(Loc, Env.makeOr(*LHSVal, *RHSVal)); break; } case BO_NE: @@ -779,15 +791,19 @@ public: } private: - BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) { + /// If `SubExpr` is reachable, returns a non-null pointer to the value for + /// `SubExpr`. If `SubExpr` is not reachable, returns nullptr. + BoolValue *getLogicOperatorSubExprValue(const Expr &SubExpr) { // `SubExpr` and its parent logic operator might be part of different basic // blocks. We try to access the value that is assigned to `SubExpr` in the // corresponding environment. - if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) { - if (auto *Val = dyn_cast_or_null( - SubExprEnv->getValue(SubExpr, SkipPast::Reference))) - return *Val; - } + const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr); + if (!SubExprEnv) + return nullptr; + + if (auto *Val = dyn_cast_or_null( + SubExprEnv->getValue(SubExpr, SkipPast::Reference))) + return Val; if (Env.getStorageLocation(SubExpr, SkipPast::None) == nullptr) { // Sub-expressions that are logic operators are not added in basic blocks @@ -800,11 +816,11 @@ private: if (auto *Val = dyn_cast_or_null( Env.getValue(SubExpr, SkipPast::Reference))) - return *Val; + return Val; // If the value of `SubExpr` is still unknown, we create a fresh symbolic // boolean value for it. - return Env.makeAtomicBoolValue(); + return &Env.makeAtomicBoolValue(); } // If context sensitivity is enabled, try to analyze the body of the callee diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index fe00d76..d94b547 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -51,6 +51,8 @@ public: const Environment *getEnvironment(const Stmt &S) const override { auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); assert(BlockIt != CFCtx.getStmtToBlock().end()); + if (!CFCtx.isBlockReachable(*BlockIt->getSecond())) + return nullptr; const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; assert(State); return &State->Env; diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index bc089f1..ef67dc9 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -389,6 +389,20 @@ checkDataflow(AnalysisInputs AI, /// `Name` must be unique in `ASTCtx`. const ValueDecl *findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name); +/// Returns the value (of type `ValueT`) for the given identifier. +/// `ValueT` must be a subclass of `Value` and must be of the appropriate type. +/// +/// Requirements: +/// +/// `Name` must be unique in `ASTCtx`. +template +ValueT &getValueForDecl(ASTContext &ASTCtx, const Environment &Env, + llvm::StringRef Name) { + const ValueDecl *VD = findValueDecl(ASTCtx, Name); + assert(VD != nullptr); + return *cast(Env.getValue(*VD, SkipPast::None)); +} + /// Creates and owns constraints which are boolean values. class ConstraintContext { public: diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 9c16335..1bb772a 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -5104,4 +5104,70 @@ TEST(TransferTest, UnnamedBitfieldInitializer) { }); } +// Repro for a crash that used to occur when we call a `noreturn` function +// within one of the operands of a `&&` or `||` operator. +TEST(TransferTest, NoReturnFunctionInsideShortCircuitedBooleanOp) { + std::string Code = R"( + __attribute__((noreturn)) int doesnt_return(); + bool some_condition(); + void target(bool b1, bool b2) { + // Neither of these should crash. In addition, if we don't terminate the + // program, we know that the operators need to trigger the short-circuit + // logic, so `NoreturnOnRhsOfAnd` will be false and `NoreturnOnRhsOfOr` + // will be true. + bool NoreturnOnRhsOfAnd = b1 && doesnt_return() > 0; + bool NoreturnOnRhsOfOr = b2 || doesnt_return() > 0; + + // Calling a `noreturn` function on the LHS of an `&&` or `||` makes the + // entire expression unreachable. So we know that in both of the following + // cases, if `target()` terminates, the `else` branch was taken. + bool NoreturnOnLhsMakesAndUnreachable = false; + if (some_condition()) + doesnt_return() > 0 && some_condition(); + else + NoreturnOnLhsMakesAndUnreachable = true; + + bool NoreturnOnLhsMakesOrUnreachable = false; + if (some_condition()) + doesnt_return() > 0 || some_condition(); + else + NoreturnOnLhsMakesOrUnreachable = true; + + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + // Check that [[p]] is reachable with a non-false flow condition. + EXPECT_FALSE(Env.flowConditionImplies(Env.getBoolLiteralValue(false))); + + auto &B1 = getValueForDecl(ASTCtx, Env, "b1"); + EXPECT_TRUE(Env.flowConditionImplies(Env.makeNot(B1))); + + auto &NoreturnOnRhsOfAnd = + getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfAnd"); + EXPECT_TRUE(Env.flowConditionImplies(Env.makeNot(NoreturnOnRhsOfAnd))); + + auto &B2 = getValueForDecl(ASTCtx, Env, "b2"); + EXPECT_TRUE(Env.flowConditionImplies(B2)); + + auto &NoreturnOnRhsOfOr = + getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfOr"); + EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnRhsOfOr)); + + auto &NoreturnOnLhsMakesAndUnreachable = getValueForDecl( + ASTCtx, Env, "NoreturnOnLhsMakesAndUnreachable"); + EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesAndUnreachable)); + + auto &NoreturnOnLhsMakesOrUnreachable = getValueForDecl( + ASTCtx, Env, "NoreturnOnLhsMakesOrUnreachable"); + EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesOrUnreachable)); + }); +} + } // namespace -- 2.7.4