From dd4dde8d39a9c36ea692635bdfc0c90cc8d755fd Mon Sep 17 00:00:00 2001 From: Stanislav Gatev Date: Wed, 16 Feb 2022 16:47:37 +0000 Subject: [PATCH] [clang][dataflow] Add transfer functions for logical and, or, not. This is part of the implementation of the dataflow analysis framework. See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev. Reviewed-by: xazax.hun Differential Revision: https://reviews.llvm.org/D119953 --- .../FlowSensitive/DataflowAnalysisContext.h | 12 +- .../Analysis/FlowSensitive/DataflowEnvironment.h | 2 +- .../clang/Analysis/FlowSensitive/Transfer.h | 13 ++- clang/include/clang/Analysis/FlowSensitive/Value.h | 97 +++++++++++++++- clang/lib/Analysis/FlowSensitive/Transfer.cpp | 71 ++++++++++-- .../FlowSensitive/TypeErasedDataflowAnalysis.cpp | 44 ++++++-- .../Analysis/FlowSensitive/TransferTest.cpp | 124 ++++++++++++++++++++- .../TypeErasedDataflowAnalysisTest.cpp | 3 +- 8 files changed, 334 insertions(+), 32 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h index 5c1b41d..52f738d 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -34,8 +34,8 @@ namespace dataflow { class DataflowAnalysisContext { public: DataflowAnalysisContext() - : TrueVal(&takeOwnership(std::make_unique())), - FalseVal(&takeOwnership(std::make_unique())) {} + : TrueVal(takeOwnership(std::make_unique())), + FalseVal(takeOwnership(std::make_unique())) {} /// Takes ownership of `Loc` and returns a reference to it. /// @@ -115,8 +115,8 @@ public: /// Returns a symbolic boolean value that models a boolean literal equal to /// `Value`. - BoolValue &getBoolLiteralValue(bool Value) const { - return Value ? *TrueVal : *FalseVal; + AtomicBoolValue &getBoolLiteralValue(bool Value) const { + return Value ? TrueVal : FalseVal; } private: @@ -135,8 +135,8 @@ private: StorageLocation *ThisPointeeLoc = nullptr; // FIXME: Add support for boolean expressions. - BoolValue *TrueVal; - BoolValue *FalseVal; + AtomicBoolValue &TrueVal; + AtomicBoolValue &FalseVal; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index cebfb66..af613c9 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -226,7 +226,7 @@ public: /// Returns a symbolic boolean value that models a boolean literal equal to /// `Value` - BoolValue &getBoolLiteralValue(bool Value) const { + AtomicBoolValue &getBoolLiteralValue(bool Value) const { return DACtx->getBoolLiteralValue(Value); } diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h index a12674a..a6b663b 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h +++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h @@ -20,12 +20,23 @@ namespace clang { namespace dataflow { +/// Maps statements to the environments of basic blocks that contain them. +class StmtToEnvMap { +public: + virtual ~StmtToEnvMap() = default; + + /// Returns the environment of the basic block that contains `S` or nullptr if + /// there isn't one. + /// FIXME: Ensure that the result can't be null and return a const reference. + virtual const Environment *getEnvironment(const Stmt &S) const = 0; +}; + /// Evaluates `S` and updates `Env` accordingly. /// /// Requirements: /// /// The type of `S` must not be `ParenExpr`. -void transfer(const Stmt &S, Environment &Env); +void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env); } // namespace dataflow } // namespace clang diff --git a/clang/include/clang/Analysis/FlowSensitive/Value.h b/clang/include/clang/Analysis/FlowSensitive/Value.h index da04f92..7c02cc6 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Value.h +++ b/clang/include/clang/Analysis/FlowSensitive/Value.h @@ -28,7 +28,19 @@ namespace dataflow { /// Base class for all values computed by abstract interpretation. class Value { public: - enum class Kind { Bool, Integer, Reference, Pointer, Struct }; + enum class Kind { + Integer, + Reference, + Pointer, + Struct, + + // Synthetic boolean values are either atomic values or composites that + // represent conjunctions, disjunctions, and negations. + AtomicBool, + Conjunction, + Disjunction, + Negation + }; explicit Value(Kind ValKind) : ValKind(ValKind) {} @@ -43,9 +55,88 @@ private: /// Models a boolean. class BoolValue : public Value { public: - explicit BoolValue() : Value(Kind::Bool) {} + explicit BoolValue(Kind ValueKind) : Value(ValueKind) {} - static bool classof(const Value *Val) { return Val->getKind() == Kind::Bool; } + static bool classof(const Value *Val) { + return Val->getKind() == Kind::AtomicBool || + Val->getKind() == Kind::Conjunction || + Val->getKind() == Kind::Disjunction || + Val->getKind() == Kind::Negation; + } +}; + +/// Models an atomic boolean. +class AtomicBoolValue : public BoolValue { +public: + explicit AtomicBoolValue() : BoolValue(Kind::AtomicBool) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::AtomicBool; + } +}; + +/// Models a boolean conjunction. +// FIXME: Consider representing binary and unary boolean operations similar +// to how they are represented in the AST. This might become more pressing +// when such operations need to be added for other data types. +class ConjunctionValue : public BoolValue { +public: + explicit ConjunctionValue(BoolValue &LeftSubVal, BoolValue &RightSubVal) + : BoolValue(Kind::Conjunction), LeftSubVal(LeftSubVal), + RightSubVal(RightSubVal) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Conjunction; + } + + /// Returns the left sub-value of the conjunction. + BoolValue &getLeftSubValue() const { return LeftSubVal; } + + /// Returns the right sub-value of the conjunction. + BoolValue &getRightSubValue() const { return RightSubVal; } + +private: + BoolValue &LeftSubVal; + BoolValue &RightSubVal; +}; + +/// Models a boolean disjunction. +class DisjunctionValue : public BoolValue { +public: + explicit DisjunctionValue(BoolValue &LeftSubVal, BoolValue &RightSubVal) + : BoolValue(Kind::Disjunction), LeftSubVal(LeftSubVal), + RightSubVal(RightSubVal) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Disjunction; + } + + /// Returns the left sub-value of the disjunction. + BoolValue &getLeftSubValue() const { return LeftSubVal; } + + /// Returns the right sub-value of the disjunction. + BoolValue &getRightSubValue() const { return RightSubVal; } + +private: + BoolValue &LeftSubVal; + BoolValue &RightSubVal; +}; + +/// Models a boolean negation. +class NegationValue : public BoolValue { +public: + explicit NegationValue(BoolValue &SubVal) + : BoolValue(Kind::Negation), SubVal(SubVal) {} + + static bool classof(const Value *Val) { + return Val->getKind() == Kind::Negation; + } + + /// Returns the sub-value of the negation. + BoolValue &getSubVal() const { return SubVal; } + +private: + BoolValue &SubVal; }; /// Models an integer. diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 51a86b7..72475e0 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -39,10 +39,12 @@ static const Expr *skipExprWithCleanups(const Expr *E) { class TransferVisitor : public ConstStmtVisitor { public: - TransferVisitor(Environment &Env) : Env(Env) {} + TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env) + : StmtToEnv(StmtToEnv), Env(Env) {} void VisitBinaryOperator(const BinaryOperator *S) { - if (S->getOpcode() == BO_Assign) { + switch (S->getOpcode()) { + case BO_Assign: { // The CFG does not contain `ParenExpr` as top-level statements in basic // blocks, however sub-expressions can still be of that type. assert(S->getLHS() != nullptr); @@ -51,7 +53,7 @@ public: assert(LHS != nullptr); auto *LHSLoc = Env.getStorageLocation(*LHS, SkipPast::Reference); if (LHSLoc == nullptr) - return; + break; // The CFG does not contain `ParenExpr` as top-level statements in basic // blocks, however sub-expressions can still be of that type. @@ -61,15 +63,57 @@ public: assert(RHS != nullptr); Value *RHSVal = Env.getValue(*RHS, SkipPast::Reference); if (RHSVal == nullptr) - return; + break; // Assign a value to the storage location of the left-hand side. Env.setValue(*LHSLoc, *RHSVal); // Assign a storage location for the whole expression. Env.setStorageLocation(*S, *LHSLoc); + break; + } + case BO_LAnd: + case BO_LOr: { + const Expr *LHS = S->getLHS(); + assert(LHS != nullptr); + + const Expr *RHS = S->getRHS(); + assert(RHS != nullptr); + + BoolValue *LHSVal = + dyn_cast_or_null(Env.getValue(*LHS, SkipPast::Reference)); + + // `RHS` and `S` might be part of different basic blocks. We need to + // access their values from the corresponding environments. + BoolValue *RHSVal = nullptr; + const Environment *RHSEnv = StmtToEnv.getEnvironment(*RHS); + if (RHSEnv != nullptr) + RHSVal = dyn_cast_or_null( + RHSEnv->getValue(*RHS, SkipPast::Reference)); + + // Create fresh values for unknown boolean expressions. + // FIXME: Consider providing a `GetOrCreateFresh` util in case this style + // is expected to be common or make sure that all expressions are assigned + // values and drop this. + if (LHSVal == nullptr) + LHSVal = &Env.takeOwnership(std::make_unique()); + if (RHSVal == nullptr) + RHSVal = &Env.takeOwnership(std::make_unique()); + + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + if (S->getOpcode() == BO_LAnd) + Env.setValue(Loc, Env.takeOwnership(std::make_unique( + *LHSVal, *RHSVal))); + else + Env.setValue(Loc, Env.takeOwnership(std::make_unique( + *LHSVal, *RHSVal))); + break; + } + default: + // FIXME: Add support for BO_EQ, BO_NE. + break; } - // FIXME: Add support for BO_EQ, BO_NE. } void VisitDeclRefExpr(const DeclRefExpr *S) { @@ -212,8 +256,18 @@ public: Env.setValue(PointerLoc, PointerVal); break; } + case UO_LNot: { + auto *SubExprVal = + dyn_cast_or_null(Env.getValue(*SubExpr, SkipPast::None)); + if (SubExprVal == nullptr) + return; + + auto &ExprLoc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, ExprLoc); + Env.setValue(ExprLoc, Env.takeOwnership( + std::make_unique(*SubExprVal))); + } default: - // FIXME: Add support for UO_LNot. break; } } @@ -450,12 +504,13 @@ public: } private: + const StmtToEnvMap &StmtToEnv; Environment &Env; }; -void transfer(const Stmt &S, Environment &Env) { +void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) { assert(!isa(&S)); - TransferVisitor(Env).Visit(&S); + TransferVisitor(StmtToEnv, Env).Visit(&S); } } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 6b14b5c..3acfc65 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -24,6 +24,7 @@ #include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -32,6 +33,27 @@ namespace clang { namespace dataflow { +class StmtToEnvMapImpl : public StmtToEnvMap { +public: + StmtToEnvMapImpl( + const ControlFlowContext &CFCtx, + llvm::ArrayRef> + BlockToState) + : CFCtx(CFCtx), BlockToState(BlockToState) {} + + const Environment *getEnvironment(const Stmt &S) const override { + auto BlockIT = CFCtx.getStmtToBlock().find(&S); + assert(BlockIT != CFCtx.getStmtToBlock().end()); + const auto &State = BlockToState[BlockIT->getSecond()->getBlockID()]; + assert(State.hasValue()); + return &State.getValue().Env; + } + +private: + const ControlFlowContext &CFCtx; + llvm::ArrayRef> BlockToState; +}; + /// Computes the input state for a given basic block by joining the output /// states of its predecessors. /// @@ -42,7 +64,7 @@ namespace dataflow { /// `llvm::None` represent basic blocks that are not evaluated yet. static TypeErasedDataflowAnalysisState computeBlockInputState( const ControlFlowContext &CFCtx, - std::vector> &BlockStates, + llvm::ArrayRef> BlockStates, const CFGBlock &Block, const Environment &InitEnv, TypeErasedDataflowAnalysis &Analysis) { llvm::DenseSet Preds; @@ -111,17 +133,19 @@ static TypeErasedDataflowAnalysisState computeBlockInputState( /// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`. /// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it /// is evaluated. -static void -transferCFGStmt(const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis, - TypeErasedDataflowAnalysisState &State, - std::function - HandleTransferredStmt) { +static void transferCFGStmt( + const ControlFlowContext &CFCtx, + llvm::ArrayRef> BlockStates, + const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis, + TypeErasedDataflowAnalysisState &State, + std::function + HandleTransferredStmt) { const Stmt *S = CfgStmt.getStmt(); assert(S != nullptr); if (Analysis.applyBuiltinTransfer()) - transfer(*S, State.Env); + transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env); Analysis.transferTypeErased(S, State.Lattice, State.Env); if (HandleTransferredStmt != nullptr) @@ -176,8 +200,8 @@ TypeErasedDataflowAnalysisState transferBlock( for (const CFGElement &Element : Block) { switch (Element.getKind()) { case CFGElement::Statement: - transferCFGStmt(*Element.getAs(), Analysis, State, - HandleTransferredStmt); + transferCFGStmt(CFCtx, BlockStates, *Element.getAs(), Analysis, + State, HandleTransferredStmt); break; case CFGElement::Initializer: if (Analysis.applyBuiltinTransfer()) diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 9787683..83ccba1 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -2006,6 +2006,42 @@ TEST_F(TransferTest, AssignFromBoolLiteral) { // [[p]] } )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const auto *FooVal = dyn_cast_or_null( + Env.getValue(*FooDecl, SkipPast::None)); + ASSERT_THAT(FooVal, NotNull()); + + const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); + ASSERT_THAT(BarDecl, NotNull()); + + const auto *BarVal = dyn_cast_or_null( + Env.getValue(*BarDecl, SkipPast::None)); + ASSERT_THAT(BarVal, NotNull()); + + EXPECT_EQ(FooVal, &Env.getBoolLiteralValue(true)); + EXPECT_EQ(BarVal, &Env.getBoolLiteralValue(false)); + }); +} + +TEST_F(TransferTest, AssignFromBoolConjunction) { + std::string Code = R"( + void target() { + bool Foo = true; + bool Bar = true; + bool Baz = (Foo) && (Bar); + // [[p]] + } + )"; runDataflow( Code, [](llvm::ArrayRef< std::pair>> @@ -2028,9 +2064,93 @@ TEST_F(TransferTest, AssignFromBoolLiteral) { dyn_cast_or_null(Env.getValue(*BarDecl, SkipPast::None)); ASSERT_THAT(BarVal, NotNull()); - EXPECT_EQ(FooVal, &Env.getBoolLiteralValue(true)); - EXPECT_EQ(BarVal, &Env.getBoolLiteralValue(false)); + const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz"); + ASSERT_THAT(BazDecl, NotNull()); + + const auto *BazVal = dyn_cast_or_null( + Env.getValue(*BazDecl, SkipPast::None)); + ASSERT_THAT(BazVal, NotNull()); + + EXPECT_EQ(&BazVal->getLeftSubValue(), FooVal); + EXPECT_EQ(&BazVal->getRightSubValue(), BarVal); }); } +TEST_F(TransferTest, AssignFromBoolDisjunction) { + std::string Code = R"( + void target() { + bool Foo = true; + bool Bar = true; + bool Baz = (Foo) || (Bar); + // [[p]] + } + )"; + runDataflow( + Code, [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const auto *FooVal = + dyn_cast_or_null(Env.getValue(*FooDecl, SkipPast::None)); + ASSERT_THAT(FooVal, NotNull()); + + const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); + ASSERT_THAT(BarDecl, NotNull()); + + const auto *BarVal = + dyn_cast_or_null(Env.getValue(*BarDecl, SkipPast::None)); + ASSERT_THAT(BarVal, NotNull()); + + const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz"); + ASSERT_THAT(BazDecl, NotNull()); + + const auto *BazVal = dyn_cast_or_null( + Env.getValue(*BazDecl, SkipPast::None)); + ASSERT_THAT(BazVal, NotNull()); + + EXPECT_EQ(&BazVal->getLeftSubValue(), FooVal); + EXPECT_EQ(&BazVal->getRightSubValue(), BarVal); + }); +} + +TEST_F(TransferTest, AssignFromBoolNegation) { + std::string Code = R"( + void target() { + bool Foo = true; + bool Bar = !(Foo); + // [[p]] + } + )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const auto *FooVal = dyn_cast_or_null( + Env.getValue(*FooDecl, SkipPast::None)); + ASSERT_THAT(FooVal, NotNull()); + + const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); + ASSERT_THAT(BarDecl, NotNull()); + + const auto *BarVal = dyn_cast_or_null( + Env.getValue(*BarDecl, SkipPast::None)); + ASSERT_THAT(BarVal, NotNull()); + + EXPECT_EQ(&BarVal->getSubVal(), FooVal); + }); +} + } // namespace diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index 90d7d73..faeac00 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -387,7 +387,8 @@ protected: Code, "target", [this](ASTContext &Context, Environment &Env) { assert(HasValueTop == nullptr); - HasValueTop = &Env.takeOwnership(std::make_unique()); + HasValueTop = + &Env.takeOwnership(std::make_unique()); return OptionalIntAnalysis(Context, *HasValueTop); }, [&Match]( -- 2.7.4