From: Stanislav Gatev Date: Fri, 10 Dec 2021 09:37:07 +0000 (+0100) Subject: [clang][dataflow] Implement a basic algorithm for dataflow analysis X-Git-Tag: upstream/15.0.7~23426 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8dcaf3aa0bf25508700a7452ed963c1487221dfd;p=platform%2Fupstream%2Fllvm.git [clang][dataflow] Implement a basic algorithm for dataflow analysis This is part of the implementation of the dataflow analysis framework. See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev. Reviewed By: xazax.hun, gribozavr2 Differential Revision: https://reviews.llvm.org/D115235 --- diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 69a5c2e..4a3c023 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -15,11 +15,20 @@ #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" + namespace clang { namespace dataflow { /// Holds the state of the program (store and heap) at a given program point. -class Environment {}; +class Environment { +public: + bool operator==(const Environment &) const { return true; } + + LatticeJoinEffect join(const Environment &) { + return LatticeJoinEffect::Unchanged; + } +}; } // namespace dataflow } // namespace clang diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h index 9009573..52d84eb 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h @@ -61,11 +61,12 @@ struct ReversePostOrderCompare { /// the same block multiple times at once. struct ForwardDataflowWorklist : DataflowWorklistBase { + ForwardDataflowWorklist(const CFG &Cfg, PostOrderCFGView *POV) + : DataflowWorklistBase(Cfg, POV, + ReversePostOrderCompare{POV->getComparator()}) {} + ForwardDataflowWorklist(const CFG &Cfg, AnalysisDeclContext &Ctx) - : DataflowWorklistBase( - Cfg, Ctx.getAnalysis(), - ReversePostOrderCompare{ - Ctx.getAnalysis()->getComparator()}) {} + : ForwardDataflowWorklist(Cfg, Ctx.getAnalysis()) {} void enqueueSuccessors(const CFGBlock *Block) { for (auto B : Block->succs()) diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h index 9448b91..55fae24 100644 --- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h +++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h @@ -76,6 +76,20 @@ struct TypeErasedDataflowAnalysisState { Environment Env; }; +/// Transfers the state of a basic block by evaluating each of its statements in +/// the context of `Analysis` and the states of its predecessors that are +/// available in `BlockStates`. +/// +/// Requirements: +/// +/// All predecessors of `Block` except those with loop back edges must have +/// already been transferred. States in `BlockStates` that are set to +/// `llvm::None` represent basic blocks that are not evaluated yet. +TypeErasedDataflowAnalysisState transferBlock( + std::vector> &BlockStates, + const CFGBlock &Block, const Environment &InitEnv, + TypeErasedDataflowAnalysis &Analysis); + /// Performs dataflow analysis and returns a mapping from basic block IDs to /// dataflow analysis states that model the respective basic blocks. Indices /// of the returned vector correspond to basic block IDs. diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index bb7eb99..45afd59 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -11,15 +11,77 @@ // //===----------------------------------------------------------------------===// +#include #include +#include "clang/Analysis/Analyses/PostOrderCFGView.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/Support/raw_ostream.h" -using namespace clang; -using namespace dataflow; +namespace clang { +namespace dataflow { + +/// Computes the input state for a given basic block by joining the output +/// states of its predecessors. +/// +/// Requirements: +/// +/// All predecessors of `Block` except those with loop back edges must have +/// already been transferred. States in `BlockStates` that are set to +/// `llvm::None` represent basic blocks that are not evaluated yet. +static TypeErasedDataflowAnalysisState computeBlockInputState( + std::vector> &BlockStates, + const CFGBlock &Block, const Environment &InitEnv, + TypeErasedDataflowAnalysis &Analysis) { + // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` + // to enable building analyses like computation of dominators that initialize + // the state of each basic block differently. + TypeErasedDataflowAnalysisState State = {Analysis.typeErasedInitialElement(), + InitEnv}; + for (const CFGBlock *Pred : Block.preds()) { + // Skip if the `Block` is unreachable or control flow cannot get past it. + if (!Pred || Pred->hasNoReturnElement()) + continue; + + // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a + // loop back edge to `Block`. + const llvm::Optional &MaybePredState = + BlockStates[Pred->getBlockID()]; + if (!MaybePredState.hasValue()) + continue; + + const TypeErasedDataflowAnalysisState &PredState = + MaybePredState.getValue(); + Analysis.joinTypeErased(State.Lattice, PredState.Lattice); + State.Env.join(PredState.Env); + } + return State; +} + +TypeErasedDataflowAnalysisState transferBlock( + std::vector> &BlockStates, + const CFGBlock &Block, const Environment &InitEnv, + TypeErasedDataflowAnalysis &Analysis) { + TypeErasedDataflowAnalysisState State = + computeBlockInputState(BlockStates, Block, InitEnv, Analysis); + for (const CFGElement &Element : Block) { + // FIXME: Evaluate other kinds of `CFGElement`. + const llvm::Optional Stmt = Element.getAs(); + if (!Stmt.hasValue()) + continue; + + // FIXME: Evaluate the statement contained in `Stmt`. + + State.Lattice = Analysis.transferTypeErased(Stmt.getValue().getStmt(), + State.Lattice, State.Env); + } + return State; +} std::vector> runTypeErasedDataflowAnalysis(const CFG &Cfg, @@ -29,7 +91,59 @@ runTypeErasedDataflowAnalysis(const CFG &Cfg, // are specified in the header. This could be done by remembering // what options were used to build `Cfg` and asserting on them here. - // FIXME: Implement work list-based algorithm to compute the fixed - // point of `Analysis::transform` for every basic block in `Cfg`. - return {}; + PostOrderCFGView POV(&Cfg); + ForwardDataflowWorklist Worklist(Cfg, &POV); + + std::vector> BlockStates; + BlockStates.resize(Cfg.size(), llvm::None); + + // The entry basic block doesn't contain statements so it can be skipped. + const CFGBlock &Entry = Cfg.getEntry(); + BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), + InitEnv}; + Worklist.enqueueSuccessors(&Entry); + + // Bugs in lattices and transfer functions can prevent the analysis from + // converging. To limit the damage (infinite loops) that these bugs can cause, + // limit the number of iterations. + // FIXME: Consider making the maximum number of iterations configurable. + // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number + // of iterations, number of functions that time out, etc. + unsigned Iterations = 0; + static constexpr unsigned MaxIterations = 1 << 16; + while (const CFGBlock *Block = Worklist.dequeue()) { + if (++Iterations > MaxIterations) { + llvm::errs() << "Maximum number of iterations reached, giving up.\n"; + break; + } + + const llvm::Optional &OldBlockState = + BlockStates[Block->getBlockID()]; + TypeErasedDataflowAnalysisState NewBlockState = + transferBlock(BlockStates, *Block, InitEnv, Analysis); + + if (OldBlockState.hasValue() && + Analysis.isEqualTypeErased(OldBlockState.getValue().Lattice, + NewBlockState.Lattice) && + OldBlockState->Env == NewBlockState.Env) { + // The state of `Block` didn't change after transfer so there's no need to + // revisit its successors. + continue; + } + + BlockStates[Block->getBlockID()] = std::move(NewBlockState); + + // Do not add unreachable successor blocks to `Worklist`. + if (Block->hasNoReturnElement()) + continue; + + Worklist.enqueueSuccessors(Block); + } + // FIXME: Consider evaluating unreachable basic blocks (those that have a + // state set to `llvm::None` at this point) to also analyze dead code. + + return BlockStates; } + +} // namespace dataflow +} // namespace clang diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt index 0002687..7e2a00b 100644 --- a/clang/unittests/Analysis/CMakeLists.txt +++ b/clang/unittests/Analysis/CMakeLists.txt @@ -28,3 +28,5 @@ target_link_libraries(ClangAnalysisTests PRIVATE LLVMTestingSupport ) + +add_subdirectory(FlowSensitive) diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt new file mode 100644 index 0000000..d463e31 --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -0,0 +1,20 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_clang_unittest(ClangAnalysisFlowSensitiveTests + TypeErasedDataflowAnalysisTest.cpp + ) + +clang_target_link_libraries(ClangAnalysisFlowSensitiveTests + PRIVATE + clangAnalysis + clangAnalysisFlowSensitive + clangAST + clangASTMatchers + clangBasic + clangFrontend + clangTesting + clangTooling + ) + diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp new file mode 100644 index 0000000..61765eb --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -0,0 +1,148 @@ +//===- unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Decl.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/StringRef.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include +#include + +using namespace clang; +using namespace dataflow; + +template +class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback { +public: + void run(const ast_matchers::MatchFinder::MatchResult &Result) override { + assert(BlockStates.empty()); + + const auto *Func = Result.Nodes.getNodeAs("func"); + assert(Func != nullptr); + + Stmt *Body = Func->getBody(); + assert(Body != nullptr); + + // FIXME: Consider providing a utility that returns a `CFG::BuildOptions` + // which is a good default for most clients or a utility that directly + // builds the `CFG` using default `CFG::BuildOptions`. + CFG::BuildOptions Options; + Options.AddImplicitDtors = true; + Options.AddTemporaryDtors = true; + Options.setAllAlwaysAdd(); + + std::unique_ptr Cfg = + CFG::buildCFG(nullptr, Body, Result.Context, Options); + assert(Cfg != nullptr); + + AnalysisT Analysis(*Result.Context); + Environment Env; + BlockStates = runDataflowAnalysis(*Cfg, Analysis, Env); + } + + std::vector< + llvm::Optional>> + BlockStates; +}; + +template +std::vector>> +runAnalysis(llvm::StringRef Code) { + std::unique_ptr AST = + tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"}); + + AnalysisCallback Callback; + ast_matchers::MatchFinder Finder; + Finder.addMatcher( + ast_matchers::functionDecl(ast_matchers::hasName("target")).bind("func"), + &Callback); + Finder.matchAST(AST->getASTContext()); + + return Callback.BlockStates; +} + +class NoopLattice { +public: + bool operator==(const NoopLattice &) const { return true; } + + LatticeJoinEffect join(const NoopLattice &) { + return LatticeJoinEffect::Unchanged; + } +}; + +class NoopAnalysis : public DataflowAnalysis { +public: + NoopAnalysis(ASTContext &Context) + : DataflowAnalysis(Context) {} + + static NoopLattice initialElement() { return {}; } + + NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) { + return {}; + } +}; + +TEST(DataflowAnalysisTest, NoopAnalysis) { + auto BlockStates = runAnalysis(R"( + void target() {} + )"); + EXPECT_EQ(BlockStates.size(), 2u); + EXPECT_TRUE(BlockStates[0].hasValue()); + EXPECT_TRUE(BlockStates[1].hasValue()); +} + +struct NonConvergingLattice { + int State; + + bool operator==(const NonConvergingLattice &Other) const { + return State == Other.State; + } + + LatticeJoinEffect join(const NonConvergingLattice &Other) { + if (Other.State == 0) + return LatticeJoinEffect::Unchanged; + State += Other.State; + return LatticeJoinEffect::Changed; + } +}; + +class NonConvergingAnalysis + : public DataflowAnalysis { +public: + explicit NonConvergingAnalysis(ASTContext &Context) + : DataflowAnalysis(Context) { + } + + static NonConvergingLattice initialElement() { return {0}; } + + NonConvergingLattice transfer(const Stmt *S, const NonConvergingLattice &E, + Environment &Env) { + return {E.State + 1}; + } +}; + +TEST(DataflowAnalysisTest, NonConvergingAnalysis) { + auto BlockStates = runAnalysis(R"( + void target() { + while(true) {} + } + )"); + EXPECT_EQ(BlockStates.size(), 4u); + EXPECT_FALSE(BlockStates[0].hasValue()); + EXPECT_TRUE(BlockStates[1].hasValue()); + EXPECT_TRUE(BlockStates[2].hasValue()); + EXPECT_TRUE(BlockStates[3].hasValue()); +}