From 5e8f597c2fedc740b71f07dfdb1ef3c2d348b193 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Tue, 27 Dec 2022 14:21:29 +0000 Subject: [PATCH] [clang][dataflow] Only model struct fields that are used in the function being analyzed. Previously, the model for structs modeled all fields in a struct when `createValue` was called for that type. This patch adds a prepass on the function under analysis to discover the fields referenced in the scope and then limits modeling to only those fields. This reduces wasted memory usage (modeling unused fields) which can be important for programss that use large structs. Note: This patch obviates the need for https://reviews.llvm.org/D123032. Differential Revision: https://reviews.llvm.org/D140694 --- .../FlowSensitive/DataflowAnalysisContext.h | 24 +++- .../Analysis/FlowSensitive/DataflowEnvironment.h | 3 + .../FlowSensitive/DataflowAnalysisContext.cpp | 27 +++- .../Analysis/FlowSensitive/DataflowEnvironment.cpp | 137 +++++++++++++-------- clang/lib/Analysis/FlowSensitive/Transfer.cpp | 2 +- .../FlowSensitive/DataflowEnvironmentTest.cpp | 10 +- .../Analysis/FlowSensitive/TestingSupport.h | 12 +- .../Analysis/FlowSensitive/TransferTest.cpp | 64 ++++++++-- 8 files changed, 211 insertions(+), 68 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h index f8ee586..9813550 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -54,14 +54,21 @@ llvm::DenseSet getObjectFields(QualType Type); /// is used during dataflow analysis. class DataflowAnalysisContext { public: + // FIXME: merge with TransferOptions from Transfer.h. + struct Options { + bool EnableContextSensitiveAnalysis; + }; + /// Constructs a dataflow analysis context. /// /// Requirements: /// /// `S` must not be null. - DataflowAnalysisContext(std::unique_ptr S) + DataflowAnalysisContext(std::unique_ptr S, + Options Opts = { + /*EnableContextSensitiveAnalysis=*/false}) : S(std::move(S)), TrueVal(createAtomicBoolValue()), - FalseVal(createAtomicBoolValue()) { + FalseVal(createAtomicBoolValue()), Options(Opts) { assert(this->S != nullptr); } @@ -253,7 +260,11 @@ public: /// returns null. const ControlFlowContext *getControlFlowContext(const FunctionDecl *F); + void addFieldsReferencedInScope(llvm::DenseSet Fields); + private: + friend class Environment; + struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo { static QualType getEmptyKey() { // Allow a NULL `QualType` by using a different value as the empty key. @@ -265,6 +276,10 @@ private: using DenseMapInfo::isEqual; }; + /// Returns the subset of fields of `Type` that are referenced in the scope of + /// the analysis. + llvm::DenseSet getReferencedFields(QualType Type); + /// Adds all constraints of the flow condition identified by `Token` and all /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used /// to track tokens of flow conditions that were already visited by recursive @@ -330,6 +345,8 @@ private: AtomicBoolValue &TrueVal; AtomicBoolValue &FalseVal; + Options Options; + // Indices that are used to avoid recreating the same composite boolean // values. llvm::DenseMap, ConjunctionValue *> @@ -359,6 +376,9 @@ private: llvm::DenseMap FlowConditionConstraints; llvm::DenseMap FunctionContexts; + + // All fields referenced (statically) in the scope of the analysis. + llvm::DenseSet FieldsReferencedInScope; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index c1653c2..fbc5f6d 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -452,6 +452,9 @@ private: void pushCallInternal(const FunctionDecl *FuncDecl, ArrayRef Args); + /// Assigns storage locations and values to all variables in `Vars`. + void initVars(llvm::DenseSet Vars); + // `DACtx` is not null and not owned by this object. DataflowAnalysisContext *DACtx; diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index af2f1fc..6b7b2dc 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -16,6 +16,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" #include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/Support/Debug.h" #include #include @@ -24,13 +25,33 @@ namespace clang { namespace dataflow { +void DataflowAnalysisContext::addFieldsReferencedInScope( + llvm::DenseSet Fields) { + llvm::set_union(FieldsReferencedInScope, Fields); +} + +llvm::DenseSet +DataflowAnalysisContext::getReferencedFields(QualType Type) { + llvm::DenseSet Fields = getObjectFields(Type); + llvm::set_intersect(Fields, FieldsReferencedInScope); + return Fields; +} + StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { if (!Type.isNull() && (Type->isStructureOrClassType() || Type->isUnionType())) { - // FIXME: Explore options to avoid eager initialization of fields as some of - // them might not be needed for a particular analysis. llvm::DenseMap FieldLocs; - for (const FieldDecl *Field : getObjectFields(Type)) + // During context-sensitive analysis, a struct may be allocated in one + // function, but its field accessed in a function lower in the stack than + // the allocation. Since we only collect fields used in the function where + // the allocation occurs, we can't apply that filter when performing + // context-sensitive analysis. But, this only applies to storage locations, + // since fields access it not allowed to fail. In contrast, field *values* + // don't need this allowance, since the API allows for uninitialized fields. + auto Fields = Options.EnableContextSensitiveAnalysis + ? getObjectFields(Type) + : getReferencedFields(Type); + for (const FieldDecl *Field : Fields) FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); return takeOwnership( std::make_unique(Type, std::move(FieldLocs))); diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 1a561b3..936f10e 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -151,44 +151,62 @@ static Value &widenDistinctValues(QualType Type, Value &Prev, } /// Initializes a global storage value. -static void initGlobalVar(const VarDecl &D, Environment &Env) { - if (!D.hasGlobalStorage() || - Env.getStorageLocation(D, SkipPast::None) != nullptr) - return; - - auto &Loc = Env.createStorageLocation(D); - Env.setStorageLocation(D, Loc); - if (auto *Val = Env.createValue(D.getType())) - Env.setValue(Loc, *Val); -} - -/// Initializes a global storage value. -static void initGlobalVar(const Decl &D, Environment &Env) { +static void insertIfGlobal(const Decl &D, + llvm::DenseSet &Fields, + llvm::DenseSet &Vars) { if (auto *V = dyn_cast(&D)) - initGlobalVar(*V, Env); -} - -/// Initializes global storage values that are declared or referenced from -/// sub-statements of `S`. -// FIXME: Add support for resetting globals after function calls to enable -// the implementation of sound analyses. -static void initGlobalVars(const Stmt &S, Environment &Env) { - for (auto *Child : S.children()) { + if (V->hasGlobalStorage()) + Vars.insert(V); +} + +static void getFieldsAndGlobalVars(const Decl &D, + llvm::DenseSet &Fields, + llvm::DenseSet &Vars) { + insertIfGlobal(D, Fields, Vars); + if (const auto *Decomp = dyn_cast(&D)) + for (const auto *B : Decomp->bindings()) + if (auto *ME = dyn_cast_or_null(B->getBinding())) + // FIXME: should we be using `E->getFoundDecl()`? + if (const auto *FD = dyn_cast(ME->getMemberDecl())) + Fields.insert(FD); +} + +/// Traverses `S` and inserts into `Vars` any global storage values that are +/// declared in or referenced from sub-statements. +static void getFieldsAndGlobalVars(const Stmt &S, + llvm::DenseSet &Fields, + llvm::DenseSet &Vars) { + for (auto *Child : S.children()) if (Child != nullptr) - initGlobalVars(*Child, Env); - } + getFieldsAndGlobalVars(*Child, Fields, Vars); if (auto *DS = dyn_cast(&S)) { - if (DS->isSingleDecl()) { - initGlobalVar(*DS->getSingleDecl(), Env); - } else { + if (DS->isSingleDecl()) + getFieldsAndGlobalVars(*DS->getSingleDecl(), Fields, Vars); + else for (auto *D : DS->getDeclGroup()) - initGlobalVar(*D, Env); - } + getFieldsAndGlobalVars(*D, Fields, Vars); } else if (auto *E = dyn_cast(&S)) { - initGlobalVar(*E->getDecl(), Env); + insertIfGlobal(*E->getDecl(), Fields, Vars); } else if (auto *E = dyn_cast(&S)) { - initGlobalVar(*E->getMemberDecl(), Env); + // FIXME: should we be using `E->getFoundDecl()`? + const ValueDecl *VD = E->getMemberDecl(); + insertIfGlobal(*VD, Fields, Vars); + if (const auto *FD = dyn_cast(VD)) + Fields.insert(FD); + } +} + +// FIXME: Add support for resetting globals after function calls to enable +// the implementation of sound analyses. +void Environment::initVars(llvm::DenseSet Vars) { + for (const VarDecl *D : Vars) { + if (getStorageLocation(*D, SkipPast::None) != nullptr) + continue; + auto &Loc = createStorageLocation(*D); + setStorageLocation(*D, Loc); + if (auto *Val = createValue(D->getType())) + setValue(Loc, *Val); } } @@ -216,7 +234,27 @@ Environment::Environment(DataflowAnalysisContext &DACtx, if (const auto *FuncDecl = dyn_cast(&DeclCtx)) { assert(FuncDecl->getBody() != nullptr); - initGlobalVars(*FuncDecl->getBody(), *this); + + llvm::DenseSet Fields; + llvm::DenseSet Vars; + + // Look for global variable references in the constructor-initializers. + if (const auto *CtorDecl = dyn_cast(&DeclCtx)) { + for (const auto *Init : CtorDecl->inits()) { + if (const auto *M = Init->getAnyMember()) + Fields.insert(M); + const Expr *E = Init->getInit(); + assert(E != nullptr); + getFieldsAndGlobalVars(*E, Fields, Vars); + } + } + getFieldsAndGlobalVars(*FuncDecl->getBody(), Fields, Vars); + + initVars(Vars); + // These have to be set before the lines that follow to ensure that create* + // work correctly for structs. + DACtx.addFieldsReferencedInScope(std::move(Fields)); + for (const auto *ParamDecl : FuncDecl->parameters()) { assert(ParamDecl != nullptr); auto &ParamLoc = createStorageLocation(*ParamDecl); @@ -243,15 +281,6 @@ Environment::Environment(DataflowAnalysisContext &DACtx, setValue(*ThisPointeeLoc, *ThisPointeeVal); } } - - // Look for global variable references in the constructor-initializers. - if (const auto *CtorDecl = dyn_cast(&DeclCtx)) { - for (const auto *Init : CtorDecl->inits()) { - const Expr *E = Init->getInit(); - assert(E != nullptr); - initGlobalVars(*E, *this); - } - } } bool Environment::canDescend(unsigned MaxDepth, @@ -298,10 +327,24 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl, ArrayRef Args) { CallStack.push_back(FuncDecl); - // FIXME: In order to allow the callee to reference globals, we probably need - // to call `initGlobalVars` here in some way. + // FIXME: Share this code with the constructor, rather than duplicating it. + llvm::DenseSet Fields; + llvm::DenseSet Vars; + // Look for global variable references in the constructor-initializers. + if (const auto *CtorDecl = dyn_cast(FuncDecl)) { + for (const auto *Init : CtorDecl->inits()) { + if (const auto *M = Init->getAnyMember()) + Fields.insert(M); + const Expr *E = Init->getInit(); + assert(E != nullptr); + getFieldsAndGlobalVars(*E, Fields, Vars); + } + } + getFieldsAndGlobalVars(*FuncDecl->getBody(), Fields, Vars); + initVars(Vars); + DACtx->addFieldsReferencedInScope(std::move(Fields)); - auto ParamIt = FuncDecl->param_begin(); + const auto *ParamIt = FuncDecl->param_begin(); // FIXME: Parameters don't always map to arguments 1:1; examples include // overloaded operators implemented as member functions, and parameter packs. @@ -570,7 +613,7 @@ void Environment::setValue(const StorageLocation &Loc, Value &Val) { const QualType Type = AggregateLoc.getType(); assert(Type->isStructureOrClassType() || Type->isUnionType()); - for (const FieldDecl *Field : getObjectFields(Type)) { + for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { assert(Field != nullptr); StorageLocation &FieldLoc = AggregateLoc.getChild(*Field); MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field); @@ -684,10 +727,8 @@ Value *Environment::createValueUnlessSelfReferential( if (Type->isStructureOrClassType() || Type->isUnionType()) { CreatedValuesCount++; - // FIXME: Initialize only fields that are accessed in the context that is - // being analyzed. llvm::DenseMap FieldValues; - for (const FieldDecl *Field : getObjectFields(Type)) { + for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { assert(Field != nullptr); QualType FieldType = Field->getType(); diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 32e9d6f..465dbc9 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -283,7 +283,7 @@ public: } else if (auto *VD = B->getHoldingVar()) { // Holding vars are used to back the BindingDecls of tuple-like // types. The holding var declarations appear *after* this statement, - // so we have to create a location or them here to share with `B`. We + // so we have to create a location for them here to share with `B`. We // don't visit the binding, because we know it will be a DeclRefExpr // to `VD`. auto &VDLoc = Env.createStorageLocation(*VD); diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp index b237f3b..5e55e1b 100644 --- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp @@ -57,6 +57,8 @@ TEST_F(EnvironmentTest, CreateValueRecursiveType) { bool X; Recursive *R; }; + // Use both fields to force them to be created with `createValue`. + void Usage(Recursive R) { (void)R.X; (void)R.R; } )cc"; auto Unit = @@ -74,11 +76,15 @@ TEST_F(EnvironmentTest, CreateValueRecursiveType) { const QualType *Ty = selectFirst("target", Results); const FieldDecl *R = selectFirst("field-r", Results); ASSERT_TRUE(Ty != nullptr && !Ty->isNull()); - ASSERT_TRUE(R != nullptr); + ASSERT_THAT(R, NotNull()); + + Results = match(functionDecl(hasName("Usage")).bind("fun"), Context); + const auto *Fun = selectFirst("fun", Results); + ASSERT_THAT(Fun, NotNull()); // Verify that the struct and the field (`R`) with first appearance of the // type is created successfully. - Environment Env(DAContext); + Environment Env(DAContext, *Fun); Value *Val = Env.createValue(*Ty); ASSERT_NE(Val, nullptr); StructValue *SVal = clang::dyn_cast(Val); diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index c72e8e4..e38826c 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -134,6 +134,11 @@ template struct AnalysisInputs { ASTBuildVirtualMappedFiles = std::move(Arg); return std::move(*this); } + AnalysisInputs && + withContextSensitivity() && { + EnableContextSensitivity = true; + return std::move(*this); + } /// Required. Input code that is analyzed. llvm::StringRef Code; @@ -159,6 +164,9 @@ template struct AnalysisInputs { ArrayRef ASTBuildArgs = {}; /// Optional. Options for building the AST context. tooling::FileContentMappings ASTBuildVirtualMappedFiles = {}; + /// Enables context-sensitive analysis when constructing the + /// `DataflowAnalysisContext`. + bool EnableContextSensitivity = false; }; /// Returns assertions based on annotations that are present after statements in @@ -222,7 +230,9 @@ checkDataflow(AnalysisInputs AI, auto &CFCtx = *MaybeCFCtx; // Initialize states for running dataflow analysis. - DataflowAnalysisContext DACtx(std::make_unique()); + DataflowAnalysisContext DACtx( + std::make_unique(), + {/*EnableContextSensitiveAnalysis=*/AI.EnableContextSensitivity}); Environment InitEnv(DACtx, *Target); auto Analysis = AI.MakeAnalysis(Context, InitEnv); std::function ASTBuildArgs = { + "-fsyntax-only", "-fno-delayed-template-parsing", + "-std=" + + std::string(LangStandard::getLangStandardForKind(Std).getName())}; + auto AI = + AnalysisInputs(Code, hasName(TargetFun), + [&Options](ASTContext &C, Environment &) { + return NoopAnalysis(C, Options); + }) + .withASTBuildArgs(ASTBuildArgs); + if (Options.BuiltinTransferOpts && + Options.BuiltinTransferOpts->ContextSensitiveOpts) + AI = std::move(AI).withContextSensitivity(); ASSERT_THAT_ERROR( checkDataflow( - AnalysisInputs(Code, hasName(TargetFun), - [Options](ASTContext &C, Environment &) { - return NoopAnalysis(C, Options); - }) - .withASTBuildArgs( - {"-fsyntax-only", "-fno-delayed-template-parsing", - "-std=" + - std::string(LangStandard::getLangStandardForKind(Std) - .getName())}), + std::move(AI), /*VerifyResults=*/ [&Match](const llvm::StringMap> &Results, @@ -151,6 +157,7 @@ TEST(TransferTest, StructVarDecl) { void target() { A Foo; + (void)Foo.Bar; // [[p]] } )"; @@ -198,6 +205,7 @@ TEST(TransferTest, StructVarDeclWithInit) { void target() { A Foo = Gen(); + (void)Foo.Bar; // [[p]] } )"; @@ -238,11 +246,13 @@ TEST(TransferTest, StructVarDeclWithInit) { TEST(TransferTest, ClassVarDecl) { std::string Code = R"( class A { + public: int Bar; }; void target() { A Foo; + (void)Foo.Bar; // [[p]] } )"; @@ -336,6 +346,10 @@ TEST(TransferTest, SelfReferentialReferenceVarDecl) { void target() { A &Foo = getA(); + (void)Foo.Bar.FooRef; + (void)Foo.Bar.FooPtr; + (void)Foo.Bar.BazRef; + (void)Foo.Bar.BazPtr; // [[p]] } )"; @@ -478,6 +492,10 @@ TEST(TransferTest, SelfReferentialPointerVarDecl) { void target() { A *Foo = getA(); + (void)Foo->Bar->FooRef; + (void)Foo->Bar->FooPtr; + (void)Foo->Bar->BazRef; + (void)Foo->Bar->BazPtr; // [[p]] } )"; @@ -891,7 +909,7 @@ TEST(TransferTest, StructParamDecl) { }; void target(A Foo) { - (void)0; + (void)Foo.Bar; // [[p]] } )"; @@ -1052,6 +1070,9 @@ TEST(TransferTest, DerivedBaseMemberClass) { int APrivate; public: int APublic; + + private: + friend void target(); }; class B : public A { @@ -1060,10 +1081,20 @@ TEST(TransferTest, DerivedBaseMemberClass) { int BProtected; private: int BPrivate; + + private: + friend void target(); }; void target() { B Foo; + (void)Foo.ADefault; + (void)Foo.AProtected; + (void)Foo.APrivate; + (void)Foo.APublic; + (void)Foo.BDefault; + (void)Foo.BProtected; + (void)Foo.BPrivate; // [[p]] } )"; @@ -1202,6 +1233,7 @@ TEST(TransferTest, DerivedBaseMemberStructDefault) { void target() { B Foo; + (void)Foo.Bar; // [[p]] } )"; @@ -1525,7 +1557,11 @@ TEST(TransferTest, UnionThisMember) { int Bar; void target() { - (void)0; // [[p]] + A a; + // Mention the fields to ensure they're included in the analysis. + (void)a.Foo; + (void)a.Bar; + // [[p]] } }; )"; @@ -1777,6 +1813,7 @@ TEST(TransferTest, TemporaryObject) { void target() { A Foo = A(); + (void)Foo.Bar; // [[p]] } )"; @@ -1814,6 +1851,7 @@ TEST(TransferTest, ElidableConstructor) { void target() { A Foo = A(); + (void)Foo.Bar; // [[p]] } )"; @@ -1851,6 +1889,7 @@ TEST(TransferTest, AssignmentOperator) { void target() { A Foo; A Bar; + (void)Foo.Baz; // [[p1]] Foo = Bar; // [[p2]] @@ -1913,6 +1952,7 @@ TEST(TransferTest, CopyConstructor) { void target() { A Foo; + (void)Foo.Baz; A Bar = Foo; // [[p]] } @@ -1958,6 +1998,7 @@ TEST(TransferTest, CopyConstructorWithParens) { void target() { A Foo; + (void)Foo.Baz; A Bar((A(Foo))); // [[p]] } @@ -2018,6 +2059,7 @@ TEST(TransferTest, MoveConstructor) { void target() { A Foo; A Bar; + (void)Foo.Baz; // [[p1]] Foo = std::move(Bar); // [[p2]] -- 2.7.4