[dataflow] Disallow implicit copy of Environment, use fork() instead
authorSam McCall <sam.mccall@gmail.com>
Sat, 24 Jun 2023 00:45:17 +0000 (02:45 +0200)
committerSam McCall <sam.mccall@gmail.com>
Mon, 26 Jun 2023 13:26:02 +0000 (15:26 +0200)
Environments are heavyweight, and copies are observably different from the
original: they introduce new SAT variables, which degrade performance &
debugging. Copies should only be done deliberately, where justified.

Empirically there are several places in the framework where we perform dubious
copies, sometimes entirely accidentally. (see e.g. D153491). Making these
explicit makes this mistake harder.

This patch forces copies to go through fork(), the copy-constructor is private.
This requires changes to existing callsites: some are correct and call fork(),
some are incorrect and are fixed, others are difficult and I left a FIXME.

Differential Revision: https://reviews.llvm.org/D153674

clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
clang/lib/Analysis/FlowSensitive/Transfer.cpp
clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
clang/unittests/Analysis/FlowSensitive/TestingSupport.h

index 3be29cf..b6ba3be 100644 (file)
@@ -205,8 +205,10 @@ runDataflowAnalysis(
                               const TypeErasedDataflowAnalysisState &State) {
       auto *Lattice =
           llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
+      // FIXME: we should not be copying the environment here!
+      // Ultimately the PostVisitCFG only gets a const reference anyway.
       PostVisitCFG(Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
-                                *Lattice, State.Env});
+                                *Lattice, State.Env.fork()});
     };
   }
 
@@ -222,12 +224,13 @@ runDataflowAnalysis(
   llvm::transform(
       std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates),
       [](auto &OptState) {
-        return llvm::transformOptional(std::move(OptState), [](auto &&State) {
-          return DataflowAnalysisState<typename AnalysisT::Lattice>{
-              llvm::any_cast<typename AnalysisT::Lattice>(
-                  std::move(State.Lattice.Value)),
-              std::move(State.Env)};
-        });
+        return llvm::transformOptional(
+            std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) {
+              return DataflowAnalysisState<typename AnalysisT::Lattice>{
+                  llvm::any_cast<typename AnalysisT::Lattice>(
+                      std::move(State.Lattice.Value)),
+                  std::move(State.Env)};
+            });
       });
   return BlockStates;
 }
index 059cb84..1184fa6 100644 (file)
@@ -160,8 +160,8 @@ public:
   /// the state of a program.
   explicit Environment(DataflowAnalysisContext &DACtx);
 
-  Environment(const Environment &Other);
-  Environment &operator=(const Environment &Other);
+  // Copy-constructor is private, Environments should not be copied. See fork().
+  Environment &operator=(const Environment &Other) = delete;
 
   Environment(Environment &&Other) = default;
   Environment &operator=(Environment &&Other) = default;
@@ -176,6 +176,16 @@ public:
   /// with a symbolic representation of the `this` pointee.
   Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx);
 
+  /// Returns a new environment that is a copy of this one.
+  ///
+  /// The state of the program is initially the same, but can be mutated without
+  /// affecting the original.
+  ///
+  /// However the original should not be further mutated, as this may interfere
+  /// with the fork. (In practice, values are stored independently, but the
+  /// forked flow condition references the original).
+  Environment fork() const;
+
   /// Creates and returns an environment to use for an inline analysis  of the
   /// callee. Uses the storage location from each argument in the `Call` as the
   /// storage location for the corresponding parameter in the callee.
@@ -540,6 +550,9 @@ public:
   LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
 
 private:
+  // The copy-constructor is for use in fork() only.
+  Environment(const Environment &) = default;
+
   /// Creates a value appropriate for `Type`, if `Type` is supported, otherwise
   /// return null.
   ///
index e012b13..9ed3e25 100644 (file)
@@ -126,6 +126,10 @@ struct TypeErasedDataflowAnalysisState {
 
   TypeErasedDataflowAnalysisState(TypeErasedLattice Lattice, Environment Env)
       : Lattice(std::move(Lattice)), Env(std::move(Env)) {}
+
+  TypeErasedDataflowAnalysisState fork() const {
+    return TypeErasedDataflowAnalysisState(Lattice, Env.fork());
+  }
 };
 
 /// Transfers the state of a basic block by evaluating each of its elements in
index 4f5a877..c7ecdf4 100644 (file)
@@ -265,19 +265,10 @@ Environment::Environment(DataflowAnalysisContext &DACtx)
     : DACtx(&DACtx),
       FlowConditionToken(&DACtx.arena().makeFlowConditionToken()) {}
 
-Environment::Environment(const Environment &Other)
-    : DACtx(Other.DACtx), CallStack(Other.CallStack),
-      ReturnVal(Other.ReturnVal), ReturnLoc(Other.ReturnLoc),
-      ThisPointeeLoc(Other.ThisPointeeLoc), DeclToLoc(Other.DeclToLoc),
-      ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
-      MemberLocToStruct(Other.MemberLocToStruct),
-      FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
-}
-
-Environment &Environment::operator=(const Environment &Other) {
-  Environment Copy(Other);
-  *this = std::move(Copy);
-  return *this;
+Environment Environment::fork() const {
+  Environment Copy(*this);
+  Copy.FlowConditionToken = &DACtx->forkFlowCondition(*FlowConditionToken);
+  return Copy;
 }
 
 Environment::Environment(DataflowAnalysisContext &DACtx,
index e7f596a..a7c84b1 100644 (file)
@@ -865,7 +865,7 @@ private:
     assert(BlockToOutputState);
     assert(ExitBlock < BlockToOutputState->size());
 
-    auto ExitState = (*BlockToOutputState)[ExitBlock];
+    auto &ExitState = (*BlockToOutputState)[ExitBlock];
     assert(ExitState);
 
     Env.popCall(S, ExitState->Env);
index de22d63..1f23b6c 100644 (file)
@@ -282,7 +282,7 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
     if (!MaybePredState)
       continue;
 
-    TypeErasedDataflowAnalysisState PredState = *MaybePredState;
+    TypeErasedDataflowAnalysisState PredState = MaybePredState->fork();
     if (Analysis.builtinOptions()) {
       if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
         const StmtToEnvMap StmtToEnv(AC.CFCtx, AC.BlockStates);
@@ -309,7 +309,7 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
     // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
     // to enable building analyses like computation of dominators that
     // initialize the state of each basic block differently.
-    MaybeState.emplace(Analysis.typeErasedInitialElement(), AC.InitEnv);
+    MaybeState.emplace(Analysis.typeErasedInitialElement(), AC.InitEnv.fork());
   }
   return std::move(*MaybeState);
 }
@@ -447,12 +447,12 @@ runTypeErasedDataflowAnalysis(
   ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV);
 
   std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates(
-      CFCtx.getCFG().size(), std::nullopt);
+      CFCtx.getCFG().size());
 
   // The entry basic block doesn't contain statements so it can be skipped.
   const CFGBlock &Entry = CFCtx.getCFG().getEntry();
   BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
-                                     InitEnv};
+                                     InitEnv.fork()};
   Worklist.enqueueSuccessors(&Entry);
 
   AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates);
index aa2b2a2..c827422 100644 (file)
@@ -363,7 +363,7 @@ checkDataflow(AnalysisInputs<AnalysisT> AI,
         if (It == StmtToAnnotations.end())
           return;
         auto [_, InsertSuccess] = AnnotationStates.insert(
-            {It->second, StateT{State.Lattice, State.Env}});
+            {It->second, StateT{State.Lattice, State.Env.fork()}});
         (void)_;
         (void)InsertSuccess;
         assert(InsertSuccess);