From 20f8733d4b8d5bdb93080b8824de57b7fae31785 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Wed, 1 Dec 2021 16:47:22 +0100 Subject: [PATCH] [Analyzer][solver] Simplification: Do a fixpoint iteration before the eq class merge This reverts commit f02c5f3478318075d1a469203900e452ba651421 and addresses the issue mentioned in D114619 differently. Repeating the issue here: Currently, during symbol simplification we remove the original member symbol from the equivalence class (`ClassMembers` trait). However, we keep the reverse link (`ClassMap` trait), in order to be able the query the related constraints even for the old member. This asymmetry can lead to a problem when we merge equivalence classes: ``` ClassA: [a, b] // ClassMembers trait, a->a, b->a // ClassMap trait, a is the representative symbol ``` Now let,s delete `a`: ``` ClassA: [b] a->a, b->a ``` Let's merge ClassA into the trivial class `c`: ``` ClassA: [c, b] c->c, b->c, a->a ``` Now, after the merge operation, `c` and `a` are actually in different equivalence classes, which is inconsistent. This issue manifests in a test case (added in D103317): ``` void recurring_symbol(int b) { if (b * b != b) if ((b * b) * b * b != (b * b) * b) if (b * b == 1) } ``` Before the simplification we have these equivalence classes: ``` trivial EQ1: [b * b != b] trivial EQ2: [(b * b) * b * b != (b * b) * b] ``` During the simplification with `b * b == 1`, EQ1 is merged with `1 != b` `EQ1: [b * b != b, 1 != b]` and we remove the complex symbol, so `EQ1: [1 != b]` Then we start to simplify the only symbol in EQ2: `(b * b) * b * b != (b * b) * b --> 1 * b * b != 1 * b --> b * b != b` But `b * b != b` is such a symbol that had been removed previously from EQ1, thus we reach the above mentioned inconsistency. This patch addresses the issue by making it impossible to synthesise a symbol that had been simplified before. We achieve this by simplifying the given symbol to the absolute simplest form. Differential Revision: https://reviews.llvm.org/D114887 --- .../StaticAnalyzer/Core/RangeConstraintManager.cpp | 93 ++++++++++++++++++---- .../expr-inspection-printState-eq-classes.c | 4 +- .../symbol-simplification-disequality-info.cpp | 57 +++++++------ ...ymbol-simplification-fixpoint-one-iteration.cpp | 17 ++-- ...mbol-simplification-fixpoint-two-iterations.cpp | 25 +++--- 5 files changed, 128 insertions(+), 68 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 3f8d9e4..23c67c6 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -601,6 +601,10 @@ public: LLVM_NODISCARD static inline Optional areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second); + /// Remove one member from the class. + LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State, + const SymbolRef Old); + /// Iterate over all symbols and try to simplify them. LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB, RangeSet::Factory &F, @@ -2132,6 +2136,34 @@ inline Optional EquivalenceClass::areEqual(ProgramStateRef State, return llvm::None; } +LLVM_NODISCARD ProgramStateRef +EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { + + SymbolSet ClsMembers = getClassMembers(State); + assert(ClsMembers.contains(Old)); + + // We don't remove `Old`'s Sym->Class relation for two reasons: + // 1) This way constraints for the old symbol can still be found via it's + // equivalence class that it used to be the member of. + // 2) Performance and resource reasons. We can spare one removal and thus one + // additional tree in the forest of `ClassMap`. + + // Remove `Old`'s Class->Sym relation. + SymbolSet::Factory &F = getMembersFactory(State); + ClassMembersTy::Factory &EMFactory = State->get_context(); + ClsMembers = F.remove(ClsMembers, Old); + // Ensure another precondition of the removeMember function (we can check + // this only with isEmpty, thus we have to do the remove first). + assert(!ClsMembers.isEmpty() && + "Class should have had at least two members before member removal"); + // Overwrite the existing members assigned to this class. + ClassMembersTy ClassMembersMap = State->get(); + ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers); + State = State->set(ClassMembersMap); + + return State; +} + // Re-evaluate an SVal with top-level `State->assume` logic. LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, const RangeSet *Constraint, @@ -2159,6 +2191,42 @@ LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, Constraint->getMaxValue(), true); } +// Simplify the given symbol with the help of the SValBuilder. In +// SValBuilder::symplifySval, we traverse the symbol tree and query the +// constraint values for the sub-trees and if a value is a constant we do the +// constant folding. Compound symbols might collapse to simpler symbol tree +// that is still possible to further simplify. Thus, we do the simplification on +// a new symbol tree until we reach the simplest form, i.e. the fixpoint. +// +// Consider the following symbol `(b * b) * b * b` which has this tree: +// * +// / \ +// * b +// / \ +// / b +// (b * b) +// Now, if the `b * b == 1` new constraint is added then during the first +// iteration we have the following transformations: +// * * +// / \ / \ +// * b --> b b +// / \ +// / b +// 1 +// We need another iteration to reach the final result `1`. +LLVM_NODISCARD +static SVal simplifyUntilFixpoint(SValBuilder &SVB, ProgramStateRef State, + const SymbolRef Sym) { + SVal Val = SVB.makeSymbolVal(Sym); + SVal SimplifiedVal = SVB.simplifySVal(State, Val); + // Do the simplification until we can. + while (SimplifiedVal != Val) { + Val = SimplifiedVal; + SimplifiedVal = SVB.simplifySVal(State, Val); + } + return SimplifiedVal; +} + // Iterate over all symbols and try to simplify them. Once a symbol is // simplified then we check if we can merge the simplified symbol's equivalence // class to this class. This way, we simplify not just the symbols but the @@ -2170,7 +2238,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, SymbolSet ClassMembers = Class.getClassMembers(State); for (const SymbolRef &MemberSym : ClassMembers) { - const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym); + const SVal SimplifiedMemberVal = + simplifyUntilFixpoint(SVB, State, MemberSym); const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol(); // The symbol is collapsed to a constant, check if the current State is @@ -2196,6 +2265,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, continue; assert(find(State, MemberSym) == find(State, SimplifiedMemberSym)); + // Remove the old and more complex symbol. + State = find(State, MemberSym).removeMember(State, MemberSym); // Query the class constraint again b/c that may have changed during the // merge above. @@ -2207,25 +2278,19 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, // About performance and complexity: Let us assume that in a State we // have N non-trivial equivalence classes and that all constraints and // disequality info is related to non-trivial classes. In the worst case, - // we can simplify only one symbol of one class in each iteration. + // we can simplify only one symbol of one class in each iteration. The + // number of symbols in one class cannot grow b/c we replace the old + // symbol with the simplified one. Also, the number of the equivalence + // classes can decrease only, b/c the algorithm does a merge operation + // optionally. We need N iterations in this case to reach the fixpoint. + // Thus, the steps needed to be done in the worst case is proportional to + // N*N. // - // The number of the equivalence classes can decrease only, because the - // algorithm does a merge operation optionally. - // ASSUMPTION G: Let us assume that the - // number of symbols in one class cannot grow because we replace the old - // symbol with the simplified one. - // If assumption G holds then we need N iterations in this case to reach - // the fixpoint. Thus, the steps needed to be done in the worst case is - // proportional to N*N. // This worst case scenario can be extended to that case when we have // trivial classes in the constraints and in the disequality map. This // case can be reduced to the case with a State where there are only // non-trivial classes. This is because a merge operation on two trivial // classes results in one non-trivial class. - // - // Empirical measurements show that if we relax assumption G (i.e. if we - // do not replace the complex symbol just add the simplified one) then - // the runtime and memory consumption does not grow noticeably. State = reAssume(State, ClassConstraint, SimplifiedMemberVal); if (!State) return nullptr; diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c index c56fcd6..08a1c6c 100644 --- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c +++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c @@ -16,6 +16,6 @@ void test_equivalence_classes(int a, int b, int c, int d) { } // CHECK: "equivalence_classes": [ -// CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ], -// CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$0", "reg_$2", "reg_$3" ] +// CHECK-NEXT: [ "(reg_$0) != (reg_$2)" ], +// CHECK-NEXT: [ "reg_$0", "reg_$2", "reg_$3" ] // CHECK-NEXT: ], diff --git a/clang/test/Analysis/symbol-simplification-disequality-info.cpp b/clang/test/Analysis/symbol-simplification-disequality-info.cpp index 45d8c05..69238b5 100644 --- a/clang/test/Analysis/symbol-simplification-disequality-info.cpp +++ b/clang/test/Analysis/symbol-simplification-disequality-info.cpp @@ -12,18 +12,18 @@ void test(int a, int b, int c, int d) { if (a + b + c == d) return; clang_analyzer_printState(); - // CHECK: "disequality_info": [ - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$3" ]] - // CHECK-NEXT: }, - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)" ]] - // CHECK-NEXT: } - // CHECK-NEXT: ], + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], // Simplification starts here. @@ -32,33 +32,32 @@ void test(int a, int b, int c, int d) { clang_analyzer_printState(); // CHECK: "disequality_info": [ // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)" ], + // CHECK-NEXT: "class": [ "(reg_$0) + (reg_$2)" ], // CHECK-NEXT: "disequal_to": [ // CHECK-NEXT: [ "reg_$3" ]] // CHECK-NEXT: }, // CHECK-NEXT: { // CHECK-NEXT: "class": [ "reg_$3" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)" ]] - // CHECK-NEXT: } - // CHECK-NEXT: ], + // CHECK-NEXT: [ "(reg_$0) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], if (c != 0) return; clang_analyzer_printState(); - - // CHECK: "disequality_info": [ - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)", "reg_$0" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$3" ]] - // CHECK-NEXT: }, - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)", "reg_$0" ]] - // CHECK-NEXT: } - // CHECK-NEXT: ], + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$0" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$0" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], // Keep the symbols and the constraints! alive. (void)(a * b * c * d); diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp index aec0da1..73922d4 100644 --- a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp +++ b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp @@ -24,15 +24,14 @@ void test(int a, int b, int c) { if (b != 0) return; clang_analyzer_printState(); - // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "((reg_$0) + (reg_$1)) != (reg_$2)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$2)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" } - // CHECK-NEXT: ], - // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ], - // CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$0", "reg_$2" ] - // CHECK-NEXT: ], + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$2)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) != (reg_$2)" ], + // CHECK-NEXT: [ "reg_$0", "reg_$2" ] + // CHECK-NEXT: ], // CHECK-NEXT: "disequality_info": null, // Keep the symbols and the constraints! alive. diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp index f1b057b..679ed3f 100644 --- a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp +++ b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp @@ -27,20 +27,17 @@ void test(int a, int b, int c, int d) { if (b != 0) return; clang_analyzer_printState(); - // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "(((reg_$0) + (reg_$1)) + (reg_$2)) != (reg_$3)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "((reg_$0) + (reg_$2)) != (reg_$3)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$3)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "(reg_$2) + (reg_$1)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$2", "range": "{ [0, 0] }" } - // CHECK-NEXT: ], - // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "(((reg_$0) + (reg_$1)) + (reg_$2)) != (reg_$3)", "((reg_$0) + (reg_$2)) != (reg_$3)", "(reg_$0) != (reg_$3)" ], - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)", "reg_$0", "reg_$3" ], - // CHECK-NEXT: [ "(reg_$2) + (reg_$1)", "reg_$2" ] - // CHECK-NEXT: ], - // CHECK-NEXT: "disequality_info": null, + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$2", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) != (reg_$3)" ], + // CHECK-NEXT: [ "reg_$0", "reg_$3" ], + // CHECK-NEXT: [ "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, // Keep the symbols and the constraints! alive. (void)(a * b * c * d); -- 2.7.4