From 2fc1985db3ea6f60156d9fb4b969cc05636586c0 Mon Sep 17 00:00:00 2001 From: Artem Dergachev Date: Thu, 18 Aug 2016 12:29:41 +0000 Subject: [PATCH] [analyzer] Teach CloneDetector to find clones that look like copy-paste errors. The original clone checker tries to find copy-pasted code that is exactly identical to the original code, up to minor details. As an example, if the copy-pasted code has all references to variable 'a' replaced with references to variable 'b', it is still considered to be an exact clone. The new check finds copy-pasted code in which exactly one variable seems out of place compared to the original code, which likely indicates a copy-paste error (a variable was forgotten to be renamed in one place). Patch by Raphael Isemann! Differential Revision: https://reviews.llvm.org/D23314 llvm-svn: 279056 --- clang/include/clang/Analysis/CloneDetection.h | 39 +++++- clang/lib/Analysis/CloneDetection.cpp | 143 ++++++++++++++++++--- clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp | 72 ++++++++++- .../test/Analysis/copypaste/suspicious-clones.cpp | 97 ++++++++++++++ 4 files changed, 328 insertions(+), 23 deletions(-) create mode 100644 clang/test/Analysis/copypaste/suspicious-clones.cpp diff --git a/clang/include/clang/Analysis/CloneDetection.h b/clang/include/clang/Analysis/CloneDetection.h index 9bb4022c..d031bc3 100644 --- a/clang/include/clang/Analysis/CloneDetection.h +++ b/clang/include/clang/Analysis/CloneDetection.h @@ -24,6 +24,7 @@ namespace clang { class Stmt; class Decl; +class VarDecl; class ASTContext; class CompoundStmt; @@ -222,7 +223,43 @@ public: /// that were identified to be clones of each other. /// \param MinGroupComplexity Only return clones which have at least this /// complexity value. - void findClones(std::vector &Result, unsigned MinGroupComplexity); + /// \param CheckPatterns Returns only clone groups in which the referenced + /// variables follow the same pattern. + void findClones(std::vector &Result, unsigned MinGroupComplexity, + bool CheckPatterns = true); + + /// \brief Describes two clones that reference their variables in a different + /// pattern which could indicate a programming error. + struct SuspiciousClonePair { + /// \brief Utility class holding the relevant information about a single + /// clone in this pair. + struct SuspiciousCloneInfo { + /// The variable which referencing in this clone was against the pattern. + const VarDecl *Variable; + /// Where the variable was referenced. + SourceRange VarRange; + /// The variable that should have been referenced to follow the pattern. + /// If Suggestion is a nullptr then it's not possible to fix the pattern + /// by referencing a different variable in this clone. + const VarDecl *Suggestion; + SuspiciousCloneInfo(const VarDecl *Variable, SourceRange Range, + const VarDecl *Suggestion) + : Variable(Variable), VarRange(Range), Suggestion(Suggestion) {} + SuspiciousCloneInfo() {} + }; + /// The first clone in the pair which always has a suggested variable. + SuspiciousCloneInfo FirstCloneInfo; + /// This other clone in the pair which can have a suggested variable. + SuspiciousCloneInfo SecondCloneInfo; + }; + + /// \brief Searches the provided statements for pairs of clones that don't + /// follow the same pattern when referencing variables. + /// \param Result Output parameter that will contain the clone pairs. + /// \param MinGroupComplexity Only clone pairs in which the clones have at + /// least this complexity value. + void findSuspiciousClones(std::vector &Result, + unsigned MinGroupComplexity); private: /// Stores all found clone groups including invalid groups with only a single diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp index 27815f3..a26409f 100644 --- a/clang/lib/Analysis/CloneDetection.cpp +++ b/clang/lib/Analysis/CloneDetection.cpp @@ -88,8 +88,11 @@ class VariablePattern { struct VariableOccurence { /// The index of the associated VarDecl in the Variables vector. size_t KindID; + /// The source range in the code where the variable was referenced. + SourceRange Range; - VariableOccurence(size_t KindID) : KindID(KindID) {} + VariableOccurence(size_t KindID, SourceRange Range) + : KindID(KindID), Range(Range) {} }; /// All occurences of referenced variables in the order of appearance. @@ -100,19 +103,20 @@ class VariablePattern { /// \brief Adds a new variable referenced to this pattern. /// \param VarDecl The declaration of the variable that is referenced. - void addVariableOccurence(const VarDecl *VarDecl) { + /// \param Range The SourceRange where this variable is referenced. + void addVariableOccurence(const VarDecl *VarDecl, SourceRange Range) { // First check if we already reference this variable for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { if (Variables[KindIndex] == VarDecl) { // If yes, add a new occurence that points to the existing entry in // the Variables vector. - Occurences.emplace_back(KindIndex); + Occurences.emplace_back(KindIndex, Range); return; } } // If this variable wasn't already referenced, add it to the list of // referenced variables and add a occurence that points to this new entry. - Occurences.emplace_back(Variables.size()); + Occurences.emplace_back(Variables.size(), Range); Variables.push_back(VarDecl); } @@ -127,7 +131,7 @@ class VariablePattern { // Check if S is a reference to a variable. If yes, add it to the pattern. if (auto D = dyn_cast(S)) { if (auto VD = dyn_cast(D->getDecl()->getCanonicalDecl())) - addVariableOccurence(VD); + addVariableOccurence(VD, D->getSourceRange()); } // Recursively check all children of the given statement. @@ -144,33 +148,93 @@ public: addVariables(S); } - /// \brief Compares this pattern with the given one. + /// \brief Counts the differences between this pattern and the given one. /// \param Other The given VariablePattern to compare with. - /// \return Returns true if and only if the references variables in this - /// object follow the same pattern than the ones in the given - /// VariablePattern. + /// \param FirstMismatch Output parameter that will be filled with information + /// about the first difference between the two patterns. This parameter + /// can be a nullptr, in which case it will be ignored. + /// \return Returns the number of differences between the pattern this object + /// is following and the given VariablePattern. /// - /// For example, the following statements all have the same pattern: + /// For example, the following statements all have the same pattern and this + /// function would return zero: /// /// if (a < b) return a; return b; /// if (x < y) return x; return y; /// if (u2 < u1) return u2; return u1; /// - /// but the following statement has a different pattern (note the changed - /// variables in the return statements). + /// But the following statement has a different pattern (note the changed + /// variables in the return statements) and would have two differences when + /// compared with one of the statements above. /// /// if (a < b) return b; return a; /// /// This function should only be called if the related statements of the given /// pattern and the statements of this objects are clones of each other. - bool comparePattern(const VariablePattern &Other) { + unsigned countPatternDifferences( + const VariablePattern &Other, + CloneDetector::SuspiciousClonePair *FirstMismatch = nullptr) { + unsigned NumberOfDifferences = 0; + assert(Other.Occurences.size() == Occurences.size()); for (unsigned i = 0; i < Occurences.size(); ++i) { - if (Occurences[i].KindID != Other.Occurences[i].KindID) { - return false; - } + auto ThisOccurence = Occurences[i]; + auto OtherOccurence = Other.Occurences[i]; + if (ThisOccurence.KindID == OtherOccurence.KindID) + continue; + + ++NumberOfDifferences; + + // If FirstMismatch is not a nullptr, we need to store information about + // the first difference between the two patterns. + if (FirstMismatch == nullptr) + continue; + + // Only proceed if we just found the first difference as we only store + // information about the first difference. + if (NumberOfDifferences != 1) + continue; + + const VarDecl *FirstSuggestion = nullptr; + // If there is a variable available in the list of referenced variables + // which wouldn't break the pattern if it is used in place of the + // current variable, we provide this variable as the suggested fix. + if (OtherOccurence.KindID < Variables.size()) + FirstSuggestion = Variables[OtherOccurence.KindID]; + + // Store information about the first clone. + FirstMismatch->FirstCloneInfo = + CloneDetector::SuspiciousClonePair::SuspiciousCloneInfo( + Variables[ThisOccurence.KindID], ThisOccurence.Range, + FirstSuggestion); + + // Same as above but with the other clone. We do this for both clones as + // we don't know which clone is the one containing the unintended + // pattern error. + const VarDecl *SecondSuggestion = nullptr; + if (ThisOccurence.KindID < Other.Variables.size()) + SecondSuggestion = Other.Variables[ThisOccurence.KindID]; + + // Store information about the second clone. + FirstMismatch->SecondCloneInfo = + CloneDetector::SuspiciousClonePair::SuspiciousCloneInfo( + Variables[ThisOccurence.KindID], OtherOccurence.Range, + SecondSuggestion); + + // SuspiciousClonePair guarantees that the first clone always has a + // suggested variable associated with it. As we know that one of the two + // clones in the pair always has suggestion, we swap the two clones + // in case the first clone has no suggested variable which means that + // the second clone has a suggested variable and should be first. + if (!FirstMismatch->FirstCloneInfo.Suggestion) + std::swap(FirstMismatch->FirstCloneInfo, + FirstMismatch->SecondCloneInfo); + + // This ensures that we always have at least one suggestion in a pair. + assert(FirstMismatch->FirstCloneInfo.Suggestion); } - return true; + + return NumberOfDifferences; } }; } @@ -529,7 +593,8 @@ static void createCloneGroups(std::vector &Result, // and assign them to our new clone group. auto I = UnassignedSequences.begin(), E = UnassignedSequences.end(); while (I != E) { - if (VariablePattern(*I).comparePattern(PrototypeFeatures)) { + + if (VariablePattern(*I).countPatternDifferences(PrototypeFeatures) == 0) { FilteredGroup.Sequences.push_back(*I); I = UnassignedSequences.erase(I); continue; @@ -546,11 +611,15 @@ static void createCloneGroups(std::vector &Result, } void CloneDetector::findClones(std::vector &Result, - unsigned MinGroupComplexity) { + unsigned MinGroupComplexity, + bool CheckPatterns) { // Add every valid clone group that fulfills the complexity requirement. for (const CloneGroup &Group : CloneGroups) { if (Group.isValid() && Group.Complexity >= MinGroupComplexity) { - createCloneGroups(Result, Group); + if (CheckPatterns) + createCloneGroups(Result, Group); + else + Result.push_back(Group); } } @@ -580,3 +649,37 @@ void CloneDetector::findClones(std::vector &Result, Result.erase(Result.begin() + *I); } } + +void CloneDetector::findSuspiciousClones( + std::vector &Result, + unsigned MinGroupComplexity) { + std::vector Clones; + // Reuse the normal search for clones but specify that the clone groups don't + // need to have a common referenced variable pattern so that we can manually + // search for the kind of pattern errors this function is supposed to find. + findClones(Clones, MinGroupComplexity, false); + + for (const CloneGroup &Group : Clones) { + for (unsigned i = 0; i < Group.Sequences.size(); ++i) { + VariablePattern PatternA(Group.Sequences[i]); + + for (unsigned j = i + 1; j < Group.Sequences.size(); ++j) { + VariablePattern PatternB(Group.Sequences[j]); + + CloneDetector::SuspiciousClonePair ClonePair; + // For now, we only report clones which break the variable pattern just + // once because multiple differences in a pattern are an indicator that + // those differences are maybe intended (e.g. because it's actually + // a different algorithm). + // TODO: In very big clones even multiple variables can be unintended, + // so replacing this number with a percentage could better handle such + // cases. On the other hand it could increase the false-positive rate + // for all clones if the percentage is too high. + if (PatternA.countPatternDifferences(PatternB, &ClonePair) == 1) { + Result.push_back(ClonePair); + break; + } + } + } + } +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp index 87c813d..3b4b78b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -34,6 +34,15 @@ public: void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, AnalysisManager &Mgr, BugReporter &BR) const; + + /// \brief Reports all clones to the user. + void reportClones(SourceManager &SM, AnalysisManager &Mgr, + int MinComplexity) const; + + /// \brief Reports only suspicious clones to the user along with informaton + /// that explain why they are suspicious. + void reportSuspiciousClones(SourceManager &SM, AnalysisManager &Mgr, + int MinComplexity) const; }; } // end anonymous namespace @@ -52,10 +61,23 @@ void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger( "MinimumCloneComplexity", 10, this); - assert(MinComplexity >= 0); - SourceManager &SM = BR.getSourceManager(); + bool ReportSuspiciousClones = Mgr.getAnalyzerOptions().getBooleanOption( + "ReportSuspiciousClones", true, this); + + bool ReportNormalClones = Mgr.getAnalyzerOptions().getBooleanOption( + "ReportNormalClones", true, this); + + if (ReportSuspiciousClones) + reportSuspiciousClones(BR.getSourceManager(), Mgr, MinComplexity); + + if (ReportNormalClones) + reportClones(BR.getSourceManager(), Mgr, MinComplexity); +} + +void CloneChecker::reportClones(SourceManager &SM, AnalysisManager &Mgr, + int MinComplexity) const { std::vector CloneGroups; Detector.findClones(CloneGroups, MinComplexity); @@ -87,6 +109,52 @@ void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, } } +void CloneChecker::reportSuspiciousClones(SourceManager &SM, + AnalysisManager &Mgr, + int MinComplexity) const { + + std::vector Clones; + Detector.findSuspiciousClones(Clones, MinComplexity); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + auto SuspiciousCloneWarning = DiagEngine.getCustomDiagID( + DiagnosticsEngine::Warning, "suspicious code clone detected; did you " + "mean to use %0?"); + + auto RelatedCloneNote = DiagEngine.getCustomDiagID( + DiagnosticsEngine::Note, "suggestion is based on the usage of this " + "variable in a similar piece of code"); + + auto RelatedSuspiciousCloneNote = DiagEngine.getCustomDiagID( + DiagnosticsEngine::Note, "suggestion is based on the usage of this " + "variable in a similar piece of code; did you " + "mean to use %0?"); + + for (CloneDetector::SuspiciousClonePair &Pair : Clones) { + // The first clone always has a suggestion and we report it to the user + // along with the place where the suggestion should be used. + DiagEngine.Report(Pair.FirstCloneInfo.VarRange.getBegin(), + SuspiciousCloneWarning) + << Pair.FirstCloneInfo.VarRange << Pair.FirstCloneInfo.Suggestion; + + // The second clone can have a suggestion and if there is one, we report + // that suggestion to the user. + if (Pair.SecondCloneInfo.Suggestion) { + DiagEngine.Report(Pair.SecondCloneInfo.VarRange.getBegin(), + RelatedSuspiciousCloneNote) + << Pair.SecondCloneInfo.VarRange << Pair.SecondCloneInfo.Suggestion; + continue; + } + + // If there isn't a suggestion in the second clone, we only inform the + // user where we got the idea that his code could contain an error. + DiagEngine.Report(Pair.SecondCloneInfo.VarRange.getBegin(), + RelatedCloneNote) + << Pair.SecondCloneInfo.VarRange; + } +} + //===----------------------------------------------------------------------===// // Register CloneChecker //===----------------------------------------------------------------------===// diff --git a/clang/test/Analysis/copypaste/suspicious-clones.cpp b/clang/test/Analysis/copypaste/suspicious-clones.cpp new file mode 100644 index 0000000..fb22d38c8 --- /dev/null +++ b/clang/test/Analysis/copypaste/suspicious-clones.cpp @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:ReportSuspiciousClones=true -analyzer-config alpha.clone.CloneChecker:ReportNormalClones=false -verify %s + +// Tests finding a suspicious clone that references local variables. + +void log(); + +int max(int a, int b) { + log(); + if (a > b) + return a; + return b; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code}} +} + +int maxClone(int x, int y, int z) { + log(); + if (x > y) + return x; + return z; // expected-warning{{suspicious code clone detected; did you mean to use 'y'?}} +} + +// Tests finding a suspicious clone that references global variables. + +struct mutex { + bool try_lock(); + void unlock(); +}; + +mutex m1; +mutex m2; +int i; + +void busyIncrement() { + while (true) { + if (m1.try_lock()) { + ++i; + m1.unlock(); // expected-note{{suggestion is based on the usage of this variable in a similar piece of code}} + if (i > 1000) { + return; + } + } + } +} + +void faultyBusyIncrement() { + while (true) { + if (m1.try_lock()) { + ++i; + m2.unlock(); // expected-warning{{suspicious code clone detected; did you mean to use 'm1'?}} + if (i > 1000) { + return; + } + } + } +} + +// Tests that we provide two suggestions in cases where two fixes are possible. + +int foo(int a, int b, int c) { + a += b + c; + b /= a + b; + c -= b * a; // expected-warning{{suspicious code clone detected; did you mean to use 'a'?}} + return c; +} + +int fooClone(int a, int b, int c) { + a += b + c; + b /= a + b; + c -= a * a; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code; did you mean to use 'b'?}} + return c; +} + + +// Tests that for clone groups with a many possible suspicious clone pairs, at +// most one warning per clone group is generated and every relevant clone is +// reported through either a warning or a note. + +long bar1(long a, long b, long c, long d) { + c = a - b; + c = c / d * a; + d = b * b - c; // expected-warning{{suspicious code clone detected; did you mean to use 'c'?}} + return d; +} + +long bar2(long a, long b, long c, long d) { + c = a - b; + c = c / d * a; + d = c * b - c; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code; did you mean to use 'b'?}} \ + // expected-warning{{suspicious code clone detected; did you mean to use 'a'?}} + return d; +} + +long bar3(long a, long b, long c, long d) { + c = a - b; + c = c / d * a; + d = a * b - c; // expected-note{{suggestion is based on the usage of this variable in a similar piece of code; did you mean to use 'c'?}} + return d; +} -- 2.7.4