From 51b9a0e8e8ab7b6ad0458163248035afef08d5b4 Mon Sep 17 00:00:00 2001 From: Artem Dergachev Date: Sat, 20 Aug 2016 10:06:59 +0000 Subject: [PATCH] [analyzer] Make CloneDetector consider macro expansions. So far macro-generated code was treated by the CloneDetector as normal code. This caused that some macros where reported as false-positive clones because large chunks of code coming from otherwise concise macro expansions were treated as copy-pasted code. This patch ensures that macros are treated in the same way as literals/function calls. This prevents macros that expand into multiple statements from being reported as clones. Patch by Raphael Isemann! Differential Revision: https://reviews.llvm.org/D23316 llvm-svn: 279367 --- clang/include/clang/Analysis/CloneDetection.h | 13 +++- clang/lib/Analysis/CloneDetection.cpp | 71 ++++++++++++++++++++-- clang/test/Analysis/copypaste/macro-complexity.cpp | 50 +++++++++++++++ clang/test/Analysis/copypaste/macros.cpp | 67 ++++++++++++++++++++ 4 files changed, 194 insertions(+), 7 deletions(-) create mode 100644 clang/test/Analysis/copypaste/macro-complexity.cpp create mode 100644 clang/test/Analysis/copypaste/macros.cpp diff --git a/clang/include/clang/Analysis/CloneDetection.h b/clang/include/clang/Analysis/CloneDetection.h index d031bc3..0385349 100644 --- a/clang/include/clang/Analysis/CloneDetection.h +++ b/clang/include/clang/Analysis/CloneDetection.h @@ -171,9 +171,16 @@ public: /// \brief The complexity of the StmtSequence. /// - /// This scalar value serves as a simple way of filtering clones that are - /// too small to be reported. A greater value indicates that the related - /// StmtSequence is probably more interesting to the user. + /// This value gives an approximation on how many direct or indirect child + /// statements are contained in the related StmtSequence. In general, the + /// greater this value, the greater the amount of statements. However, this + /// is only an approximation and the actual amount of statements can be + /// higher or lower than this value. Statements that are generated by the + /// compiler (e.g. macro expansions) for example barely influence the + /// complexity value. + /// + /// The main purpose of this value is to filter clones that are too small + /// and therefore probably not interesting enough for the user. unsigned Complexity; /// \brief Creates an empty CloneSignature without any data. diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp index ee1c694..9bba611 100644 --- a/clang/lib/Analysis/CloneDetection.cpp +++ b/clang/lib/Analysis/CloneDetection.cpp @@ -17,7 +17,9 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Lex/Lexer.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" using namespace clang; @@ -239,6 +241,38 @@ public: }; } +/// \brief Prints the macro name that contains the given SourceLocation into +/// the given raw_string_ostream. +static void printMacroName(llvm::raw_string_ostream &MacroStack, + ASTContext &Context, SourceLocation Loc) { + MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(), + Context.getLangOpts()); + + // Add an empty space at the end as a padding to prevent + // that macro names concatenate to the names of other macros. + MacroStack << " "; +} + +/// \brief Returns a string that represents all macro expansions that +/// expanded into the given SourceLocation. +/// +/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations +/// A and B are expanded from the same macros in the same order. +static std::string getMacroStack(SourceLocation Loc, ASTContext &Context) { + std::string MacroStack; + llvm::raw_string_ostream MacroStackStream(MacroStack); + SourceManager &SM = Context.getSourceManager(); + + // Iterate over all macros that expanded into the given SourceLocation. + while (Loc.isMacroID()) { + // Add the macro name to the stream. + printMacroName(MacroStackStream, Context, Loc); + Loc = SM.getImmediateMacroCallerLoc(Loc); + } + MacroStackStream.flush(); + return MacroStack; +} + namespace { /// \brief Collects the data of a single Stmt. /// @@ -299,7 +333,13 @@ public: ConstStmtVisitor::Visit##CLASS(S); \ } - DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); }) + DEF_ADD_DATA(Stmt, { + addData(S->getStmtClass()); + // This ensures that macro generated code isn't identical to macro-generated + // code. + addData(getMacroStack(S->getLocStart(), Context)); + addData(getMacroStack(S->getLocEnd(), Context)); + }) DEF_ADD_DATA(Expr, { addData(S->getType()); }) //--- Builtin functionality ----------------------------------------------// @@ -434,14 +474,36 @@ class CloneSignatureGenerator { /// tree and stores them in the CloneDetector. /// /// \param S The root of the given statement tree. + /// \param ParentMacroStack A string representing the macros that generated + /// the parent statement or an empty string if no + /// macros generated the parent statement. + /// See getMacroStack() for generating such a string. /// \return The CloneSignature of the root statement. - CloneDetector::CloneSignature generateSignatures(const Stmt *S) { + CloneDetector::CloneSignature + generateSignatures(const Stmt *S, const std::string &ParentMacroStack) { // Create an empty signature that will be filled in this method. CloneDetector::CloneSignature Signature; // Collect all relevant data from S and put it into the empty signature. StmtDataCollector(S, Context, Signature.Data); + // Look up what macros expanded into the current statement. + std::string StartMacroStack = getMacroStack(S->getLocStart(), Context); + std::string EndMacroStack = getMacroStack(S->getLocEnd(), Context); + + // First, check if ParentMacroStack is not empty which means we are currently + // dealing with a parent statement which was expanded from a macro. + // If this parent statement was expanded from the same macros as this + // statement, we reduce the initial complexity of this statement to zero. + // This causes that a group of statements that were generated by a single + // macro expansion will only increase the total complexity by one. + // Note: This is not the final complexity of this statement as we still + // add the complexity of the child statements to the complexity value. + if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack && + EndMacroStack == ParentMacroStack)) { + Signature.Complexity = 0; + } + // Storage for the signatures of the direct child statements. This is only // needed if the current statement is a CompoundStmt. std::vector ChildSignatures; @@ -457,7 +519,8 @@ class CloneSignatureGenerator { // Recursive call to create the signature of the child statement. This // will also create and store all clone groups in this child statement. - auto ChildSignature = generateSignatures(Child); + // We pass only the StartMacroStack along to keep things simple. + auto ChildSignature = generateSignatures(Child, StartMacroStack); // Add the collected data to the signature of the current statement. Signature.add(ChildSignature); @@ -518,7 +581,7 @@ public: : CD(CD), Context(Context) {} /// \brief Generates signatures for all statements in the given function body. - void consumeCodeBody(const Stmt *S) { generateSignatures(S); } + void consumeCodeBody(const Stmt *S) { generateSignatures(S, ""); } }; } // end anonymous namespace diff --git a/clang/test/Analysis/copypaste/macro-complexity.cpp b/clang/test/Analysis/copypaste/macro-complexity.cpp new file mode 100644 index 0000000..58b884a --- /dev/null +++ b/clang/test/Analysis/copypaste/macro-complexity.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:MinimumCloneComplexity=10 -verify %s + +// Tests that the complexity value of a macro expansion is about the same as +// the complexity value of a normal function call and the the macro body doesn't +// influence the complexity. See the CloneSignature class in CloneDetection.h +// for more information about complexity values of clones. + +#define MACRO_FOO(a, b) a > b ? -a * a : -b * b; + +// First, manually apply MACRO_FOO and see if the code gets detected as a clone. +// This confirms that with the current configuration the macro body would be +// considered large enough to pass the MinimumCloneComplexity constraint. + +int manualMacro(int a, int b) { // expected-warning{{Detected code clone.}} + return a > b ? -a * a : -b * b; +} + +int manualMacroClone(int a, int b) { // expected-note{{Related code clone is here.}} + return a > b ? -a * a : -b * b; +} + +// Now we actually use the macro to generate the same AST as above. They +// shouldn't be reported because the macros only slighly increase the complexity +// value and the resulting code will never pass the MinimumCloneComplexity +// constraint. + +int macro(int a, int b) { + return MACRO_FOO(a, b); +} + +int macroClone(int a, int b) { + return MACRO_FOO(a, b); +} + +// So far we only tested that macros increase the complexity by a lesser amount +// than normal code. We also need to be sure this amount is not zero because +// we otherwise macro code would be 'invisible' for the CloneDetector. +// This tests that it is possible to increase the reach the minimum complexity +// by only using macros. This is only possible if the complexity value is bigger +// than zero. + +#define NEG(A) -(A) + +int nestedMacros() { // expected-warning{{Detected code clone.}} + return NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(1)))))))))); +} + +int nestedMacrosClone() { // expected-note{{Related code clone is here.}} + return NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(1)))))))))); +} diff --git a/clang/test/Analysis/copypaste/macros.cpp b/clang/test/Analysis/copypaste/macros.cpp new file mode 100644 index 0000000..170e034 --- /dev/null +++ b/clang/test/Analysis/copypaste/macros.cpp @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// Tests that macros and non-macro clones aren't mixed into the same hash +// group. This is currently necessary as all clones in a hash group need +// to have the same complexity value. Macros have smaller complexity values +// and need to be in their own hash group. + +int foo(int a) { // expected-warning{{Detected code clone.}} + a = a + 1; + a = a + 1 / 1; + a = a + 1 + 1 + 1; + a = a + 1 - 1 + 1 + 1; + a = a + 1 * 1 + 1 + 1 + 1; + a = a + 1 / 1 + 1 + 1 + 1; + return a; +} + +int fooClone(int a) { // expected-note{{Related code clone is here.}} + a = a + 1; + a = a + 1 / 1; + a = a + 1 + 1 + 1; + a = a + 1 - 1 + 1 + 1; + a = a + 1 * 1 + 1 + 1 + 1; + a = a + 1 / 1 + 1 + 1 + 1; + return a; +} + +// Below is the same AST as above but this time generated with macros. The +// clones below should land in their own hash group for the reasons given above. + +#define ASSIGN(T, V) T = T + V + +int macro(int a) { // expected-warning{{Detected code clone.}} + ASSIGN(a, 1); + ASSIGN(a, 1 / 1); + ASSIGN(a, 1 + 1 + 1); + ASSIGN(a, 1 - 1 + 1 + 1); + ASSIGN(a, 1 * 1 + 1 + 1 + 1); + ASSIGN(a, 1 / 1 + 1 + 1 + 1); + return a; +} + +int macroClone(int a) { // expected-note{{Related code clone is here.}} + ASSIGN(a, 1); + ASSIGN(a, 1 / 1); + ASSIGN(a, 1 + 1 + 1); + ASSIGN(a, 1 - 1 + 1 + 1); + ASSIGN(a, 1 * 1 + 1 + 1 + 1); + ASSIGN(a, 1 / 1 + 1 + 1 + 1); + return a; +} + +// FIXME: Macros with empty definitions in the AST are currently ignored. + +#define EMPTY + +int fooFalsePositiveClone(int a) { // expected-note{{Related code clone is here.}} + a = EMPTY a + 1; + a = a + 1 / 1; + a = a + 1 + 1 + 1; + a = a + 1 - 1 + 1 + 1; + a = a + 1 * 1 + 1 + 1 + 1; + a = a + 1 / 1 + 1 + 1 + 1; + return a; +} + + -- 2.7.4