[StaticAnalyzer] Completely unrolling specific loops with known bound option
authorPeter Szecsi <szepet95@gmail.com>
Tue, 25 Jul 2017 19:23:23 +0000 (19:23 +0000)
committerPeter Szecsi <szepet95@gmail.com>
Tue, 25 Jul 2017 19:23:23 +0000 (19:23 +0000)
This feature allows the analyzer to consider loops to completely unroll.
New requirements/rules (for unrolling) can be added easily via ASTMatchers.

Right now it is hidden behind a flag, the aim is to find the correct heuristic
and create a solution which results higher coverage % and more precise
analysis, thus can be enabled by default.

Right now the blocks which belong to an unrolled loop are marked by the
LoopVisitor which adds them to the ProgramState.
Then whenever we encounter a CFGBlock in the processCFGBlockEntrance which is
marked then we skip its investigating. That means, it won't be considered to
be visited more than the maximal bound for visiting since it won't be checked.

llvm-svn: 309006

clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
clang/include/clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h [new file with mode: 0644]
clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
clang/lib/StaticAnalyzer/Core/CMakeLists.txt
clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp [new file with mode: 0644]
clang/test/Analysis/analyzer-config.c
clang/test/Analysis/analyzer-config.cpp
clang/test/Analysis/loop-unrolling.cpp [new file with mode: 0644]

index 5dd6bdf..66e1a74 100644 (file)
@@ -275,6 +275,9 @@ private:
   /// \sa shouldWidenLoops
   Optional<bool> WidenLoops;
 
+  /// \sa shouldUnrollLoops
+  Optional<bool> UnrollLoops;
+
   /// \sa shouldDisplayNotesAsEvents
   Optional<bool> DisplayNotesAsEvents;
 
@@ -560,6 +563,10 @@ public:
   /// This is controlled by the 'widen-loops' config option.
   bool shouldWidenLoops();
 
+  /// Returns true if the analysis should try to unroll loops with known bounds.
+  /// This is controlled by the 'unroll-loops' config option.
+  bool shouldUnrollLoops();
+
   /// Returns true if the bug reporter should transparently treat extra note
   /// diagnostic pieces as event diagnostic pieces. Useful when the diagnostic
   /// consumer doesn't support the extra note pieces.
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h
new file mode 100644 (file)
index 0000000..9bb4d72
--- /dev/null
@@ -0,0 +1,34 @@
+//===--- LoopUnrolling.h - Unroll loops -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This header contains the declarations of functions which are used to decide
+/// which loops should be completely unrolled and mark their corresponding
+/// CFGBlocks.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_LOOPUNROLLING_H
+#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_LOOPUNROLLING_H
+
+#include "clang/Analysis/CFG.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
+
+namespace clang {
+namespace ento {
+ProgramStateRef markLoopAsUnrolled(const Stmt *Term, ProgramStateRef State,
+                                   const FunctionDecl *FD);
+bool isUnrolledLoopBlock(const CFGBlock *Block, ExplodedNode *Pred,
+                         AnalysisManager &AMgr);
+bool shouldCompletelyUnroll(const Stmt *LoopStmt, ASTContext &ASTCtx);
+
+} // end namespace ento
+} // end namespace clang
+
+#endif
index 32040e7..4a581bd 100644 (file)
@@ -272,6 +272,7 @@ void ExprInspectionChecker::checkEndAnalysis(ExplodedGraph &G, BugReporter &BR,
 
     reportBug(llvm::to_string(NumTimesReached), BR, N);
   }
+  ReachedStats.clear();
 }
 
 void ExprInspectionChecker::analyzerCrash(const CallExpr *CE,
index 6f48fcb..d5d6527 100644 (file)
@@ -375,6 +375,12 @@ bool AnalyzerOptions::shouldWidenLoops() {
   return WidenLoops.getValue();
 }
 
+bool AnalyzerOptions::shouldUnrollLoops() {
+  if (!UnrollLoops.hasValue())
+    UnrollLoops = getBooleanOption("unroll-loops", /*Default=*/false);
+  return UnrollLoops.getValue();
+}
+
 bool AnalyzerOptions::shouldDisplayNotesAsEvents() {
   if (!DisplayNotesAsEvents.hasValue())
     DisplayNotesAsEvents =
index 85878f5..5ac4f94 100644 (file)
@@ -35,6 +35,7 @@ add_clang_library(clangStaticAnalyzerCore
   ExprEngineObjC.cpp
   FunctionSummary.cpp
   HTMLDiagnostics.cpp
+  LoopUnrolling.cpp
   LoopWidening.cpp
   MemRegion.cpp
   PathDiagnostic.cpp
@@ -54,6 +55,7 @@ add_clang_library(clangStaticAnalyzerCore
 
   LINK_LIBS
   clangAST
+  clangASTMatchers
   clangAnalysis
   clangBasic
   clangLex
index eee5400..2970a6c 100644 (file)
@@ -17,6 +17,7 @@
 #include "PrettyStackTraceLocationContext.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/ParentMap.h"
+#include "clang/Analysis/CFGStmtMap.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
 #include "clang/Basic/Builtins.h"
@@ -27,6 +28,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/LoopWidening.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1497,6 +1499,26 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
                                          NodeBuilderWithSinks &nodeBuilder,
                                          ExplodedNode *Pred) {
   PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
+  // If we reach a loop which has a known bound (and meets
+  // other constraints) then consider completely unrolling it.
+  if (AMgr.options.shouldUnrollLoops()) {
+    const CFGBlock *ActualBlock = nodeBuilder.getContext().getBlock();
+    const Stmt *Term = ActualBlock->getTerminator();
+    if (Term && shouldCompletelyUnroll(Term, AMgr.getASTContext())) {
+      ProgramStateRef UnrolledState = markLoopAsUnrolled(
+              Term, Pred->getState(),
+              cast<FunctionDecl>(Pred->getStackFrame()->getDecl()));
+      if (UnrolledState != Pred->getState())
+        nodeBuilder.generateNode(UnrolledState, Pred);
+      return;
+    }
+
+    if (ActualBlock->empty())
+      return;
+
+    if (isUnrolledLoopBlock(ActualBlock, Pred, AMgr))
+      return;
+  }
 
   // If this block is terminated by a loop and it has already been visited the
   // maximum number of times, widen the loop.
diff --git a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
new file mode 100644 (file)
index 0000000..a88e062
--- /dev/null
@@ -0,0 +1,207 @@
+//===--- LoopUnrolling.cpp - Unroll loops -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file contains functions which are used to decide if a loop worth to be
+/// unrolled. Moreover contains function which mark the CFGBlocks which belongs
+/// to the unrolled loop and store them in ProgramState.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/CFGStmtMap.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/AST/ParentMap.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace clang;
+using namespace ento;
+using namespace clang::ast_matchers;
+
+#define DEBUG_TYPE "LoopUnrolling"
+
+STATISTIC(NumTimesLoopUnrolled,
+          "The # of times a loop has got completely unrolled");
+
+REGISTER_MAP_WITH_PROGRAMSTATE(UnrolledLoops, const Stmt *,
+                               const FunctionDecl *)
+
+namespace clang {
+namespace ento {
+
+static bool isLoopStmt(const Stmt *S) {
+  return S && (isa<ForStmt>(S) || isa<WhileStmt>(S) || isa<DoStmt>(S));
+}
+
+static internal::Matcher<Stmt> simpleCondition(StringRef BindName) {
+  return binaryOperator(
+      anyOf(hasOperatorName("<"), hasOperatorName(">"), hasOperatorName("<="),
+            hasOperatorName(">="), hasOperatorName("!=")),
+      hasEitherOperand(ignoringParenImpCasts(
+          declRefExpr(to(varDecl(hasType(isInteger())).bind(BindName))))),
+      hasEitherOperand(ignoringParenImpCasts(integerLiteral())));
+}
+
+static internal::Matcher<Stmt> changeIntBoundNode(StringRef NodeName) {
+  return anyOf(hasDescendant(unaryOperator(
+                   anyOf(hasOperatorName("--"), hasOperatorName("++")),
+                   hasUnaryOperand(ignoringParenImpCasts(
+                       declRefExpr(to(varDecl(equalsBoundNode(NodeName)))))))),
+               hasDescendant(binaryOperator(
+                   anyOf(hasOperatorName("="), hasOperatorName("+="),
+                         hasOperatorName("/="), hasOperatorName("*="),
+                         hasOperatorName("-=")),
+                   hasLHS(ignoringParenImpCasts(
+                       declRefExpr(to(varDecl(equalsBoundNode(NodeName)))))))));
+}
+
+static internal::Matcher<Stmt> callByRef(StringRef NodeName) {
+  return hasDescendant(callExpr(forEachArgumentWithParam(
+      declRefExpr(to(varDecl(equalsBoundNode(NodeName)))),
+      parmVarDecl(hasType(references(qualType(unless(isConstQualified()))))))));
+}
+
+static internal::Matcher<Stmt> assignedToRef(StringRef NodeName) {
+  return hasDescendant(varDecl(
+      allOf(hasType(referenceType()),
+            hasInitializer(
+                anyOf(initListExpr(has(
+                          declRefExpr(to(varDecl(equalsBoundNode(NodeName)))))),
+                      declRefExpr(to(varDecl(equalsBoundNode(NodeName)))))))));
+}
+
+static internal::Matcher<Stmt> getAddrTo(StringRef NodeName) {
+  return hasDescendant(unaryOperator(
+      hasOperatorName("&"),
+      hasUnaryOperand(declRefExpr(hasDeclaration(equalsBoundNode(NodeName))))));
+}
+
+static internal::Matcher<Stmt> hasSuspiciousStmt(StringRef NodeName) {
+  return anyOf(hasDescendant(gotoStmt()), hasDescendant(switchStmt()),
+               // Escaping and not known mutation of the loop counter is handled
+               // by exclusion of assigning and address-of operators and
+               // pass-by-ref function calls on the loop counter from the body.
+               changeIntBoundNode(NodeName), callByRef(NodeName),
+               getAddrTo(NodeName), assignedToRef(NodeName));
+}
+
+static internal::Matcher<Stmt> forLoopMatcher() {
+  return forStmt(
+             hasCondition(simpleCondition("initVarName")),
+             // Initialization should match the form: 'int i = 6' or 'i = 42'.
+             hasLoopInit(
+                 anyOf(declStmt(hasSingleDecl(
+                           varDecl(allOf(hasInitializer(integerLiteral()),
+                                         equalsBoundNode("initVarName"))))),
+                       binaryOperator(hasLHS(declRefExpr(to(varDecl(
+                                          equalsBoundNode("initVarName"))))),
+                                      hasRHS(integerLiteral())))),
+             // Incrementation should be a simple increment or decrement
+             // operator call.
+             hasIncrement(unaryOperator(
+                 anyOf(hasOperatorName("++"), hasOperatorName("--")),
+                 hasUnaryOperand(declRefExpr(
+                     to(varDecl(allOf(equalsBoundNode("initVarName"),
+                                      hasType(isInteger())))))))),
+             unless(hasBody(hasSuspiciousStmt("initVarName")))).bind("forLoop");
+}
+
+bool shouldCompletelyUnroll(const Stmt *LoopStmt, ASTContext &ASTCtx) {
+
+  if (!isLoopStmt(LoopStmt))
+    return false;
+
+  // TODO: Match the cases where the bound is not a concrete literal but an
+  // integer with known value
+
+  auto Matches = match(forLoopMatcher(), *LoopStmt, ASTCtx);
+  return !Matches.empty();
+}
+
+namespace {
+class LoopBlockVisitor : public ConstStmtVisitor<LoopBlockVisitor> {
+public:
+  LoopBlockVisitor(llvm::SmallPtrSet<const CFGBlock *, 8> &BS) : BlockSet(BS) {}
+
+  void VisitChildren(const Stmt *S) {
+    for (const Stmt *Child : S->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void VisitStmt(const Stmt *S) {
+    // In case of nested loops we only unroll the inner loop if it's marked too.
+    if (!S || (isLoopStmt(S) && S != LoopStmt))
+      return;
+    BlockSet.insert(StmtToBlockMap->getBlock(S));
+    VisitChildren(S);
+  }
+
+  void setBlocksOfLoop(const Stmt *Loop, const CFGStmtMap *M) {
+    BlockSet.clear();
+    StmtToBlockMap = M;
+    LoopStmt = Loop;
+    Visit(LoopStmt);
+  }
+
+private:
+  llvm::SmallPtrSet<const CFGBlock *, 8> &BlockSet;
+  const CFGStmtMap *StmtToBlockMap;
+  const Stmt *LoopStmt;
+};
+}
+// TODO: refactor this function using LoopExit CFG element - once we have the
+// information when the simulation reaches the end of the loop we can cleanup
+// the state
+bool isUnrolledLoopBlock(const CFGBlock *Block, ExplodedNode *Pred,
+                         AnalysisManager &AMgr) {
+  const Stmt *Term = Block->getTerminator();
+  auto State = Pred->getState();
+  // In case of nested loops in an inlined function should not be unrolled only
+  // if the inner loop is marked.
+  if (Term && isLoopStmt(Term) && !State->contains<UnrolledLoops>(Term))
+    return false;
+
+  const CFGBlock *SearchedBlock;
+  llvm::SmallPtrSet<const CFGBlock *, 8> BlockSet;
+  LoopBlockVisitor LBV(BlockSet);
+  // Check the CFGBlocks of every marked loop.
+  for (auto &E : State->get<UnrolledLoops>()) {
+    SearchedBlock = Block;
+    const StackFrameContext *StackFrame = Pred->getStackFrame();
+    ParentMap PM(E.second->getBody());
+    CFGStmtMap *M = CFGStmtMap::Build(AMgr.getCFG(E.second), &PM);
+    LBV.setBlocksOfLoop(E.first, M);
+    // In case of an inlined function call check if any of its callSiteBlock is
+    // marked.
+    while (SearchedBlock && BlockSet.find(SearchedBlock) == BlockSet.end()) {
+      SearchedBlock = StackFrame->getCallSiteBlock();
+      StackFrame = StackFrame->getParent()->getCurrentStackFrame();
+    }
+    delete M;
+    if (SearchedBlock)
+      return true;
+  }
+  return false;
+}
+
+ProgramStateRef markLoopAsUnrolled(const Stmt *Term, ProgramStateRef State,
+                                   const FunctionDecl *FD) {
+  if (State->contains<UnrolledLoops>(Term))
+    return State;
+
+  State = State->set<UnrolledLoops>(Term, FD);
+  ++NumTimesLoopUnrolled;
+  return State;
+}
+}
+}
index 0d8d34f..7bed7cb 100644 (file)
@@ -27,6 +27,7 @@ void foo() {
 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
 // CHECK-NEXT: mode = deep
 // CHECK-NEXT: region-store-small-struct-limit = 2
+// CHECK-NEXT: unroll-loops = false
 // CHECK-NEXT: widen-loops = false
 // CHECK-NEXT: [stats]
-// CHECK-NEXT: num-entries = 17
+// CHECK-NEXT: num-entries = 18
index 4166a73..155ee02 100644 (file)
@@ -38,6 +38,7 @@ public:
 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
 // CHECK-NEXT: mode = deep
 // CHECK-NEXT: region-store-small-struct-limit = 2
+// CHECK-NEXT: unroll-loops = false
 // CHECK-NEXT: widen-loops = false
 // CHECK-NEXT: [stats]
-// CHECK-NEXT: num-entries = 22
+// CHECK-NEXT: num-entries = 23
diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp
new file mode 100644 (file)
index 0000000..9381c4c
--- /dev/null
@@ -0,0 +1,176 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true -analyzer-stats -verify -std=c++11 %s
+
+void clang_analyzer_numTimesReached();
+
+int getNum();
+void foo(int &);
+// Testing for loops.
+int simple_unroll1() {
+  int a[9];
+  int k = 42;
+  for (int i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{9}}
+    a[i] = 42;
+  }
+  int b = 22 / (k - 42); // expected-warning {{Division by zero}}
+  return 0;
+}
+
+int simple_unroll2() {
+  int a[9];
+  int k = 42;
+  int i;
+  for (i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{9}}
+    a[i] = 42;
+  }
+  int b = 22 / (k - 42); // expected-warning {{Division by zero}}
+  return 0;
+}
+
+int simple_no_unroll1() {
+  int a[9];
+  int k = 42;
+  for (int i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    a[i] = 42;
+    foo(i);
+  }
+  int b = 22 / (k - 42); // expected-warning {{Division by zero}}
+  return 0;
+}
+
+int simple_no_unroll2() {
+  int a[9];
+  int k = 42;
+  int i;
+  for (i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    a[i] = 42;
+    i += getNum();
+  }
+  int b = 22 / (k - 42); // expected-warning {{Division by zero}}
+  return 0;
+}
+
+int simple_no_unroll3() {
+  int a[9];
+  int k = 42;
+  int i;
+  for (i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    a[i] = 42;
+    (void)&i;
+  }
+  int b = 22 / (k - 42); // no-warning
+  return 0;
+}
+
+int simple_no_unroll4() {
+  int a[9];
+  int k = 42;
+  int i;
+  for (i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    a[i] = 42;
+    int &j = i;
+  }
+  int b = 22 / (k - 42); // no-warning
+  return 0;
+}
+
+int simple_no_unroll5() {
+  int a[9];
+  int k = 42;
+  int i;
+  for (i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    a[i] = 42;
+    int &j{i};
+  }
+  int b = 22 / (k - 42); // no-warning
+  return 0;
+}
+
+int nested_outer_unrolled() {
+  int a[9];
+  int k = 42;
+  int j = 0;
+  for (int i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{16}}
+    for (j = 0; j < getNum(); ++j) {
+      clang_analyzer_numTimesReached(); // expected-warning {{15}}
+      a[j] = 22;
+    }
+    a[i] = 42;
+  }
+  int b = 22 / (k - 42); // no-warning
+  return 0;
+}
+
+int nested_inner_unrolled() {
+  int a[9];
+  int k = 42;
+  int j = 0;
+  for (int i = 0; i < getNum(); i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{4}}
+    for (j = 0; j < 8; ++j) {
+      clang_analyzer_numTimesReached(); // expected-warning {{32}}
+      a[j] = 22;
+    }
+    a[i] = 42;
+  }
+  int b = 22 / (k - 42); // expected-warning {{Division by zero}}
+  return 0;
+}
+
+int nested_both_unrolled() {
+  int a[9];
+  int k = 42;
+  int j = 0;
+  for (int i = 0; i < 7; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{7}}
+    for (j = 0; j < 6; ++j) {
+      clang_analyzer_numTimesReached(); // expected-warning {{42}}
+      a[j] = 22;
+    }
+    a[i] = 42;
+  }
+  int b = 22 / (k - 42); // expected-warning {{Division by zero}}
+  return 0;
+}
+
+int simple_known_bound_loop() {
+  for (int i = 2; i < 12; i++) {
+    // This function is inlined in nested_inlined_unroll1()
+    clang_analyzer_numTimesReached(); // expected-warning {{90}}
+  }
+  return 0;
+}
+
+int simple_unknown_bound_loop() {
+  for (int i = 2; i < getNum(); i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{10}}
+  }
+  return 0;
+}
+
+int nested_inlined_unroll1() {
+  int k;
+  for (int i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{9}}
+    k = simple_known_bound_loop();    // no reevaluation without inlining
+  }
+  int a = 22 / k; // expected-warning {{Division by zero}}
+  return 0;
+}
+
+int nested_inlined_no_unroll1() {
+  int k;
+  for (int i = 0; i < 9; i++) {
+    clang_analyzer_numTimesReached(); // expected-warning {{26}}
+    k = simple_unknown_bound_loop();  // reevaluation without inlining
+  }
+  int a = 22 / k; // no-warning
+  return 0;
+}