[libFuzzer] Scale energy assigned to each input based on input execution time.
authorDokyung Song <dokyungs@google.com>
Mon, 17 Aug 2020 16:59:59 +0000 (16:59 +0000)
committerDokyung Song <dokyungs@google.com>
Thu, 3 Sep 2020 20:38:20 +0000 (20:38 +0000)
This patch scales the energy computed by the Entropic schedule based on the
execution time of each input. The input execution time is compared with the
average execution time of inputs in the corpus, and, based on the amount by
which they differ, the energy is scaled from 0.1x (for inputs executing slow) to
3x (for inputs executing fast). Note that the exact scaling criteria and formula
is borrowed from AFL.

On FuzzBench, this gives a sizeable throughput increase, which in turn leads to
more coverage on several benchmarks. For details, see the following report.

https://storage.googleapis.com/fuzzer-test-suite-public/exectime-report/index.html

Differential Revision: https://reviews.llvm.org/D86092

compiler-rt/lib/fuzzer/FuzzerCorpus.h
compiler-rt/lib/fuzzer/FuzzerDriver.cpp
compiler-rt/lib/fuzzer/FuzzerFlags.def
compiler-rt/lib/fuzzer/FuzzerLoop.cpp
compiler-rt/lib/fuzzer/FuzzerOptions.h
compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp [new file with mode: 0644]
compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test [new file with mode: 0644]

index 6b45d37..daea4f5 100644 (file)
@@ -18,6 +18,7 @@
 #include "FuzzerSHA1.h"
 #include "FuzzerTracePC.h"
 #include <algorithm>
+#include <chrono>
 #include <numeric>
 #include <random>
 #include <unordered_set>
@@ -26,6 +27,7 @@ namespace fuzzer {
 
 struct InputInfo {
   Unit U;  // The actual input data.
+  std::chrono::microseconds TimeOfUnit;
   uint8_t Sha1[kSHA1NumBytes];  // Checksum.
   // Number of features that this input has and no smaller input has.
   size_t NumFeatures = 0;
@@ -62,11 +64,15 @@ struct InputInfo {
   }
 
   // Assign more energy to a high-entropy seed, i.e., that reveals more
-  // information about the globally rare features in the neighborhood
-  // of the seed. Since we do not know the entropy of a seed that has
-  // never been executed we assign fresh seeds maximum entropy and
-  // let II->Energy approach the true entropy from above.
-  void UpdateEnergy(size_t GlobalNumberOfFeatures) {
+  // information about the globally rare features in the neighborhood of the
+  // seed. Since we do not know the entropy of a seed that has never been
+  // executed we assign fresh seeds maximum entropy and let II->Energy approach
+  // the true entropy from above. If ScalePerExecTime is true, the computed
+  // entropy is scaled based on how fast this input executes compared to the
+  // average execution time of inputs. The faster an input executes, the more
+  // energy gets assigned to the input.
+  void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime,
+                    std::chrono::microseconds AverageUnitExecutionTime) {
     Energy = 0.0;
     SumIncidence = 0;
 
@@ -89,6 +95,27 @@ struct InputInfo {
     // Normalize.
     if (SumIncidence != 0)
       Energy = (Energy / SumIncidence) + logl(SumIncidence);
+
+    if (ScalePerExecTime) {
+      // Scaling to favor inputs with lower execution time.
+      uint32_t PerfScore = 100;
+      if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10)
+        PerfScore = 10;
+      else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4)
+        PerfScore = 25;
+      else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2)
+        PerfScore = 50;
+      else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4)
+        PerfScore = 75;
+      else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count())
+        PerfScore = 300;
+      else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count())
+        PerfScore = 200;
+      else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count())
+        PerfScore = 150;
+
+      Energy *= PerfScore;
+    }
   }
 
   // Increment the frequency of the feature Idx.
@@ -121,6 +148,7 @@ struct EntropicOptions {
   bool Enabled;
   size_t NumberOfRarestFeatures;
   size_t FeatureFrequencyThreshold;
+  bool ScalePerExecTime;
 };
 
 class InputCorpus {
@@ -179,6 +207,7 @@ public:
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
                          bool HasFocusFunction, bool NeverReduce,
+                         std::chrono::microseconds TimeOfUnit,
                          const Vector<uint32_t> &FeatureSet,
                          const DataFlowTrace &DFT, const InputInfo *BaseII) {
     assert(!U.empty());
@@ -189,6 +218,7 @@ public:
     II.U = U;
     II.NumFeatures = NumFeatures;
     II.NeverReduce = NeverReduce;
+    II.TimeOfUnit = TimeOfUnit;
     II.MayDeleteFile = MayDeleteFile;
     II.UniqFeatureSet = FeatureSet;
     II.HasFocusFunction = HasFocusFunction;
@@ -471,12 +501,19 @@ private:
     Weights.resize(N);
     std::iota(Intervals.begin(), Intervals.end(), 0);
 
+    std::chrono::microseconds AverageUnitExecutionTime(0);
+    for (auto II : Inputs) {
+      AverageUnitExecutionTime += II->TimeOfUnit;
+    }
+    AverageUnitExecutionTime /= N;
+
     bool VanillaSchedule = true;
     if (Entropic.Enabled) {
       for (auto II : Inputs) {
         if (II->NeedsEnergyUpdate && II->Energy != 0.0) {
           II->NeedsEnergyUpdate = false;
-          II->UpdateEnergy(RareFeatures.size());
+          II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime,
+                           AverageUnitExecutionTime);
         }
       }
 
index 93b1ff6..caafd1d 100644 (file)
@@ -764,6 +764,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
       (size_t)Flags.entropic_feature_frequency_threshold;
   Options.EntropicNumberOfRarestFeatures =
       (size_t)Flags.entropic_number_of_rarest_features;
+  Options.EntropicScalePerExecTime = Flags.entropic_scale_per_exec_time;
   if (Options.Entropic) {
     if (!Options.FocusFunction.empty()) {
       Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot "
@@ -779,6 +780,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
   Entropic.FeatureFrequencyThreshold =
       Options.EntropicFeatureFrequencyThreshold;
   Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures;
+  Entropic.ScalePerExecTime = Options.EntropicScalePerExecTime;
 
   unsigned Seed = Flags.seed;
   // Initialize Seed.
index 5194666..fdb8362 100644 (file)
@@ -175,6 +175,11 @@ FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If "
      "entropic is enabled, we keep track of the frequencies only for the "
      "Top-X least abundant features (union features that are considered as "
      "rare).")
+FUZZER_FLAG_INT(entropic_scale_per_exec_time, 0, "Experimental. If 1, "
+     "the Entropic power schedule gets scaled based on the input execution "
+     "time. Inputs with lower execution time get scheduled more (up to 30x). "
+     "Note that, if 1, fuzzer stops from being deterministic even if a "
+     "non-zero random seed is given.")
 
 FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
 FUZZER_DEPRECATED_FLAG(use_clang_coverage)
index f97b4d2..f9986dd 100644 (file)
@@ -470,6 +470,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
     return false;
 
   ExecuteCallback(Data, Size);
+  auto TimeOfUnit = duration_cast<microseconds>(UnitStopTime - UnitStartTime);
 
   UniqFeatureSetTmp.clear();
   size_t FoundUniqFeaturesOfII = 0;
@@ -493,7 +494,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
     auto NewII =
         Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
                            TPC.ObservedFocusFunction(), ForceAddToCorpus,
-                           UniqFeatureSetTmp, DFT, II);
+                           TimeOfUnit, UniqFeatureSetTmp, DFT, II);
     WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
                           NewII->UniqFeatureSet);
     return true;
index 281658d..b17a747 100644 (file)
@@ -49,6 +49,7 @@ struct FuzzingOptions {
   bool Entropic = false;
   size_t EntropicFeatureFrequencyThreshold = 0xFF;
   size_t EntropicNumberOfRarestFeatures = 100;
+  bool EntropicScalePerExecTime = false;
   std::string OutputCorpus;
   std::string ArtifactPrefix = "./";
   std::string ExactArtifactPath;
index 93b54f5..e17fca8 100644 (file)
@@ -599,7 +599,9 @@ TEST(Corpus, Distribution) {
   for (size_t i = 0; i < N; i++)
     C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
                    /*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
-                   /*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT,
+                   /*ForceAddToCorpus*/ false,
+                   /*TimeOfUnit*/ std::chrono::microseconds(0),
+                   /*FeatureSet*/ {}, DFT,
                    /*BaseII*/ nullptr);
 
   Vector<size_t> Hist(N);
@@ -1101,17 +1103,17 @@ TEST(Entropic, ComputeEnergy) {
   Vector<std::pair<uint32_t, uint16_t>> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}};
   II->FeatureFreqs = FeatureFreqs;
   II->NumExecutedMutations = 0;
-  II->UpdateEnergy(4);
+  II->UpdateEnergy(4, false, std::chrono::microseconds(0));
   EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision);
 
   II->NumExecutedMutations = 9;
-  II->UpdateEnergy(5);
+  II->UpdateEnergy(5, false, std::chrono::microseconds(0));
   EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision);
 
   II->FeatureFreqs[0].second++;
   II->FeatureFreqs.push_back(std::pair<uint32_t, uint16_t>(42, 6));
   II->NumExecutedMutations = 20;
-  II->UpdateEnergy(10);
+  II->UpdateEnergy(10, false, std::chrono::microseconds(0));
   EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision);
 }
 
diff --git a/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp
new file mode 100644 (file)
index 0000000..ec3a690
--- /dev/null
@@ -0,0 +1,33 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Tests whether scaling the Entropic scheduling weight based on input execution
+// time is effective or not. Inputs of size 10 will take at least 100
+// microseconds more than any input of size 1-9. The input of size 2 in the
+// corpus should be favored by the exec-time-scaled Entropic scheduling policy
+// than the input of size 10 in the corpus, eventually finding the crashing
+// input {0xab, 0xcd} with less executions.
+#include <chrono>
+#include <cstdint>
+#include <thread>
+
+static volatile int Sink;
+static volatile int *Nil = nullptr;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 10)
+    return 0; // To make the test quicker.
+
+  if (Size == 10) {
+    size_t ExecTimeUSec = 100;
+    std::this_thread::sleep_for(std::chrono::microseconds(ExecTimeUSec));
+
+    Sink = 0; // execute a lot slower than the crashing input below.
+  }
+
+  if (Size == 2 && Data[0] == 0xab && Data[1] == 0xcd)
+    *Nil = 42; // crash.
+
+  return 0;
+}
diff --git a/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test
new file mode 100644 (file)
index 0000000..d34550f
--- /dev/null
@@ -0,0 +1,8 @@
+REQUIRES: linux, x86_64
+RUN: %cpp_compiler %S/EntropicScalePerExecTimeTest.cpp -o %t-EntropicScalePerExecTimeTest
+RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -entropic_scale_per_exec_time=1 -seed=1 -runs=100000 -max_len=10
+
+# The following test is added as a comment here for reference, which should
+# take more runs than with -entropic_scale_per_exec_time=1 to find the crash.
+# (it takes 126,633 runs)
+# RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -seed=1 -runs=200000 -max_len=10