From 945761b8c29f9cc36372b6485720656371c736bc Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Fri, 18 Mar 2016 00:23:29 +0000 Subject: [PATCH] [libFuzzer] improve -merge functionality llvm-svn: 263769 --- llvm/lib/Fuzzer/FuzzerDriver.cpp | 3 +- llvm/lib/Fuzzer/FuzzerFlags.def | 6 +- llvm/lib/Fuzzer/FuzzerInternal.h | 8 +- llvm/lib/Fuzzer/FuzzerLoop.cpp | 126 +++++++++++++++++++------------- llvm/lib/Fuzzer/test/fuzzer-traces.test | 18 ++--- llvm/lib/Fuzzer/test/merge.test | 13 ++-- 6 files changed, 101 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Fuzzer/FuzzerDriver.cpp b/llvm/lib/Fuzzer/FuzzerDriver.cpp index 4e559b4..c162adf 100644 --- a/llvm/lib/Fuzzer/FuzzerDriver.cpp +++ b/llvm/lib/Fuzzer/FuzzerDriver.cpp @@ -284,8 +284,7 @@ static int FuzzerDriver(const std::vector &Args, Options.UseTraces = Flags.use_traces; Options.UseMemcmp = Flags.use_memcmp; Options.ShuffleAtStartUp = Flags.shuffle; - Options.PreferSmallDuringInitialShuffle = - Flags.prefer_small_during_initial_shuffle; + Options.PreferSmall = Flags.prefer_small; Options.Reload = Flags.reload; Options.OnlyASCII = Flags.only_ascii; Options.OutputCSV = Flags.output_csv; diff --git a/llvm/lib/Fuzzer/FuzzerFlags.def b/llvm/lib/Fuzzer/FuzzerFlags.def index f794d32..33389d2 100644 --- a/llvm/lib/Fuzzer/FuzzerFlags.def +++ b/llvm/lib/Fuzzer/FuzzerFlags.def @@ -21,10 +21,8 @@ FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup") -FUZZER_FLAG_INT( - prefer_small_during_initial_shuffle, -1, - "If 1, always prefer smaller inputs during the initial corpus shuffle." - " If 0, never do that. If -1, do it sometimes.") +FUZZER_FLAG_INT(prefer_small, 1, + "If 1, always prefer smaller inputs during the corpus shuffle.") FUZZER_FLAG_INT( timeout, 1200, "Timeout in seconds (if positive). " diff --git a/llvm/lib/Fuzzer/FuzzerInternal.h b/llvm/lib/Fuzzer/FuzzerInternal.h index 5c6abc7..ebed95c 100644 --- a/llvm/lib/Fuzzer/FuzzerInternal.h +++ b/llvm/lib/Fuzzer/FuzzerInternal.h @@ -29,6 +29,7 @@ namespace fuzzer { using namespace std::chrono; typedef std::vector Unit; +typedef std::vector UnitVector; // A simple POD sized array of bytes. template class FixedWord { @@ -288,7 +289,7 @@ public: bool UseFullCoverageSet = false; bool Reload = true; bool ShuffleAtStartUp = true; - int PreferSmallDuringInitialShuffle = -1; + bool PreferSmall = true; size_t MaxNumberOfRuns = ULONG_MAX; int ReportSlowUnits = 10; bool OnlyASCII = false; @@ -342,6 +343,8 @@ public: // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector &Corpora); + // Returns a subset of 'Extra' that adds coverage to 'Initial'. + UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra); MutationDispatcher &GetMD() { return MD; } void PrintFinalStats(); void SetMaxLen(size_t MaxLen); @@ -359,6 +362,8 @@ private: void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n"); void PrintStatusForNewUnit(const Unit &U); + void ShuffleCorpus(UnitVector *V); + // Updates the probability distribution for the units in the corpus. // Must be called whenever the corpus or unit weights are changed. void UpdateCorpusDistribution(); @@ -367,6 +372,7 @@ private: size_t RecordCallerCalleeCoverage(); void PrepareCoverageBeforeRun(); bool CheckCoverageAfterRun(); + void ResetCoverage(); // Trace-based fuzzing: we run a unit with some kind of tracing // enabled and record potentially useful mutations. Then diff --git a/llvm/lib/Fuzzer/FuzzerLoop.cpp b/llvm/lib/Fuzzer/FuzzerLoop.cpp index 4f3b5a7..3bc5f93 100644 --- a/llvm/lib/Fuzzer/FuzzerLoop.cpp +++ b/llvm/lib/Fuzzer/FuzzerLoop.cpp @@ -243,34 +243,25 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { } } +void Fuzzer::ShuffleCorpus(UnitVector *V) { + std::random_shuffle(V->begin(), V->end(), MD.GetRand()); + if (Options.PreferSmall) + std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) { + return A.size() < B.size(); + }); +} + void Fuzzer::ShuffleAndMinimize() { - bool PreferSmall = (Options.PreferSmallDuringInitialShuffle == 1 || - (Options.PreferSmallDuringInitialShuffle == -1 && - MD.GetRand().RandBool())); - if (Options.Verbosity) - Printf("INFO: PreferSmall: %d\n", PreferSmall); PrintStats("READ "); std::vector NewCorpus; - if (Options.ShuffleAtStartUp) { - std::random_shuffle(Corpus.begin(), Corpus.end(), MD.GetRand()); - if (PreferSmall) - std::stable_sort( - Corpus.begin(), Corpus.end(), - [](const Unit &A, const Unit &B) { return A.size() < B.size(); }); - } - Unit U; - for (const auto &C : Corpus) { - for (size_t First = 0; First < 1; First++) { - U.clear(); - size_t Last = std::min(First + Options.MaxLen, C.size()); - U.insert(U.begin(), C.begin() + First, C.begin() + Last); - if (Options.OnlyASCII) - ToASCII(U.data(), U.size()); - if (RunOne(U)) { - NewCorpus.push_back(U); - if (Options.Verbosity >= 2) - Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size()); - } + if (Options.ShuffleAtStartUp) + ShuffleCorpus(&Corpus); + + for (const auto &U : Corpus) { + if (RunOne(U)) { + NewCorpus.push_back(U); + if (Options.Verbosity >= 2) + Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size()); } } Corpus = NewCorpus; @@ -438,37 +429,65 @@ void Fuzzer::ReportNewCoverage(const Unit &U) { NumberOfNewUnitsAdded++; } +// Finds minimal number of units in 'Extra' that add coverage to 'Initial'. +// We do it by actually executing the units, sometimes more than once, +// because we may be using different coverage-like signals and the only +// common thing between them is that we can say "this unit found new stuff". +UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial, + const UnitVector &Extra) { + UnitVector Res = Extra; + size_t OldSize = Res.size(); + for (int Iter = 0; Iter < 10; Iter++) { + ShuffleCorpus(&Res); + ResetCoverage(); + + for (auto &U : Initial) + RunOne(U); + + Corpus.clear(); + for (auto &U : Res) + if (RunOne(U)) + Corpus.push_back(U); + + char Stat[7] = "MIN "; + Stat[3] = '0' + Iter; + PrintStats(Stat); + + size_t NewSize = Corpus.size(); + Res.swap(Corpus); + + if (NewSize == OldSize) + break; + OldSize = NewSize; + } + return Res; +} + void Fuzzer::Merge(const std::vector &Corpora) { if (Corpora.size() <= 1) { Printf("Merge requires two or more corpus dirs\n"); return; } - auto InitialCorpusDir = Corpora[0]; - assert(Options.MaxLen > 0); - ReadDir(InitialCorpusDir, nullptr, Options.MaxLen); - Printf("Merge: running the initial corpus '%s' of %d units\n", - InitialCorpusDir.c_str(), Corpus.size()); - for (auto &U : Corpus) - RunOne(U); - std::vector ExtraCorpora(Corpora.begin() + 1, Corpora.end()); - size_t NumTried = 0; - size_t NumMerged = 0; - for (auto &C : ExtraCorpora) { - Corpus.clear(); - ReadDir(C, nullptr, Options.MaxLen); - Printf("Merge: merging the extra corpus '%s' of %zd units\n", C.c_str(), - Corpus.size()); - for (auto &U : Corpus) { - NumTried++; - if (RunOne(U)) { - WriteToOutputCorpus(U); - NumMerged++; - } - } + assert(Options.MaxLen > 0); + UnitVector Initial, Extra; + ReadDirToVectorOfUnits(Corpora[0].c_str(), &Initial, nullptr, Options.MaxLen); + for (auto &C : ExtraCorpora) + ReadDirToVectorOfUnits(C.c_str(), &Extra, nullptr, Options.MaxLen); + + if (!Initial.empty()) { + Printf("=== Minimizing the initial corpus of %zd units\n", Initial.size()); + Initial = FindExtraUnits({}, Initial); } - Printf("Merge: written %zd out of %zd units\n", NumMerged, NumTried); + + Printf("=== Merging extra %zd units\n", Extra.size()); + auto Res = FindExtraUnits(Initial, Extra); + + for (auto &U: Res) + WriteToOutputCorpus(U); + + Printf("=== Merge: written %zd units\n", Res.size()); } void Fuzzer::MutateAndTestOne() { @@ -507,6 +526,12 @@ size_t Fuzzer::ChooseUnitIdxToMutate() { return Idx; } +void Fuzzer::ResetCoverage() { + CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage); + __sanitizer_reset_coverage(); + CounterBitmap.clear(); +} + // Experimental search heuristic: drilling. // - Read, shuffle, execute and minimize the corpus. // - Choose one random unit. @@ -521,8 +546,7 @@ void Fuzzer::Drill() { Unit U = ChooseUnitToMutate(); - CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage); - __sanitizer_reset_coverage(); + ResetCoverage(); std::vector SavedCorpus; SavedCorpus.swap(Corpus); @@ -535,7 +559,7 @@ void Fuzzer::Drill() { SavedOutputCorpusPath.swap(Options.OutputCorpus); Loop(); - __sanitizer_reset_coverage(); + ResetCoverage(); PrintStats("REINIT"); SavedOutputCorpusPath.swap(Options.OutputCorpus); diff --git a/llvm/lib/Fuzzer/test/fuzzer-traces.test b/llvm/lib/Fuzzer/test/fuzzer-traces.test index 59a3202..5f35ce1 100644 --- a/llvm/lib/Fuzzer/test/fuzzer-traces.test +++ b/llvm/lib/Fuzzer/test/fuzzer-traces.test @@ -7,19 +7,19 @@ RUN: not LLVMFuzzer-SimpleCmpTest -use_traces=1 -seed=1 -runs=10000001 2>&1 | F RUN: not LLVMFuzzer-MemcmpTest -seed=4294967295 -runs=100000 2>&1 | FileCheck %s RUN: LLVMFuzzer-MemcmpTest -use_memcmp=0 -seed=4294967295 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 -RUN: not LLVMFuzzer-StrncmpTest -seed=1 -runs=100000 2>&1 | FileCheck %s -RUN: LLVMFuzzer-StrncmpTest -use_memcmp=0 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 +RUN: not LLVMFuzzer-StrncmpTest -seed=2 -runs=100000 2>&1 | FileCheck %s +RUN: LLVMFuzzer-StrncmpTest -use_memcmp=0 -seed=3 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 -RUN: not LLVMFuzzer-StrcmpTest -seed=1 -runs=200000 2>&1 | FileCheck %s -RUN: LLVMFuzzer-StrcmpTest -use_memcmp=0 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 +RUN: not LLVMFuzzer-StrcmpTest -seed=4 -runs=200000 2>&1 | FileCheck %s +RUN: LLVMFuzzer-StrcmpTest -use_memcmp=0 -seed=5 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 -RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=1 -runs=1000002 2>&1 | FileCheck %s -RUN: LLVMFuzzer-SwitchTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 +RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=6 -runs=1000002 2>&1 | FileCheck %s +RUN: LLVMFuzzer-SwitchTest -seed=7 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 -RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=1 -runs=10000000 2>&1 | FileCheck %s -RUN: LLVMFuzzer-SimpleHashTest -seed=1 -runs=10000000 2>&1 | FileCheck %s --check-prefix=Done10000000 +RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=8 -runs=10000000 2>&1 | FileCheck %s +RUN: LLVMFuzzer-SimpleHashTest -seed=9 -runs=10000000 2>&1 | FileCheck %s --check-prefix=Done10000000 -RUN: LLVMFuzzer-RepeatedMemcmp -seed=1 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT +RUN: LLVMFuzzer-RepeatedMemcmp -seed=10 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT RECOMMENDED_DICT:###### Recommended dictionary. ###### RECOMMENDED_DICT-DAG: "foo" RECOMMENDED_DICT-DAG: "bar" diff --git a/llvm/lib/Fuzzer/test/merge.test b/llvm/lib/Fuzzer/test/merge.test index 57ecc14..6f19e21 100644 --- a/llvm/lib/Fuzzer/test/merge.test +++ b/llvm/lib/Fuzzer/test/merge.test @@ -8,8 +8,8 @@ RUN: echo ..Z... > %tmp/T1/3 # T1 has 3 elements, T2 is empty. RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK1 -CHECK1: Merge: running the initial corpus {{.*}} of 3 units -CHECK1: Merge: written 0 out of 0 units +CHECK1: === Minimizing the initial corpus of 3 units +CHECK1: === Merge: written 0 units RUN: echo ...Z.. > %tmp/T2/1 RUN: echo ....E. > %tmp/T2/2 @@ -20,10 +20,11 @@ RUN: echo ..Z... > %tmp/T2/c # T1 has 3 elements, T2 has 6 elements, only 3 are new. RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK2 -CHECK2: Merge: running the initial corpus {{.*}} of 3 units -CHECK2: Merge: written 3 out of 6 units +CHECK2: === Minimizing the initial corpus of 3 units +CHECK2: === Merging extra 6 units +CHECK2: === Merge: written 3 units # Now, T1 has 6 units and T2 has no new interesting units. RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK3 -CHECK3: Merge: running the initial corpus {{.*}} of 6 units -CHECK3: Merge: written 0 out of 6 units +CHECK3: === Minimizing the initial corpus of 6 units +CHECK3: === Merge: written 0 units -- 2.7.4