From 64d24578d8e199a5f0de8849179ab58807747afb Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Sat, 12 Mar 2016 01:57:04 +0000 Subject: [PATCH] [libFuzzer] try to use max_len based on the items of the corpus instead of blindly defaulting to 64 bytes. llvm-svn: 263323 --- llvm/docs/LibFuzzer.rst | 6 +++--- llvm/lib/Fuzzer/FuzzerDriver.cpp | 17 +++++++++++++---- llvm/lib/Fuzzer/FuzzerFlags.def | 4 +++- llvm/lib/Fuzzer/FuzzerInternal.h | 6 ++++-- llvm/lib/Fuzzer/FuzzerLoop.cpp | 30 +++++++++++++++++++++++------- llvm/lib/Fuzzer/FuzzerTraceState.cpp | 23 +++++++++++++---------- 6 files changed, 59 insertions(+), 27 deletions(-) diff --git a/llvm/docs/LibFuzzer.rst b/llvm/docs/LibFuzzer.rst index d4c1111..de55b27 100644 --- a/llvm/docs/LibFuzzer.rst +++ b/llvm/docs/LibFuzzer.rst @@ -29,8 +29,8 @@ This library is intended primarily for in-process coverage-guided fuzz testing fuzzer (a directory with test inputs, one file per input). The better your inputs are the faster you will find something interesting. Also try to keep your inputs small, otherwise the Fuzzer will run too slow. - By default, the Fuzzer limits the size of every input to 64 bytes - (use ``-max_len=N`` to override). + Use ``-max_len=N`` to set a hard limit on the size of the inputs; + by default libFuzzer will try to guess a good value. * Run the fuzzer with the test corpus. As new interesting test cases are discovered they will be added to the corpus. If a bug is discovered by the sanitizer (asan, etc) it will be reported as usual and the reproducer @@ -65,7 +65,7 @@ The most important flags are:: seed 0 Random seed. If 0, seed is generated. runs -1 Number of individual test runs (-1 for infinite runs). - max_len 64 Maximum length of the test input. + max_len 0 Maximum length of the test input. If 0, libFuzzer tries to guess a good value based on the corpus and reports it. cross_over 1 If 1, cross over inputs. mutate_depth 5 Apply this number of consecutive mutations to each input. 
timeout 1200 Timeout in seconds (if positive). If one unit runs more than this number of seconds the process will abort. diff --git a/llvm/lib/Fuzzer/FuzzerDriver.cpp b/llvm/lib/Fuzzer/FuzzerDriver.cpp index a91d9de..50a9cff 100644 --- a/llvm/lib/Fuzzer/FuzzerDriver.cpp +++ b/llvm/lib/Fuzzer/FuzzerDriver.cpp @@ -131,7 +131,7 @@ static bool ParseOneFlag(const char *Param) { static bool PrintedWarning = false; if (!PrintedWarning) { PrintedWarning = true; - Printf("WARNING: libFuzzer ignores flags that start with '--'\n"); + Printf("INFO: libFuzzer ignores flags that start with '--'\n"); } return true; } @@ -269,6 +269,7 @@ static int FuzzerDriver(const std::vector &Args, if (Flags.workers > 0 && Flags.jobs > 0) return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); + const size_t kMaxSaneLen = 1 << 20; Fuzzer::FuzzingOptions Options; Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; @@ -315,7 +316,7 @@ static int FuzzerDriver(const std::vector &Args, Seed = (std::chrono::system_clock::now().time_since_epoch().count() << 10) + getpid(); if (Flags.verbosity) - Printf("Seed: %u\n", Seed); + Printf("INFO: Seed: %u\n", Seed); Random Rand(Seed); MutationDispatcher MD(Rand); @@ -355,16 +356,24 @@ static int FuzzerDriver(const std::vector &Args, exit(0); } + if (Flags.merge) { + if (Options.MaxLen == 0) + F.SetMaxLen(kMaxSaneLen); F.Merge(*Inputs); exit(0); } + size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen; - F.RereadOutputCorpus(); + F.RereadOutputCorpus(TemporaryMaxLen); for (auto &inp : *Inputs) if (inp != Options.OutputCorpus) - F.ReadDir(inp, nullptr, Options.MaxLen); + F.ReadDir(inp, nullptr, TemporaryMaxLen); + + if (Options.MaxLen == 0) + F.SetMaxLen( + std::min(std::max(64UL, 2 * F.MaxUnitSizeInCorpus()), kMaxSaneLen)); if (F.CorpusSize() == 0) F.AddToCorpus(Unit()); // Can't fuzz empty corpus, so add an empty input. 
diff --git a/llvm/lib/Fuzzer/FuzzerFlags.def b/llvm/lib/Fuzzer/FuzzerFlags.def index eddd604..caf3d55 100644 --- a/llvm/lib/Fuzzer/FuzzerFlags.def +++ b/llvm/lib/Fuzzer/FuzzerFlags.def @@ -14,7 +14,9 @@ FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") FUZZER_FLAG_INT(runs, -1, "Number of individual test runs (-1 for infinite runs).") -FUZZER_FLAG_INT(max_len, 64, "Maximum length of the test input.") +FUZZER_FLAG_INT(max_len, 0, "Maximum length of the test input. " + "If 0, libFuzzer tries to guess a good value based on the corpus " + "and reports it. ") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") diff --git a/llvm/lib/Fuzzer/FuzzerInternal.h b/llvm/lib/Fuzzer/FuzzerInternal.h index 5d66040..29a0050 100644 --- a/llvm/lib/Fuzzer/FuzzerInternal.h +++ b/llvm/lib/Fuzzer/FuzzerInternal.h @@ -274,7 +274,7 @@ class Fuzzer { public: struct FuzzingOptions { int Verbosity = 1; - int MaxLen = 0; + size_t MaxLen = 0; int UnitTimeoutSec = 300; int TimeoutExitCode = 77; int ErrorExitCode = 77; @@ -316,11 +316,12 @@ public: void InitializeTraceState(); void AssignTaintLabels(uint8_t *Data, size_t Size); size_t CorpusSize() const { return Corpus.size(); } + size_t MaxUnitSizeInCorpus() const; void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { Printf("Loading corpus: %s\n", Path.c_str()); ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize); } - void RereadOutputCorpus(); + void RereadOutputCorpus(size_t MaxSize); // Save the current corpus to OutputCorpus. 
void SaveCorpus(); @@ -345,6 +346,7 @@ public: void Merge(const std::vector &Corpora); MutationDispatcher &GetMD() { return MD; } void PrintFinalStats(); + void SetMaxLen(size_t MaxLen); private: void AlarmCallback(); diff --git a/llvm/lib/Fuzzer/FuzzerLoop.cpp b/llvm/lib/Fuzzer/FuzzerLoop.cpp index 0438f41..2bae761 100644 --- a/llvm/lib/Fuzzer/FuzzerLoop.cpp +++ b/llvm/lib/Fuzzer/FuzzerLoop.cpp @@ -199,12 +199,27 @@ void Fuzzer::PrintFinalStats() { Printf("stat::peak_rss_mb: %zd\n", GetPeakRSSMb()); } -void Fuzzer::RereadOutputCorpus() { +size_t Fuzzer::MaxUnitSizeInCorpus() const { + size_t Res = 0; + for (auto &X : Corpus) + Res = std::max(Res, X.size()); + return Res; +} + +void Fuzzer::SetMaxLen(size_t MaxLen) { + assert(Options.MaxLen == 0); // Can only reset MaxLen from 0 to non-0. + assert(MaxLen); + Options.MaxLen = MaxLen; + Printf("INFO: -max_len is not provided, using %zd\n", Options.MaxLen); +} + + +void Fuzzer::RereadOutputCorpus(size_t MaxSize) { if (Options.OutputCorpus.empty()) return; std::vector AdditionalCorpus; ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus, - &EpochOfLastReadOfOutputCorpus, Options.MaxLen); + &EpochOfLastReadOfOutputCorpus, MaxSize); if (Corpus.empty()) { Corpus = AdditionalCorpus; return; @@ -214,8 +229,8 @@ void Fuzzer::RereadOutputCorpus() { if (Options.Verbosity >= 2) Printf("Reload: read %zd new units.\n", AdditionalCorpus.size()); for (auto &X : AdditionalCorpus) { - if (X.size() > (size_t)Options.MaxLen) - X.resize(Options.MaxLen); + if (X.size() > MaxSize) + X.resize(MaxSize); if (UnitHashesAddedToCorpus.insert(Hash(X)).second) { if (RunOne(X)) { Corpus.push_back(X); @@ -231,7 +246,7 @@ void Fuzzer::ShuffleAndMinimize() { (Options.PreferSmallDuringInitialShuffle == -1 && MD.GetRand().RandBool())); if (Options.Verbosity) - Printf("PreferSmall: %d\n", PreferSmall); + Printf("INFO: PreferSmall: %d\n", PreferSmall); PrintStats("READ "); std::vector NewCorpus; if (Options.ShuffleAtStartUp) { @@ 
-427,6 +442,7 @@ void Fuzzer::Merge(const std::vector &Corpora) { return; } auto InitialCorpusDir = Corpora[0]; + assert(Options.MaxLen > 0); ReadDir(InitialCorpusDir, nullptr, Options.MaxLen); Printf("Merge: running the initial corpus '%s' of %d units\n", InitialCorpusDir.c_str(), Corpus.size()); @@ -469,7 +485,7 @@ void Fuzzer::MutateAndTestOne() { else NewSize = MD.Mutate(MutateInPlaceHere.data(), Size, Options.MaxLen); assert(NewSize > 0 && "Mutator returned empty unit"); - assert(NewSize <= (size_t)Options.MaxLen && + assert(NewSize <= Options.MaxLen && "Mutator return overisized unit"); Size = NewSize; if (i == 0) @@ -546,7 +562,7 @@ void Fuzzer::Loop() { SyncCorpus(); auto Now = system_clock::now(); if (duration_cast(Now - LastCorpusReload).count()) { - RereadOutputCorpus(); + RereadOutputCorpus(Options.MaxLen); LastCorpusReload = Now; } if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) diff --git a/llvm/lib/Fuzzer/FuzzerTraceState.cpp b/llvm/lib/Fuzzer/FuzzerTraceState.cpp index dc4f18c..8123407 100644 --- a/llvm/lib/Fuzzer/FuzzerTraceState.cpp +++ b/llvm/lib/Fuzzer/FuzzerTraceState.cpp @@ -253,6 +253,17 @@ class TraceState { AddMutation(Pos, Size, reinterpret_cast(&Data)); } + void EnsureDfsanLabels(size_t Size) { + for (; LastDfsanLabel < Size; LastDfsanLabel++) { + dfsan_label L = dfsan_create_label("input", (void *)(LastDfsanLabel + 1)); + // We assume that no one else has called dfsan_create_label before. 
+ if (L != LastDfsanLabel + 1) { + Printf("DFSan labels are not starting from 1, exiting\n"); + exit(1); + } + } + } + private: bool IsTwoByteData(uint64_t Data) { int64_t Signed = static_cast(Data); @@ -279,6 +290,7 @@ class TraceState { size_t NumMutations; TraceBasedMutation Mutations[kMaxMutations]; LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)]; + size_t LastDfsanLabel = 0; MutationDispatcher &MD; const Fuzzer::FuzzingOptions &Options; uint8_t **CurrentUnitData; @@ -480,6 +492,7 @@ void Fuzzer::StopTraceRecording() { void Fuzzer::AssignTaintLabels(uint8_t *Data, size_t Size) { if (!Options.UseTraces && !Options.UseMemcmp) return; if (!ReallyHaveDFSan()) return; + TS->EnsureDfsanLabels(Size); for (size_t i = 0; i < Size; i++) dfsan_set_label(i + 1, &Data[i], 1); } @@ -487,16 +500,6 @@ void Fuzzer::AssignTaintLabels(uint8_t *Data, size_t Size) { void Fuzzer::InitializeTraceState() { if (!Options.UseTraces && !Options.UseMemcmp) return; TS = new TraceState(MD, Options, &CurrentUnitData, &CurrentUnitSize); - if (ReallyHaveDFSan()) { - for (size_t i = 0; i < static_cast(Options.MaxLen); i++) { - dfsan_label L = dfsan_create_label("input", (void *)(i + 1)); - // We assume that no one else has called dfsan_create_label before. - if (L != i + 1) { - Printf("DFSan labels are not starting from 1, exiting\n"); - exit(1); - } - } - } } static size_t InternalStrnlen(const char *S, size_t MaxLen) { -- 2.7.4