From a35f7d383faaab6b543022fbeceb6bdf9c274460 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Thu, 18 Feb 2016 21:49:10 +0000 Subject: [PATCH] [libFuzzer] only read MaxLen bytes from every file in the corpus to speedup loading the corpus llvm-svn: 261267 --- llvm/lib/Fuzzer/FuzzerDriver.cpp | 2 +- llvm/lib/Fuzzer/FuzzerIO.cpp | 14 ++++++++++---- llvm/lib/Fuzzer/FuzzerInternal.h | 8 ++++---- llvm/lib/Fuzzer/FuzzerLoop.cpp | 6 +++--- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Fuzzer/FuzzerDriver.cpp b/llvm/lib/Fuzzer/FuzzerDriver.cpp index 2c32fed..a3722e7 100644 --- a/llvm/lib/Fuzzer/FuzzerDriver.cpp +++ b/llvm/lib/Fuzzer/FuzzerDriver.cpp @@ -356,7 +356,7 @@ static int FuzzerDriver(const std::vector &Args, F.RereadOutputCorpus(); for (auto &inp : *Inputs) if (inp != Options.OutputCorpus) - F.ReadDir(inp, nullptr); + F.ReadDir(inp, nullptr, Options.MaxLen); if (F.CorpusSize() == 0) F.AddToCorpus(Unit()); // Can't fuzz empty corpus, so add an empty input. diff --git a/llvm/lib/Fuzzer/FuzzerIO.cpp b/llvm/lib/Fuzzer/FuzzerIO.cpp index 5cc589a..6773c60 100644 --- a/llvm/lib/Fuzzer/FuzzerIO.cpp +++ b/llvm/lib/Fuzzer/FuzzerIO.cpp @@ -55,12 +55,18 @@ static std::vector ListFilesInDir(const std::string &Dir, return V; } -Unit FileToVector(const std::string &Path) { +Unit FileToVector(const std::string &Path, size_t MaxSize) { std::ifstream T(Path); if (!T) { Printf("No such directory: %s; exiting\n", Path.c_str()); exit(1); } + if (MaxSize) { + Unit Res(MaxSize); + T.read(reinterpret_cast(Res.data()), MaxSize); + Res.resize(T.gcount()); + return Res; + } return Unit((std::istreambuf_iterator(T)), std::istreambuf_iterator()); } @@ -84,16 +90,16 @@ void WriteToFile(const Unit &U, const std::string &Path) { } void ReadDirToVectorOfUnits(const char *Path, std::vector *V, - long *Epoch) { + long *Epoch, size_t MaxSize) { long E = Epoch ? *Epoch : 0; auto Files = ListFilesInDir(Path, Epoch); for (size_t i = 0; i < Files.size(); i++) { auto &X = Files[i]; auto FilePath = DirPlusFile(Path, X); if (Epoch && GetEpoch(FilePath) < E) continue; - if ((i % 1000) == 0 && i) + if ((i & (i - 1)) == 0 && i >= 1024) Printf("Loaded %zd/%zd files from %s\n", i, Files.size(), Path); - V->push_back(FileToVector(FilePath)); + V->push_back(FileToVector(FilePath, MaxSize)); } } diff --git a/llvm/lib/Fuzzer/FuzzerInternal.h b/llvm/lib/Fuzzer/FuzzerInternal.h index 8a3b936..1c941ab 100644 --- a/llvm/lib/Fuzzer/FuzzerInternal.h +++ b/llvm/lib/Fuzzer/FuzzerInternal.h @@ -65,9 +65,9 @@ typedef FixedWord<27> Word; // 28 bytes. bool IsFile(const std::string &Path); std::string FileToString(const std::string &Path); -Unit FileToVector(const std::string &Path); +Unit FileToVector(const std::string &Path, size_t MaxSize = 0); void ReadDirToVectorOfUnits(const char *Path, std::vector *V, - long *Epoch); + long *Epoch, size_t MaxSize); void WriteToFile(const Unit &U, const std::string &Path); void CopyFileToErr(const std::string &Path); // Returns "Dir/FileName" or equivalent for the current OS. @@ -302,9 +302,9 @@ public: void InitializeTraceState(); void AssignTaintLabels(uint8_t *Data, size_t Size); size_t CorpusSize() const { return Corpus.size(); } - void ReadDir(const std::string &Path, long *Epoch) { + void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { Printf("Loading corpus: %s\n", Path.c_str()); - ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch); + ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize); } void RereadOutputCorpus(); // Save the current corpus to OutputCorpus. diff --git a/llvm/lib/Fuzzer/FuzzerLoop.cpp b/llvm/lib/Fuzzer/FuzzerLoop.cpp index e044b67..e08ca77 100644 --- a/llvm/lib/Fuzzer/FuzzerLoop.cpp +++ b/llvm/lib/Fuzzer/FuzzerLoop.cpp @@ -166,7 +166,7 @@ void Fuzzer::RereadOutputCorpus() { return; std::vector AdditionalCorpus; ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus, - &EpochOfLastReadOfOutputCorpus); + &EpochOfLastReadOfOutputCorpus, Options.MaxLen); if (Corpus.empty()) { Corpus = AdditionalCorpus; return; @@ -384,7 +384,7 @@ void Fuzzer::Merge(const std::vector &Corpora) { return; } auto InitialCorpusDir = Corpora[0]; - ReadDir(InitialCorpusDir, nullptr); + ReadDir(InitialCorpusDir, nullptr, Options.MaxLen); Printf("Merge: running the initial corpus '%s' of %d units\n", InitialCorpusDir.c_str(), Corpus.size()); for (auto &U : Corpus) @@ -396,7 +396,7 @@ void Fuzzer::Merge(const std::vector &Corpora) { size_t NumMerged = 0; for (auto &C : ExtraCorpora) { Corpus.clear(); - ReadDir(C, nullptr); + ReadDir(C, nullptr, Options.MaxLen); Printf("Merge: merging the extra corpus '%s' of %zd units\n", C.c_str(), Corpus.size()); for (auto &U : Corpus) { -- 2.7.4