third_party/pigweed/repo/pw_tokenizer/token_database_fuzzer.cc

   1 // Copyright 2020 The Pigweed Authors
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
   4 // use this file except in compliance with the License. You may obtain a copy of
   5 // the License at
   6 //
   7 //     https://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12 // License for the specific language governing permissions and limitations under
  13 // the License.
  14
// This file implements a basic fuzz test for the TokenDatabase class.
// A database is created from fuzz data, and a random entry count (also
// derived from the fuzz data) is set. We then iterate over the database
// and run a 'find' operation on it.
  19
  20 #include <cstring>
  21 #include <span>
  22
  23 #include "pw_fuzzer/asan_interface.h"
  24 #include "pw_fuzzer/fuzzed_data_provider.h"
  25 #include "pw_preprocessor/util.h"
  26 #include "pw_tokenizer/token_database.h"
  27
  28 namespace pw::tokenizer {
  29 namespace {
  30
  31 enum FuzzTestType : uint8_t {
  32   kValidHeader,
  33   kRandomHeader,
  34   kMaxValue = kRandomHeader,
  35 };
  36
  37 constexpr size_t kTokenHeaderSize = 16;
  38
  39 // The default max length in bytes of fuzzed data provided. Note that
  40 // this needs to change if the fuzzer executable is run with a
  41 // '-max_len' argument.
  42 constexpr size_t kFuzzDataSizeMax = 4096;
  43
  44 // Location of the 'EntryCount' field in the token header.
  45 constexpr size_t kEntryCountOffset = 8;
  46 constexpr size_t kEntryCountSize = 4;
  47
  48 void SetTokenEntryCountInBuffer(uint8_t* buffer, uint32_t count) {
  49   memcpy(buffer + kEntryCountOffset, &count, kEntryCountSize);
  50 }
  51
  52 void IterateOverDatabase(TokenDatabase* const database) {
  53   for (TokenDatabase::Entry entry : *database) {
  54     // Since we don't "use" the contents of the entry, we exercise
  55     // the entry by extracting its contents into volatile variables
  56     // to prevent it from being optimized out during compilation.
  57     [[maybe_unused]] volatile const char* entry_string = entry.string;
  58     [[maybe_unused]] volatile uint32_t entry_token = entry.token;
  59   }
  60 }
  61
  62 }  // namespace
  63
  64 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  65   constexpr size_t kBufferSizeMax = kFuzzDataSizeMax + kTokenHeaderSize;
  66   constexpr char kDefaultHeader[] = "TOKENS\0\0\0\0\0\0\0\0\0";
  67   static uint8_t buffer[kBufferSizeMax];
  68
  69   if (size > kFuzzDataSizeMax) {
  70     return 0;
  71   }
  72
  73   FuzzedDataProvider provider(data, size);
  74
  75   // Initialize the token header with either a valid or invalid header
  76   // based on a random enum consumed from the fuzz data.
  77   switch (provider.ConsumeEnum<FuzzTestType>()) {
  78     case kValidHeader:
  79       memcpy(buffer, kDefaultHeader, kTokenHeaderSize);
  80       break;
  81
  82     case kRandomHeader: {
  83       std::vector<uint8_t> random_header =
  84           provider.ConsumeBytes<uint8_t>(kTokenHeaderSize);
  85       random_header.resize(kTokenHeaderSize);
  86       memcpy(buffer, &random_header[0], kTokenHeaderSize);
  87       break;
  88     }
  89   }
  90
  91   // Consume a 'test token' integer to look up later in the database.
  92   uint32_t random_token = provider.ConsumeIntegral<uint32_t>();
  93
  94   // Consume a 'token count' integer to set as our database entry count.
  95   uint32_t random_token_count =
  96       provider.ConsumeIntegralInRange<uint32_t>(0, kFuzzDataSizeMax);
  97
  98   // Consume the remaining data. Note that the data corresponding to the
  99   // string entries in the database are not explicitly null-terminated.
 100   // TODO(karthikmb): Once OSS-Fuzz updates to Clang11.0, switch to
 101   // provider.ConsumeData() to avoid extra memory and the memcpy call.
 102   auto consumed_bytes =
 103       provider.ConsumeBytes<uint8_t>(provider.remaining_bytes());
 104   memcpy(buffer + kTokenHeaderSize, &consumed_bytes[0], consumed_bytes.size());
 105
 106   SetTokenEntryCountInBuffer(buffer, random_token_count);
 107
 108   // Poison the unused buffer space for this run of the fuzzer to
 109   // prevent the token database creator from reading too far in.
 110   size_t data_size = kTokenHeaderSize + consumed_bytes.size();
 111   size_t poisoned_length = kBufferSizeMax - data_size;
 112   void* poisoned = &buffer[data_size];
 113
 114   ASAN_POISON_MEMORY_REGION(poisoned, poisoned_length);
 115
 116   // We create a database from a std::span of the buffer since the string
 117   // entries might not be null terminated, and the creation of a database
 118   // from a raw buffer has an explicit null terminated string requirement
 119   // specified in the API.
 120   std::span<uint8_t> data_span(buffer, data_size);
 121   auto token_database = TokenDatabase::Create<std::span<uint8_t>>(data_span);
 122   [[maybe_unused]] volatile auto match = token_database.Find(random_token);
 123
 124   IterateOverDatabase(&token_database);
 125
 126   // Un-poison for the next iteration.
 127   ASAN_UNPOISON_MEMORY_REGION(poisoned, poisoned_length);
 128
 129   return 0;
 130 }
 131
 132 }  // namespace pw::tokenizer