1 // Copyright 2020 The Pigweed Authors
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
7 // https://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
15 #include "pw_tokenizer/tokenize.h"
22 #include "gtest/gtest.h"
23 #include "pw_tokenizer/hash.h"
24 #include "pw_tokenizer_private/tokenize_test.h"
25 #include "pw_varint/varint.h"
27 namespace pw::tokenizer {
30 // Constructs an array with the hashed string followed by the provided bytes.
//
// The first four elements are the 32-bit result of Hash(format), least
// significant byte first. The array is sized for sizeof...(kData) further
// elements; the lines that append them are not visible in this excerpt.
31 template <uint8_t... kData, size_t kSize>
32 constexpr auto ExpectedData(const char (&format)[kSize]) {
33 const uint32_t value = Hash(format);
34 return std::array<uint8_t, sizeof(uint32_t) + sizeof...(kData)>{
35 static_cast<uint8_t>(value & 0xff),
36 static_cast<uint8_t>(value >> 8 & 0xff),
37 static_cast<uint8_t>(value >> 16 & 0xff),
38 static_cast<uint8_t>(value >> 24 & 0xff),
// Tokenizes the empty string literal in a constant expression. The check on
// `token` is not visible in this excerpt; going by the test name it is
// expected to be zero.
42 TEST(TokenizeString, EmptyString_IsZero) {
43 constexpr pw_tokenizer_Token token = PW_TOKENIZE_STRING("");
// The token produced by PW_TOKENIZE_STRING for a literal must equal Hash() of
// that same literal.
47 TEST(TokenizeString, String_MatchesHash) {
48 constexpr uint32_t token = PW_TOKENIZE_STRING("[:-)");
49 EXPECT_EQ(Hash("[:-)"), token);
// Token created outside of any function body, initialized in a constant
// expression.
52 constexpr uint32_t kGlobalToken = PW_TOKENIZE_STRING(">:-[]");
// The namespace-scope token must equal Hash() of the same literal.
54 TEST(TokenizeString, GlobalVariable_MatchesHash) {
55 EXPECT_EQ(Hash(">:-[]"), kGlobalToken);
// Holds a token created by PW_TOKENIZE_STRING as a static constexpr data
// member, to show the macro can be used inside a class definition.
58 struct TokenizedWithinClass {
59 static constexpr uint32_t kThisToken = PW_TOKENIZE_STRING("???");
// Compile-time check that the member token equals the hash of its string.
62 static_assert(Hash("???") == TokenizedWithinClass::kThisToken);
// Same comparison as the static_assert on kThisToken, but performed through
// EXPECT_EQ and with the static member accessed via a temporary instance.
64 TEST(TokenizeString, ClassMember_MatchesHash) {
65 EXPECT_EQ(Hash("???"), TokenizedWithinClass().kThisToken);
68 // Use a function with a shorter name to test tokenizing __func__ and
69 // __PRETTY_FUNCTION__.
71 // WARNING: This function might cause errors for compilers other than GCC and
72 // clang. It relies on two GCC/clang extensions:
74 // 1 - The __PRETTY_FUNCTION__ C++ function name variable.
75 // 2 - __func__ as a static constexpr array instead of static const. See
76 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66639 for background.
// NOTE(review): the function header is not visible in this excerpt; the test
// that follows calls it as TestName().
//
// Tokenize __func__ in a constant expression and compare with its hash.
79 constexpr uint32_t function_hash = PW_TOKENIZE_STRING(__func__);
80 EXPECT_EQ(pw::tokenizer::Hash(__func__), function_hash);
82 // Check the non-standard __PRETTY_FUNCTION__ name.
83 constexpr uint32_t pretty_function = PW_TOKENIZE_STRING(__PRETTY_FUNCTION__);
84 EXPECT_EQ(pw::tokenizer::Hash(__PRETTY_FUNCTION__), pretty_function);
// Runs the TestName() helper so its expectations are reported under this test.
87 TEST(TokenizeString, FunctionName) { TestName(); }
// PW_TOKENIZE_STRING is given a named constexpr char array instead of a string
// literal; the token must still equal Hash() of that array.
89 TEST(TokenizeString, Array) {
90 constexpr char array[] = "won-won-won-wonderful";
92 const uint32_t array_hash = PW_TOKENIZE_STRING(array);
93 EXPECT_EQ(Hash(array), array_hash);
// Compile-time checks that NUL characters take part in the hash rather than
// terminating the string.
96 TEST(TokenizeString, NullInString) {
97 // Use PW_TOKENIZER_STRING_TOKEN to avoid emitting strings with NUL into the
98 // ELF file. The CSV database format does not support NUL.
// An array of 32 zero bytes: the token equals its Hash() and is not zero.
99 constexpr char nulls[32] = {};
100 static_assert(Hash(nulls) == PW_TOKENIZER_STRING_TOKEN(nulls));
101 static_assert(PW_TOKENIZER_STRING_TOKEN(nulls) != 0u);
// A literal holding one explicit NUL hashes differently from the empty literal.
103 static_assert(PW_TOKENIZER_STRING_TOKEN("\0") == Hash("\0"));
104 static_assert(PW_TOKENIZER_STRING_TOKEN("\0") != Hash(""));
// A NUL in the middle of a literal is tokenized consistently with Hash().
106 static_assert(PW_TOKENIZER_STRING_TOKEN("abc\0def") == Hash("abc\0def"));
// Appending one more explicit NUL changes the hash.
108 static_assert(Hash("abc\0def") != Hash("abc\0def\0"));
111 // Verify that we can tokenize multiple strings from one source line.
//
// Expands to three constexpr uint32_t declarations (token_1, token_2,
// token_3) in the enclosing scope, each initialized by
// PW_TOKENIZE_STRING_DOMAIN with the domain "TEST_DOMAIN". They are marked
// [[maybe_unused]] so the expansion compiles cleanly when the tokens are never
// read.
112 #define THREE_FOR_ONE(first, second, third) \
113 [[maybe_unused]] constexpr uint32_t token_1 = \
114 PW_TOKENIZE_STRING_DOMAIN("TEST_DOMAIN", first); \
115 [[maybe_unused]] constexpr uint32_t token_2 = \
116 PW_TOKENIZE_STRING_DOMAIN("TEST_DOMAIN", second); \
117 [[maybe_unused]] constexpr uint32_t token_3 = \
118 PW_TOKENIZE_STRING_DOMAIN("TEST_DOMAIN", third);
120 TEST(TokenizeString, MultipleTokenizationsInOneMacroExpansion) {
121 // This verifies that we can safely tokenize multiple times in a single macro
122 // expansion. This can be useful when for example a name and description are
123 // both tokenized after being passed into a macro.
125 // This test only verifies that this compiles correctly; it does not test
126 // that the tokenizations make it to the final token database.
// The expansion declares token_1, token_2 and token_3 in this scope; none of
// them is read afterwards.
127 THREE_FOR_ONE("hello", "yes", "something");
// Fixture for the TokenizeToBuffer tests. It provides the buffer_ member the
// tests encode into; the member's declaration is not visible in this excerpt.
130 class TokenizeToBuffer : public ::testing::Test {
// Initializes buffer_ with empty braces before each test.
132 TokenizeToBuffer() : buffer_{} {}
// Encodes a single 64-bit argument into exactly 14 bytes: the 4-byte token
// followed by the 10 bytes listed in `expected`. The format string arguments
// of the macro call and of ExpectedData are not visible in this excerpt.
138 TEST_F(TokenizeToBuffer, Integer64) {
139 size_t message_size = 14;
140 PW_TOKENIZE_TO_BUFFER(
144 static_cast<uint64_t>(0x55555555'55555555ull)); // 0xAAAAAAAA'AAAAAAAA
146 // Pattern becomes 10101010'11010101'10101010 ...
147 constexpr std::array<uint8_t, 14> expected =
148 ExpectedData<0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0x01>(
150 ASSERT_EQ(expected.size(), message_size);
151 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Repeats the encoding of the minimum int64_t value for every offered size
// from 4 to 19 bytes.
// NOTE(review): the branch conditions and several statements are not visible
// in this excerpt. Two outcomes are checked: a token-only result of
// sizeof(uint32_t) bytes, and a 14-byte result.
154 TEST_F(TokenizeToBuffer, Integer64Overflow) {
157 for (size_t size = 4; size < 20; ++size) {
160 PW_TOKENIZE_TO_BUFFER(
164 static_cast<uint64_t>(std::numeric_limits<int64_t>::min()));
// Token-only outcome: just the four hash bytes of the format string.
167 constexpr std::array<uint8_t, 4> empty = ExpectedData("%" PRIx64);
168 ASSERT_EQ(sizeof(uint32_t), message_size);
169 EXPECT_EQ(std::memcmp(empty.data(), &buffer_, empty.size()), 0);
171 // Make sure nothing was written past the end of the buffer.
172 EXPECT_TRUE(std::all_of(&buffer_[size], std::end(buffer_), [](uint8_t v) {
// Full outcome: 14 bytes; the expected contents are not visible here.
176 constexpr std::array<uint8_t, 14> expected =
187 ASSERT_EQ(expected.size(), message_size);
188 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Encodes the minimum int32_t value with 9 bytes available: 4 token bytes
// plus the 5 argument bytes listed in `expected`.
193 TEST_F(TokenizeToBuffer, IntegerNegative) {
194 size_t message_size = 9;
195 PW_TOKENIZE_TO_BUFFER(
196 buffer_, &message_size, "%" PRId32, std::numeric_limits<int32_t>::min());
198 // 0x8000'0000 -zig-zag-> 0xff'ff'ff'ff'0f
199 constexpr std::array<uint8_t, 9> expected =
200 ExpectedData<0xff, 0xff, 0xff, 0xff, 0x0f>("%" PRId32);
201 ASSERT_EQ(expected.size(), message_size);
202 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Encodes -1 with "%d". The result is expected to be 5 bytes (the token plus
// the single argument byte 0x01), although 9 bytes were offered.
205 TEST_F(TokenizeToBuffer, IntegerMin) {
206 size_t message_size = 9;
207 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "%d", -1);
209 constexpr std::array<uint8_t, 5> expected = ExpectedData<0x01>("%d");
// message_size is updated to the number of bytes actually produced.
210 ASSERT_EQ(expected.size(), message_size);
211 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Same argument as IntegerNegative, but only 8 bytes are offered. The
// expected result is the 4-byte token alone, with no partial argument bytes.
214 TEST_F(TokenizeToBuffer, IntegerDoesntFit) {
215 size_t message_size = 8;
216 PW_TOKENIZE_TO_BUFFER(
217 buffer_, &message_size, "%" PRId32, std::numeric_limits<int32_t>::min());
219 constexpr std::array<uint8_t, 4> expected = ExpectedData<>("%" PRId32);
220 ASSERT_EQ(expected.size(), message_size);
221 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Encodes a %s argument with the whole buffer available. The expected bytes
// after the token are the length 5 followed by the characters "5432!".
224 TEST_F(TokenizeToBuffer, String) {
225 size_t message_size = sizeof(buffer_);
227 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
228 constexpr std::array<uint8_t, 10> expected =
229 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
231 ASSERT_EQ(expected.size(), message_size);
232 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// With 8 bytes offered, only three characters of "5432!" fit after the token
// and the status byte. The expected status byte is 0x83: the top bit set
// together with the count 3.
235 TEST_F(TokenizeToBuffer, String_BufferTooSmall_TruncatesAndSetsTopStatusBit) {
236 size_t message_size = 8;
237 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
239 constexpr std::array<uint8_t, 8> truncated_1 =
240 ExpectedData<0x83, '5', '4', '3'>("The answer is: %s");
242 ASSERT_EQ(truncated_1.size(), message_size);
243 EXPECT_EQ(std::memcmp(truncated_1.data(), buffer_, truncated_1.size()), 0);
// With 6 bytes offered, two bytes remain after the token. The expected result
// is the status byte 0x81 followed by the first character '5'.
246 TEST_F(TokenizeToBuffer, String_TwoBytesLeft_TruncatesToOneCharacter) {
247 size_t message_size = 6;
248 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
250 constexpr std::array<uint8_t, 6> truncated_2 =
251 ExpectedData<0x81, '5'>("The answer is: %s");
253 ASSERT_EQ(truncated_2.size(), message_size);
254 EXPECT_EQ(std::memcmp(truncated_2.data(), buffer_, truncated_2.size()), 0);
// With 5 bytes offered, one byte remains after the token. The expected result
// is the lone status byte 0x80 (top bit set, zero characters).
257 TEST_F(TokenizeToBuffer, String_OneByteLeft_OnlyWritesTruncatedStatusByte) {
258 size_t message_size = 5;
259 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
261 std::array<uint8_t, 5> result = ExpectedData<0x80>("The answer is: %s");
262 ASSERT_EQ(result.size(), message_size);
263 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
// An empty string argument needs only its status byte. With one byte left
// after the token, the expected byte is 0, with no truncation bit set.
266 TEST_F(TokenizeToBuffer, EmptyString_OneByteLeft_EncodesCorrectly) {
267 size_t message_size = 5;
268 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "");
270 std::array<uint8_t, 5> result = ExpectedData<0>("The answer is: %s");
271 ASSERT_EQ(result.size(), message_size);
272 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
// With exactly 4 bytes offered there is room for the token only. The expected
// result contains no bytes for the string argument.
275 TEST_F(TokenizeToBuffer, String_ZeroBytesLeft_WritesNothing) {
276 size_t message_size = 4;
277 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
279 constexpr std::array<uint8_t, 4> empty = ExpectedData<>("The answer is: %s");
280 ASSERT_EQ(empty.size(), message_size);
281 EXPECT_EQ(std::memcmp(empty.data(), buffer_, empty.size()), 0);
// The format string is passed as a static constexpr char array rather than a
// literal. The expected token is the one computed for the literal "1234".
284 TEST_F(TokenizeToBuffer, Array) {
285 static constexpr char array[] = "1234";
286 size_t message_size = 4;
287 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, array);
289 constexpr std::array<uint8_t, 4> result = ExpectedData<>("1234");
290 ASSERT_EQ(result.size(), message_size);
291 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
// A null char* passed for %s is expected to be encoded as the four-character
// string "NULL", preceded by its length 4.
294 TEST_F(TokenizeToBuffer, NullptrString_EncodesNull) {
295 char* string = nullptr;
296 size_t message_size = 9;
297 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", string);
299 std::array<uint8_t, 9> result =
300 ExpectedData<4, 'N', 'U', 'L', 'L'>("The answer is: %s");
301 ASSERT_EQ(result.size(), message_size);
302 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
// A null char* with only two bytes left after the token. The expected result
// is the status byte 0x81 followed by 'N', the first character of "NULL".
305 TEST_F(TokenizeToBuffer, NullptrString_BufferTooSmall_EncodesTruncatedNull) {
306 char* string = nullptr;
307 size_t message_size = 6;
308 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", string);
310 std::array<uint8_t, 6> result = ExpectedData<0x81, 'N'>("The answer is: %s");
311 ASSERT_EQ(result.size(), message_size);
312 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
// Same check as the String test, but through PW_TOKENIZE_TO_BUFFER_DOMAIN
// with the domain "TEST_DOMAIN". The expected bytes are built from the hash of
// the format string and the argument only.
315 TEST_F(TokenizeToBuffer, Domain_String) {
316 size_t message_size = sizeof(buffer_);
318 PW_TOKENIZE_TO_BUFFER_DOMAIN(
319 "TEST_DOMAIN", buffer_, &message_size, "The answer was: %s", "5432!");
320 constexpr std::array<uint8_t, 10> expected =
321 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer was: %s");
323 ASSERT_EQ(expected.size(), message_size);
324 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Offers 6 bytes but expects only 5 to be used: the token and one byte for the
// first argument.
// NOTE(review): the remaining macro arguments are not visible in this excerpt.
327 TEST_F(TokenizeToBuffer, TruncateArgs) {
328 // Args that can't fit are dropped completely
329 size_t message_size = 6;
330 PW_TOKENIZE_TO_BUFFER(buffer_,
333 static_cast<uint8_t>(0b0010'1010u),
336 constexpr std::array<uint8_t, 5> expected =
337 ExpectedData<0b0101'0100u>("%u %d");
338 ASSERT_EQ(expected.size(), message_size);
339 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Fills the buffer with '$' and checks that each call reports a size of 0 and
// leaves every byte untouched.
// NOTE(review): only the first size (3) is visible in this excerpt; the
// assignments to message_size before the later calls are not shown.
342 TEST_F(TokenizeToBuffer, NoRoomForToken) {
343 // Nothing is written if there isn't room for the token.
344 std::memset(buffer_, '$', sizeof(buffer_));
345 auto is_untouched = [](uint8_t v) { return v == '$'; };
347 size_t message_size = 3;
348 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer: \"%s\"", "5432!");
349 EXPECT_EQ(0u, message_size);
350 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
353 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello, world!");
354 EXPECT_EQ(0u, message_size);
355 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
358 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello!");
359 EXPECT_EQ(0u, message_size);
360 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
363 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello?");
364 EXPECT_EQ(0u, message_size);
365 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
// Calls the helper pw_tokenizer_ToBufferTest_StringShortFloat and checks the
// 11 bytes it writes: token, a one-character string, one integer byte and a
// four-byte float. The helper and TEST_FORMAT_STRING_SHORT_FLOAT are defined
// outside this file excerpt.
368 TEST_F(TokenizeToBuffer, C_StringShortFloat) {
369 size_t size = sizeof(buffer_);
370 pw_tokenizer_ToBufferTest_StringShortFloat(buffer_, &size);
371 constexpr std::array<uint8_t, 11> expected = // clang-format off
372 ExpectedData<1, '1', // string '1'
373 3, // -2 (zig-zag encoded)
374 0x00, 0x00, 0x40, 0x40 // 3.0 in floating point
375 >(TEST_FORMAT_STRING_SHORT_FLOAT);
376 ASSERT_EQ(expected.size(), size); // clang-format on
377 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Calls the helper pw_tokenizer_ToBufferTest_SequentialZigZag and expects the
// token followed by the bytes 0 through 13, 18 bytes in total. The helper's
// arguments are defined outside this file excerpt.
380 TEST_F(TokenizeToBuffer, C_SequentialZigZag) {
381 size_t size = sizeof(buffer_);
382 pw_tokenizer_ToBufferTest_SequentialZigZag(buffer_, &size);
383 constexpr std::array<uint8_t, 18> expected =
384 ExpectedData<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13>(
385 TEST_FORMAT_SEQUENTIAL_ZIG_ZAG);
387 ASSERT_EQ(expected.size(), size);
388 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
// Runs pw_tokenizer_ToBufferTest_Requires8 twice against a buffer pre-filled
// with '$' and checks that the byte just past the reported size is untouched.
// NOTE(review): the declarations of `size` for each run are not visible in
// this excerpt.
391 TEST_F(TokenizeToBuffer, C_Overflow) {
392 std::memset(buffer_, '$', sizeof(buffer_));
// First run: a 7-byte result is expected, with the final argument absent.
396 pw_tokenizer_ToBufferTest_Requires8(buffer_, &size);
397 constexpr std::array<uint8_t, 7> expected =
398 ExpectedData<2, 'h', 'i'>(TEST_FORMAT_REQUIRES_8);
399 ASSERT_EQ(expected.size(), size);
400 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
401 EXPECT_EQ(buffer_[7], '$');
// Second run: the full 8-byte result is expected, ending in the byte 13.
406 pw_tokenizer_ToBufferTest_Requires8(buffer_, &size);
407 constexpr std::array<uint8_t, 8> expected =
408 ExpectedData<2, 'h', 'i', 13>(TEST_FORMAT_REQUIRES_8);
409 ASSERT_EQ(expected.size(), size);
410 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
411 EXPECT_EQ(buffer_[8], '$');
415 // Test fixture for callback and global handler. Both of these need a global
416 // message buffer. To keep the message buffers separate, template this on the
417 // derived class type.
418 template <typename Impl>
419 class GlobalMessage : public ::testing::Test {
// Copies `size` bytes from `message` into the static message_ buffer and
// records the size. The ASSERT_LE rejects messages larger than the buffer
// before the copy happens.
421 static void SetMessage(const uint8_t* message, size_t size) {
422 ASSERT_LE(size, sizeof(message_));
423 std::memcpy(message_, message, size);
424 message_size_bytes_ = size;
// Clears the captured message and its size.
// NOTE(review): the enclosing function is not visible in this excerpt.
429 std::memset(message_, 0, sizeof(message_));
430 message_size_bytes_ = 0;
// Storage for the most recently captured message, one copy per Impl.
433 static uint8_t message_[256];
434 static size_t message_size_bytes_;
// Out-of-class definitions of GlobalMessage's static data members. Each
// instantiation starts with an all-zero buffer and a size of 0.
437 template <typename Impl>
438 uint8_t GlobalMessage<Impl>::message_[256] = {};
439 template <typename Impl>
440 size_t GlobalMessage<Impl>::message_size_bytes_ = 0;
// Fixture for the PW_TOKENIZE_TO_CALLBACK tests. It instantiates
// GlobalMessage with itself as Impl, so it gets its own message_ buffer.
442 class TokenizeToCallback : public GlobalMessage<TokenizeToCallback> {};
// Encodes a string, an unsigned integer, a float and a char in one message.
// The bytes are delivered to SetMessage and compared against the captured
// message_ contents.
444 TEST_F(TokenizeToCallback, Variety) {
445 PW_TOKENIZE_TO_CALLBACK(
446 SetMessage, "%s there are %x (%.2f) of them%c", "Now", 2u, 2.0f, '.');
447 const auto expected = // clang-format off
448 ExpectedData<3, 'N', 'o', 'w', // string "Now"
449 0x04, // unsigned 2 (zig-zag encoded)
450 0x00, 0x00, 0x00, 0x40, // float 2.0
451 0x5C // char '.' (0x2E, zig-zag encoded)
452 >("%s there are %x (%.2f) of them%c");
454 ASSERT_EQ(expected.size(), message_size_bytes_);
455 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
// Encodes a single %s argument through the callback. The captured message is
// expected to be the token, the length 5 and the characters "5432!".
458 TEST_F(TokenizeToCallback, Strings) {
459 PW_TOKENIZE_TO_CALLBACK(SetMessage, "The answer is: %s", "5432!");
460 constexpr std::array<uint8_t, 10> expected =
461 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
462 ASSERT_EQ(expected.size(), message_size_bytes_);
463 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
// Same check as the Strings test, but through PW_TOKENIZE_TO_CALLBACK_DOMAIN
// with the domain "TEST_DOMAIN". The expected bytes are identical to the
// non-domain variant.
466 TEST_F(TokenizeToCallback, Domain_Strings) {
467 PW_TOKENIZE_TO_CALLBACK_DOMAIN(
468 "TEST_DOMAIN", SetMessage, "The answer is: %s", "5432!");
469 constexpr std::array<uint8_t, 10> expected =
470 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
471 ASSERT_EQ(expected.size(), message_size_bytes_);
472 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
// Passes SetMessage to the helper pw_tokenizer_ToCallbackTest_SequentialZigZag
// and expects the captured message to be the token followed by the bytes 0
// through 13. The helper is defined outside this file excerpt.
475 TEST_F(TokenizeToCallback, C_SequentialZigZag) {
476 pw_tokenizer_ToCallbackTest_SequentialZigZag(SetMessage);
478 constexpr std::array<uint8_t, 18> expected =
479 ExpectedData<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13>(
480 TEST_FORMAT_SEQUENTIAL_ZIG_ZAG);
481 ASSERT_EQ(expected.size(), message_size_bytes_);
482 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
485 // Hijack an internal macro to capture the tokenizer domain.
//
// After this redefinition the macro assigns its `domain` and `string`
// arguments to variables named tokenizer_domain and string_literal, which must
// exist in the scope where a tokenization macro is expanded. The `token`
// argument is ignored. The expansion deliberately has no trailing semicolon.
486 #undef _PW_TOKENIZER_RECORD_ORIGINAL_STRING
487 #define _PW_TOKENIZER_RECORD_ORIGINAL_STRING(token, domain, string) \
488 tokenizer_domain = domain; \
489 string_literal = string
// With the redefined _PW_TOKENIZER_RECORD_ORIGINAL_STRING in effect,
// PW_TOKENIZE_TO_BUFFER must record PW_TOKENIZER_DEFAULT_DOMAIN as the domain
// and the unmodified format string.
491 TEST_F(TokenizeToBuffer, Domain_Default) {
// Targets of the assignments made by the redefined macro.
492 const char* tokenizer_domain = nullptr;
493 const char* string_literal = nullptr;
495 size_t message_size = sizeof(buffer_);
497 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
499 EXPECT_STREQ(tokenizer_domain, PW_TOKENIZER_DEFAULT_DOMAIN);
500 EXPECT_STREQ(string_literal, "The answer is: %s");
// With the redefined _PW_TOKENIZER_RECORD_ORIGINAL_STRING in effect,
// PW_TOKENIZE_TO_BUFFER_DOMAIN must record the explicitly given domain "._."
// and the unmodified format string.
503 TEST_F(TokenizeToBuffer, Domain_Specified) {
// Targets of the assignments made by the redefined macro.
504 const char* tokenizer_domain = nullptr;
505 const char* string_literal = nullptr;
507 size_t message_size = sizeof(buffer_);
509 PW_TOKENIZE_TO_BUFFER_DOMAIN(
510 "._.", buffer_, &message_size, "The answer is: %s", "5432!");
512 EXPECT_STREQ(tokenizer_domain, "._.");
513 EXPECT_STREQ(string_literal, "The answer is: %s");
// With the redefined _PW_TOKENIZER_RECORD_ORIGINAL_STRING in effect,
// PW_TOKENIZE_TO_CALLBACK must record PW_TOKENIZER_DEFAULT_DOMAIN as the
// domain and the unmodified format string.
516 TEST_F(TokenizeToCallback, Domain_Default) {
// Targets of the assignments made by the redefined macro.
517 const char* tokenizer_domain = nullptr;
518 const char* string_literal = nullptr;
520 PW_TOKENIZE_TO_CALLBACK(SetMessage, "The answer is: %s", "5432!");
522 EXPECT_STREQ(tokenizer_domain, PW_TOKENIZER_DEFAULT_DOMAIN);
523 EXPECT_STREQ(string_literal, "The answer is: %s");
// With the redefined _PW_TOKENIZER_RECORD_ORIGINAL_STRING in effect,
// PW_TOKENIZE_TO_CALLBACK_DOMAIN must record the explicitly given domain
// "ThisIsTheDomain" and the unmodified format string.
526 TEST_F(TokenizeToCallback, Domain_Specified) {
// Targets of the assignments made by the redefined macro.
527 const char* tokenizer_domain = nullptr;
528 const char* string_literal = nullptr;
530 PW_TOKENIZE_TO_CALLBACK_DOMAIN(
531 "ThisIsTheDomain", SetMessage, "The answer is: %s", "5432!");
533 EXPECT_STREQ(tokenizer_domain, "ThisIsTheDomain");
534 EXPECT_STREQ(string_literal, "The answer is: %s");
538 } // namespace pw::tokenizer