1 // Copyright 2020 The Pigweed Authors
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
7 // https://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
18 #include <string_view>
20 #include "pw_preprocessor/compiler.h"
22 namespace pw::tokenizer {
24 // The constant to use when generating the hash. Changing this changes the value
25 // of all hashes, so do not change it randomly.
26 inline constexpr uint32_t k65599HashConstant = 65599u;
28 // Calculates the hash of a string. This function calculates hashes at either
29 // runtime or compile time in C++ code.
31 // This function only hashes up to a fixed length. Characters beyond that length
32 // are ignored. Hashing to a fixed length makes it possible to compute this hash
33 // in a preprocessor macro. To eliminate some collisions, the length of the
34 // string is hashed as if it were the first character.
36 // This hash is calculated with the following equation, where s is the string
37 // and k is the maximum hash length:
39 // H(s, k) = len(s) + 65599 * s[0] + 65599^2 * s[1] + ... + 65599^k * s[k-1]
41 // The hash algorithm is a modified version of the x65599 hash used by the SDBM
42 // open source project. This hash has the following differences from x65599:
43 // - Characters are only hashed up to a fixed maximum string length.
44 // - Characters are hashed in reverse order.
45 // - The string length is hashed as the first character in the string.
46 constexpr uint32_t PwTokenizer65599FixedLengthHash(std::string_view string,
48 PW_NO_SANITIZE("unsigned-integer-overflow") {
49 // The length is hashed as if it were the first character.
50 uint32_t hash = string.size();
51 uint32_t coefficient = k65599HashConstant;
53 // Hash all of the characters in the string as unsigned ints.
54 // The coefficient calculation is done modulo 0x100000000, so the unsigned
55 // integer overflows are intentional.
56 for (uint8_t ch : string.substr(0, hash_length)) {
57 hash += coefficient * ch;
58 coefficient *= k65599HashConstant;
64 // Take the string as an array to support either literals or character arrays,
65 // but not const char*.
66 template <size_t length>
67 constexpr uint32_t PwTokenizer65599FixedLengthHashArray(
68 const char (&string)[length], size_t hash_length) {
69 static_assert(length > 0);
70 return PwTokenizer65599FixedLengthHash(std::string_view(string, length - 1),
74 } // namespace pw::tokenizer