2 # Copyright 2020 The Pigweed Authors
4 # Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 # use this file except in compliance with the License. You may obtain a copy of
8 # https://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 # License for the specific language governing permissions and limitations under
15 """Generates a C macro for the PW tokenizer 65599 fixed length hash."""
21 HASH_NAME = 'pw_tokenizer_65599_fixed_length'
22 HASH_LENGTHS = 80, 96, 128
25 // Copyright {year} The Pigweed Authors
27 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
28 // use this file except in compliance with the License. You may obtain a copy of
31 // https://www.apache.org/licenses/LICENSE-2.0
33 // Unless required by applicable law or agreed to in writing, software
34 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
35 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
36 // License for the specific language governing permissions and limitations under
39 // AUTOGENERATED - DO NOT EDIT
41 // This file was generated by {script}.
42 // To make changes, update the script and run it to regenerate the files.
47 // {hash_length}-character version of the tokenizer hash function.
49 // The argument must be a string literal. It is concatenated with "" to ensure
50 // that this is the case.
57 def generate_pw_tokenizer_65599_fixed_length_hash_macro(hash_length):
58 """Generate macro that hashes a string literal using a modified x65599 hash.
60 The macros generated by this function only operate on string literals.
62 Since macros can only operate on fixed-length strings, the hash macro only
63 hashes up to a fixed length, and characters beyond that length are ignored.
64 To eliminate some collisions, the length of the string is hashed as if it
65 were the first character.
67 This hash is calculated with the following equation, where s is the string
68 and k is the maximum hash length:
70 H(s, k) = len(s) + 65599 * s[0] + 65599^2 * s[1] + ... + 65599^k * s[k-1]
72 The hash algorithm is a modified version of the x65599 hash used by the SDBM
73 open source project. This hash has the following differences from x65599:
74 - Characters are only hashed up to a fixed maximum string length.
75 - Characters are hashed in reverse order.
76 - The string length is hashed as the first character in the string.
78 The code generated by this function is intentionally sparse. Each character
79 appears hash_length times per log message, so using fewer characters results
80 in faster compilation times.
83 hash_length: maximum string size to hash; extra characters are ignored
86 the macro header file as a string
89 first_hash_term = ('(uint32_t)(sizeof(str "") - 1 + '
90 '/* The argument must be a string literal. */ \\\n')
92 # Use this to add the aligned backslash at the end of the macro lines.
93 line_format = '{{:<{}}}\\\n'.format(len(first_hash_term))
96 FILE_HEADER.format(script=os.path.basename(__file__),
97 hash_length=hash_length,
98 year=datetime.date.today().year)
102 line_format.format('#define {}_{}_HASH(str)'.format(
103 HASH_NAME.upper(), hash_length)))
104 lines.append(' ' + first_hash_term) # add indendation and the macro line
106 indent = ' ' * len(' (uint32_t)(')
107 coefficient_format = '0x{coefficient:0>8x}u'
109 # The string will have at least a null terminator
111 line_format.format('{}0x{:0>8x}u * (uint8_t)str[0] +'.format(
112 indent, HASH_CONSTANT)))
114 # Format string to use for the remaining terms.
116 '{indent}{coefficient} * '
117 '(uint8_t)({index} < sizeof(str) ? str[{index}] : 0) +').format(
119 coefficient=coefficient_format,
120 index='{{index:>{}}}'.format(len(str(hash_length - 1))))
122 for i in range(1, hash_length):
123 coefficient = HASH_CONSTANT**(i + 1) % 2**32
124 term = term_format.format(index=i, coefficient=coefficient)
125 lines.append(line_format.format(term))
127 # Remove the extra + and \ and add the closing )
128 lines[-1] = lines[-1].rstrip(' +\\\n') + ')'
130 lines.append('\n\n// clang-format on\n')
132 return ''.join(lines)
136 base = os.path.abspath(
137 os.path.join(os.path.dirname(__file__), '..', 'public', 'pw_tokenizer',
140 # Generate macros for hashes of the specified lengths.
141 for hash_length in HASH_LENGTHS:
143 base, '{}_{}_hash_macro.h'.format(HASH_NAME, hash_length))
145 with open(path, 'w') as header_file:
147 generate_pw_tokenizer_65599_fixed_length_hash_macro(
150 print('Generated {}-character hash macro at {}'.format(
154 if __name__ == '__main__':