# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
15 """Tests for the database module."""
19 from pathlib import Path
24 from unittest import mock
26 from pw_tokenizer import database
28 # This is an ELF file with only the pw_tokenizer sections. It was created
29 # from a tokenize_test binary built for the STM32F429i Discovery board. The
30 # pw_tokenizer sections were extracted with this command:
32 # arm-none-eabi-objcopy -S --only-section ".pw_tokenize*" <ELF> <OUTPUT>
# Both example ELF files live in the same directory as this test module.
TOKENIZED_ENTRIES_ELF = Path(__file__).with_name(
    'example_binary_with_tokenized_strings.elf')
LEGACY_PLAIN_STRING_ELF = Path(__file__).with_name(
    'example_legacy_binary_with_tokenized_strings.elf')
39 CSV_DEFAULT_DOMAIN = '''\
41 141c35d5, ,"The answer: ""%s"""
44 2e668cd6, ,"Jello, world!"
47 68ab92da, ,"%s there are %x (%.2f) of them%c"
48 7b940e2a, ,"Hello %s! %hd %e"
52 881436a0, ,"The answer is: %s"
53 88808930, ,"%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
55 a09d6698, ,"won-won-won-wonderful"
56 aa9ffa66, ,"void pw::tokenizer::{anonymous}::TestName()"
61 e65aefef, ,"Won't fit : %s%d"
64 CSV_TEST_DOMAIN = """\
67 59b2701c, ,"The answer was: %s"
68 881436a0, ,"The answer is: %s"
69 d18ada0f, ,"something"
72 CSV_ALL_DOMAINS = '''\
74 141c35d5, ,"The answer: ""%s"""
79 2e668cd6, ,"Jello, world!"
81 59b2701c, ,"The answer was: %s"
83 68ab92da, ,"%s there are %x (%.2f) of them%c"
84 7b940e2a, ,"Hello %s! %hd %e"
88 881436a0, ,"The answer is: %s"
89 88808930, ,"%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
91 a09d6698, ,"won-won-won-wonderful"
92 aa9ffa66, ,"void pw::tokenizer::{anonymous}::TestName()"
96 d18ada0f, ,"something"
98 e65aefef, ,"Won't fit : %s%d"
102 str(TOKENIZED_ENTRIES_ELF): {
104 'present_entries': 22,
105 'present_size_bytes': 289,
107 'total_size_bytes': 289,
111 'present_entries': 5,
112 'present_size_bytes': 57,
114 'total_size_bytes': 57,
def run_cli(*args) -> None:
    """Runs the database.py command line interface with the given arguments.

    sys.argv is temporarily replaced with 'database.py' followed by the
    arguments, then database._parse_args() and database._main() are invoked.

    Args:
      *args: Command line arguments; each is converted to a string with str().

    Raises:
      Any exception raised by database._parse_args() or database._main(). The
      original sys.argv is restored and the log handler is removed before the
      exception propagates, so a failing invocation cannot leak state into
      later tests.
    """
    original_argv = sys.argv
    sys.argv = ['database.py', *(str(a) for a in args)]
    # pylint: disable=protected-access

    try:
        database._main(*database._parse_args())
    finally:
        # Remove the log handler added by _main to avoid duplicate logs.
        if database._LOG.handlers:
            database._LOG.handlers.pop()
        # pylint: enable=protected-access

        sys.argv = original_argv
def _mock_output() -> io.TextIOWrapper:
    """Returns a text stream that writes through to an in-memory buffer.

    The underlying io.BytesIO is reachable through the returned wrapper's
    .buffer attribute and is given the name '<fake stdout>'.
    """
    fake_stdout = io.TextIOWrapper(io.BytesIO(), write_through=True)
    fake_stdout.buffer.name = '<fake stdout>'
    return fake_stdout
class DatabaseCommandLineTest(unittest.TestCase):
    """Tests the database.py command line interface."""
    def setUp(self):
        """Creates a scratch directory and selects the ELF under test."""
        self._dir = Path(tempfile.mkdtemp('_pw_tokenizer_test'))
        self._csv = self._dir / 'db.csv'
        self._elf = TOKENIZED_ENTRIES_ELF

        # Kept as an attribute so subclasses can substitute the CSV they
        # expect for TEST_DOMAIN.
        self._csv_test_domain = CSV_TEST_DOMAIN

    def tearDown(self):
        """Deletes the scratch directory created by setUp."""
        shutil.rmtree(self._dir)

    def test_create_csv(self):
        run_cli('create', '--database', self._csv, self._elf)

        self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
                         self._csv.read_text().splitlines())

    def test_create_csv_test_domain(self):
        run_cli('create', '--database', self._csv, f'{self._elf}#TEST_DOMAIN')

        self.assertEqual(self._csv_test_domain.splitlines(),
                         self._csv.read_text().splitlines())

    def test_create_csv_all_domains(self):
        run_cli('create', '--database', self._csv, f'{self._elf}#.*')

        self.assertEqual(CSV_ALL_DOMAINS.splitlines(),
                         self._csv.read_text().splitlines())

    def test_create_force(self):
        self._csv.write_text(CSV_ALL_DOMAINS)

        # Creating over an existing database must fail unless --force is given.
        with self.assertRaises(FileExistsError):
            run_cli('create', '--database', self._csv, self._elf)

        run_cli('create', '--force', '--database', self._csv, self._elf)

    def test_create_binary(self):
        binary = self._dir / 'db.bin'
        run_cli('create', '--type', 'binary', '--database', binary, self._elf)

        # Write the binary database as CSV to verify its contents.
        run_cli('create', '--database', self._csv, binary)

        self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
                         self._csv.read_text().splitlines())

    def test_add_does_not_recalculate_tokens(self):
        db_with_custom_token = '01234567, ,"hello"'

        to_add = self._dir / 'add_this.csv'
        to_add.write_text(db_with_custom_token + '\n')

        # NOTE(review): unlike the other tests, nothing creates self._csv
        # before `add` runs -- confirm `add` accepts a missing database file.
        run_cli('add', '--database', self._csv, to_add)
        self.assertEqual(db_with_custom_token.splitlines(),
                         self._csv.read_text().splitlines())

    def test_mark_removals(self):
        self._csv.write_text(CSV_ALL_DOMAINS)

        run_cli('mark_removals', '--database', self._csv, '--date',
                '1998-09-04', self._elf)

        # Add the removal date to the four tokens not in the default domain
        new_csv = CSV_ALL_DOMAINS
        new_csv = new_csv.replace('17fa86d3,          ,"hello"',
                                  '17fa86d3,1998-09-04,"hello"')
        new_csv = new_csv.replace('18c5017c,          ,"yes"',
                                  '18c5017c,1998-09-04,"yes"')
        new_csv = new_csv.replace('59b2701c,          ,"The answer was: %s"',
                                  '59b2701c,1998-09-04,"The answer was: %s"')
        new_csv = new_csv.replace('d18ada0f,          ,"something"',
                                  'd18ada0f,1998-09-04,"something"')
        # Guard against the replacements above silently matching nothing.
        self.assertNotEqual(CSV_ALL_DOMAINS, new_csv)

        self.assertEqual(new_csv.splitlines(),
                         self._csv.read_text().splitlines())

    def test_purge(self):
        self._csv.write_text(CSV_ALL_DOMAINS)

        # Mark everything not in TEST_DOMAIN as removed.
        run_cli('mark_removals', '--database', self._csv,
                f'{self._elf}#TEST_DOMAIN')

        # Delete all entries except those in TEST_DOMAIN.
        run_cli('purge', '--database', self._csv)

        self.assertEqual(self._csv_test_domain.splitlines(),
                         self._csv.read_text().splitlines())

    @mock.patch('sys.stdout', new_callable=_mock_output)
    def test_report(self, mock_stdout):
        run_cli('report', self._elf)

        # The report is read back as JSON from the patched stdout's buffer.
        self.assertEqual(json.loads(mock_stdout.buffer.getvalue()),
                         EXPECTED_REPORT)

    def test_replace(self):
        # NOTE(review): the replacement text was not defined in the code under
        # review; this value is a stand-in -- confirm it against the intended
        # test input.
        sub = 'replaced/stuff'
        run_cli('create', '--database', self._csv, self._elf, '--replace',
                r'(?i)\b[jh]ello\b/' + sub)
        self.assertEqual(
            CSV_DEFAULT_DOMAIN.replace('Jello', sub).replace('Hello', sub),
            self._csv.read_text())
class LegacyDatabaseCommandLineTest(DatabaseCommandLineTest):
    """Test an ELF with the legacy plain string storage format."""
    def setUp(self):
        """Runs the base fixture, then switches to the legacy ELF."""
        super().setUp()
        self._elf = LEGACY_PLAIN_STRING_ELF

        # The legacy approach for storing tokenized strings in an ELF always
        # adds an entry for "", even if the empty string was never tokenized.
        self._csv_test_domain = '00000000,          ,""\n' + CSV_TEST_DOMAIN

    @mock.patch('sys.stdout', new_callable=_mock_output)
    def test_report(self, mock_stdout):
        run_cli('report', self._elf)

        # Copy each domain's stats as well as the outer dict. dict.copy() is
        # shallow, so adjusting the nested values through it would modify the
        # module-level EXPECTED_REPORT that other tests compare against.
        report = {
            domain: dict(stats)
            for domain, stats in EXPECTED_REPORT[str(
                TOKENIZED_ENTRIES_ELF)].items()
        }

        # Count the implicitly added "" entry in TEST_DOMAIN.
        report['TEST_DOMAIN']['present_entries'] += 1
        report['TEST_DOMAIN']['present_size_bytes'] += 1
        report['TEST_DOMAIN']['total_entries'] += 1
        report['TEST_DOMAIN']['total_size_bytes'] += 1

        # Rename "" to the legacy name "default"
        report['default'] = report.pop('')

        self.assertEqual({str(LEGACY_PLAIN_STRING_ELF): report},
                         json.loads(mock_stdout.buffer.getvalue()))
281 if __name__ == '__main__':