1 /* xgettext C/C++/ObjectiveC backend.
2 Copyright (C) 1995-1998, 2000-2009, 2012, 2015 Free Software
5 This file was written by Peter Miller <millerp@canb.auug.org.au>
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
37 #include "error-progname.h"
39 #include "xvasprintf.h"
41 #include "po-charset.h"
44 #define _(s) gettext(s)
46 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
49 /* The ANSI C standard defines several phases of translation:
51 1. Terminate line by \n, regardless of the external representation
52 of a text line. Stdio does this for us.
54 2. Convert trigraphs to their single character equivalents.
56 3. Concatenate each line ending in backslash (\) with the following
59 4. Replace each comment with a space character.
61 5. Parse each resulting logical line as preprocessing tokens and
64 6. Recognize and carry out directives (it also expands macros on
65 non-directive lines, which we do not do here).
67 7. Replace escape sequences within character strings with their
68 single character equivalents (we do this in step 5, because we
69 don't have to worry about the #include argument).
71 8. Concatenate adjacent string literals to form single string
72 literals (because we don't expand macros, there are a few things
75 9. Convert the remaining preprocessing tokens to C tokens and
76 discard any white space from the translation unit.
78 This lexer implements the above, and presents the scanner (in
79 xgettext.c) with a stream of C tokens. The comments are
80 accumulated in a buffer, and given to xgettext when asked for. */
83 /* ========================= Lexer customization. ========================= */
85 static bool trigraphs = false;
94 /* ====================== Keyword set customization. ====================== */
96 /* If true, extract all strings. */
97 static bool extract_all = false;
99 static hash_table c_keywords;
100 static hash_table objc_keywords;
101 static bool default_keywords = true;
112 add_keyword (const char *name, hash_table *keywords)
115 default_keywords = false;
119 struct callshape shape;
122 if (keywords->table == NULL)
123 hash_init (keywords, 100);
125 split_keywordspec (name, &end, &shape);
127 /* The characters between name and end should form a valid C identifier.
128 A colon means an invalid parse in split_keywordspec(). */
129 colon = strchr (name, ':');
130 if (colon == NULL || colon >= end)
131 insert_keyword_callshape (keywords, name, end - name, &shape);
136 x_c_keyword (const char *name)
138 add_keyword (name, &c_keywords);
142 x_objc_keyword (const char *name)
144 add_keyword (name, &objc_keywords);
147 static bool additional_keywords_kde;
150 activate_additional_keywords_kde ()
152 additional_keywords_kde = true;
155 /* Finish initializing the keywords hash tables.
156 Called after argument processing, before each file is processed. */
160 if (default_keywords)
162 /* When adding new keywords here, also update the documentation in
164 x_c_keyword ("gettext");
165 x_c_keyword ("dgettext:2");
166 x_c_keyword ("dcgettext:2");
167 x_c_keyword ("ngettext:1,2");
168 x_c_keyword ("dngettext:2,3");
169 x_c_keyword ("dcngettext:2,3");
170 x_c_keyword ("gettext_noop");
171 x_c_keyword ("pgettext:1c,2");
172 x_c_keyword ("dpgettext:2c,3");
173 x_c_keyword ("dcpgettext:2c,3");
174 x_c_keyword ("npgettext:1c,2,3");
175 x_c_keyword ("dnpgettext:2c,3,4");
176 x_c_keyword ("dcnpgettext:2c,3,4");
178 if (additional_keywords_kde)
180 x_c_keyword ("i18n:1");
181 x_c_keyword ("i18nc:1c,2");
182 x_c_keyword ("i18np:1,2");
183 x_c_keyword ("i18ncp:1c,2,3");
184 x_c_keyword ("i18nd:2");
185 x_c_keyword ("i18ndc:2c,3");
186 x_c_keyword ("i18ndp:2,3");
187 x_c_keyword ("i18ndcp:2c,3,4");
188 x_c_keyword ("ki18n:1");
189 x_c_keyword ("ki18nc:1c,2");
190 x_c_keyword ("ki18np:1,2");
191 x_c_keyword ("ki18ncp:1c,2,3");
192 x_c_keyword ("ki18nd:2");
193 x_c_keyword ("ki18ndc:2c,3");
194 x_c_keyword ("ki18ndp:2,3");
195 x_c_keyword ("ki18ndcp:2c,3,4");
196 x_c_keyword ("I18N_NOOP:1");
197 x_c_keyword ("I18NC_NOOP:1c,2");
198 x_c_keyword ("I18N_NOOP2:1c,2");
199 x_c_keyword ("I18N_NOOP2_NOSTRIP:1c,2");
200 x_c_keyword ("xi18n:1");
201 x_c_keyword ("xi18nc:1c,2");
202 x_c_keyword ("xi18np:1,2");
203 x_c_keyword ("xi18ncp:1c,2,3");
204 x_c_keyword ("xi18nd:2");
205 x_c_keyword ("xi18ndc:2c,3");
206 x_c_keyword ("xi18ndp:2,3");
207 x_c_keyword ("xi18ndcp:2c,3,4");
208 x_c_keyword ("kxi18n:1");
209 x_c_keyword ("kxi18nc:1c,2");
210 x_c_keyword ("kxi18np:1,2");
211 x_c_keyword ("kxi18ncp:1c,2,3");
212 x_c_keyword ("kxi18nd:2");
213 x_c_keyword ("kxi18ndc:2c,3");
214 x_c_keyword ("kxi18ndp:2,3");
215 x_c_keyword ("kxi18ndcp:2c,3,4");
216 x_c_keyword ("XI18N_NOOP:1");
217 x_c_keyword ("XI18NC_NOOP:1c,2");
218 x_c_keyword ("XI18N_NOOP2:1c,2");
219 x_c_keyword ("XI18N_NOOP2_NOSTRIP:1c,2");
222 x_objc_keyword ("gettext");
223 x_objc_keyword ("dgettext:2");
224 x_objc_keyword ("dcgettext:2");
225 x_objc_keyword ("ngettext:1,2");
226 x_objc_keyword ("dngettext:2,3");
227 x_objc_keyword ("dcngettext:2,3");
228 x_objc_keyword ("gettext_noop");
229 x_objc_keyword ("pgettext:1c,2");
230 x_objc_keyword ("dpgettext:2c,3");
231 x_objc_keyword ("dcpgettext:2c,3");
232 x_objc_keyword ("npgettext:1c,2,3");
233 x_objc_keyword ("dnpgettext:2c,3,4");
234 x_objc_keyword ("dcnpgettext:2c,3,4");
235 x_objc_keyword ("NSLocalizedString"); /* similar to gettext */
236 x_objc_keyword ("_"); /* similar to gettext */
237 x_objc_keyword ("NSLocalizedStaticString"); /* similar to gettext_noop */
238 x_objc_keyword ("__"); /* similar to gettext_noop */
240 default_keywords = false;
247 xgettext_record_flag ("gettext:1:pass-c-format");
248 xgettext_record_flag ("dgettext:2:pass-c-format");
249 xgettext_record_flag ("dcgettext:2:pass-c-format");
250 xgettext_record_flag ("ngettext:1:pass-c-format");
251 xgettext_record_flag ("ngettext:2:pass-c-format");
252 xgettext_record_flag ("dngettext:2:pass-c-format");
253 xgettext_record_flag ("dngettext:3:pass-c-format");
254 xgettext_record_flag ("dcngettext:2:pass-c-format");
255 xgettext_record_flag ("dcngettext:3:pass-c-format");
256 xgettext_record_flag ("gettext_noop:1:pass-c-format");
257 xgettext_record_flag ("pgettext:2:pass-c-format");
258 xgettext_record_flag ("dpgettext:3:pass-c-format");
259 xgettext_record_flag ("dcpgettext:3:pass-c-format");
260 xgettext_record_flag ("npgettext:2:pass-c-format");
261 xgettext_record_flag ("npgettext:3:pass-c-format");
262 xgettext_record_flag ("dnpgettext:3:pass-c-format");
263 xgettext_record_flag ("dnpgettext:4:pass-c-format");
264 xgettext_record_flag ("dcnpgettext:3:pass-c-format");
265 xgettext_record_flag ("dcnpgettext:4:pass-c-format");
268 xgettext_record_flag ("fprintf:2:c-format");
269 xgettext_record_flag ("vfprintf:2:c-format");
270 xgettext_record_flag ("printf:1:c-format");
271 xgettext_record_flag ("vprintf:1:c-format");
272 xgettext_record_flag ("sprintf:2:c-format");
273 xgettext_record_flag ("vsprintf:2:c-format");
274 xgettext_record_flag ("snprintf:3:c-format");
275 xgettext_record_flag ("vsnprintf:3:c-format");
276 #if 0 /* These functions are not standard. */
278 xgettext_record_flag ("asprintf:2:c-format");
279 xgettext_record_flag ("vasprintf:2:c-format");
280 xgettext_record_flag ("dprintf:2:c-format");
281 xgettext_record_flag ("vdprintf:2:c-format");
282 xgettext_record_flag ("obstack_printf:2:c-format");
283 xgettext_record_flag ("obstack_vprintf:2:c-format");
285 xgettext_record_flag ("error:3:c-format");
286 xgettext_record_flag ("error_at_line:5:c-format");
288 xgettext_record_flag ("argp_error:2:c-format");
289 xgettext_record_flag ("argp_failure:2:c-format");
292 xgettext_record_flag ("gettext:1:pass-qt-format");
293 xgettext_record_flag ("dgettext:2:pass-qt-format");
294 xgettext_record_flag ("dcgettext:2:pass-qt-format");
295 xgettext_record_flag ("ngettext:1:pass-qt-format");
296 xgettext_record_flag ("ngettext:2:pass-qt-format");
297 xgettext_record_flag ("dngettext:2:pass-qt-format");
298 xgettext_record_flag ("dngettext:3:pass-qt-format");
299 xgettext_record_flag ("dcngettext:2:pass-qt-format");
300 xgettext_record_flag ("dcngettext:3:pass-qt-format");
301 xgettext_record_flag ("gettext_noop:1:pass-qt-format");
302 xgettext_record_flag ("pgettext:2:pass-qt-format");
303 xgettext_record_flag ("dpgettext:3:pass-qt-format");
304 xgettext_record_flag ("dcpgettext:3:pass-qt-format");
305 xgettext_record_flag ("npgettext:2:pass-qt-format");
306 xgettext_record_flag ("npgettext:3:pass-qt-format");
307 xgettext_record_flag ("dnpgettext:3:pass-qt-format");
308 xgettext_record_flag ("dnpgettext:4:pass-qt-format");
309 xgettext_record_flag ("dcnpgettext:3:pass-qt-format");
310 xgettext_record_flag ("dcnpgettext:4:pass-qt-format");
312 xgettext_record_flag ("gettext:1:pass-kde-format");
313 xgettext_record_flag ("dgettext:2:pass-kde-format");
314 xgettext_record_flag ("dcgettext:2:pass-kde-format");
315 xgettext_record_flag ("ngettext:1:pass-kde-format");
316 xgettext_record_flag ("ngettext:2:pass-kde-format");
317 xgettext_record_flag ("dngettext:2:pass-kde-format");
318 xgettext_record_flag ("dngettext:3:pass-kde-format");
319 xgettext_record_flag ("dcngettext:2:pass-kde-format");
320 xgettext_record_flag ("dcngettext:3:pass-kde-format");
321 xgettext_record_flag ("gettext_noop:1:pass-kde-format");
322 xgettext_record_flag ("pgettext:2:pass-kde-format");
323 xgettext_record_flag ("dpgettext:3:pass-kde-format");
324 xgettext_record_flag ("dcpgettext:3:pass-kde-format");
325 xgettext_record_flag ("npgettext:2:pass-kde-format");
326 xgettext_record_flag ("npgettext:3:pass-kde-format");
327 xgettext_record_flag ("dnpgettext:3:pass-kde-format");
328 xgettext_record_flag ("dnpgettext:4:pass-kde-format");
329 xgettext_record_flag ("dcnpgettext:3:pass-kde-format");
330 xgettext_record_flag ("dcnpgettext:4:pass-kde-format");
332 xgettext_record_flag ("gettext:1:pass-boost-format");
333 xgettext_record_flag ("dgettext:2:pass-boost-format");
334 xgettext_record_flag ("dcgettext:2:pass-boost-format");
335 xgettext_record_flag ("ngettext:1:pass-boost-format");
336 xgettext_record_flag ("ngettext:2:pass-boost-format");
337 xgettext_record_flag ("dngettext:2:pass-boost-format");
338 xgettext_record_flag ("dngettext:3:pass-boost-format");
339 xgettext_record_flag ("dcngettext:2:pass-boost-format");
340 xgettext_record_flag ("dcngettext:3:pass-boost-format");
341 xgettext_record_flag ("gettext_noop:1:pass-boost-format");
342 xgettext_record_flag ("pgettext:2:pass-boost-format");
343 xgettext_record_flag ("dpgettext:3:pass-boost-format");
344 xgettext_record_flag ("dcpgettext:3:pass-boost-format");
345 xgettext_record_flag ("npgettext:2:pass-boost-format");
346 xgettext_record_flag ("npgettext:3:pass-boost-format");
347 xgettext_record_flag ("dnpgettext:3:pass-boost-format");
348 xgettext_record_flag ("dnpgettext:4:pass-boost-format");
349 xgettext_record_flag ("dcnpgettext:3:pass-boost-format");
350 xgettext_record_flag ("dcnpgettext:4:pass-boost-format");
352 /* <boost/format.hpp> */
353 xgettext_record_flag ("format:1:boost-format");
357 init_flag_table_objc ()
359 /* Since the settings done in init_flag_table_c() also have an effect for
360 the ObjectiveC parser, we don't have to repeat them here. */
361 xgettext_record_flag ("gettext:1:pass-objc-format");
362 xgettext_record_flag ("dgettext:2:pass-objc-format");
363 xgettext_record_flag ("dcgettext:2:pass-objc-format");
364 xgettext_record_flag ("ngettext:1:pass-objc-format");
365 xgettext_record_flag ("ngettext:2:pass-objc-format");
366 xgettext_record_flag ("dngettext:2:pass-objc-format");
367 xgettext_record_flag ("dngettext:3:pass-objc-format");
368 xgettext_record_flag ("dcngettext:2:pass-objc-format");
369 xgettext_record_flag ("dcngettext:3:pass-objc-format");
370 xgettext_record_flag ("gettext_noop:1:pass-objc-format");
371 xgettext_record_flag ("pgettext:2:pass-objc-format");
372 xgettext_record_flag ("dpgettext:3:pass-objc-format");
373 xgettext_record_flag ("dcpgettext:3:pass-objc-format");
374 xgettext_record_flag ("npgettext:2:pass-objc-format");
375 xgettext_record_flag ("npgettext:3:pass-objc-format");
376 xgettext_record_flag ("dnpgettext:3:pass-objc-format");
377 xgettext_record_flag ("dnpgettext:4:pass-objc-format");
378 xgettext_record_flag ("dcnpgettext:3:pass-objc-format");
379 xgettext_record_flag ("dcnpgettext:4:pass-objc-format");
380 xgettext_record_flag ("NSLocalizedString:1:pass-c-format");
381 xgettext_record_flag ("NSLocalizedString:1:pass-objc-format");
382 xgettext_record_flag ("_:1:pass-c-format");
383 xgettext_record_flag ("_:1:pass-objc-format");
384 xgettext_record_flag ("stringWithFormat::1:objc-format");
385 xgettext_record_flag ("initWithFormat::1:objc-format");
386 xgettext_record_flag ("stringByAppendingFormat::1:objc-format");
387 xgettext_record_flag ("localizedStringWithFormat::1:objc-format");
388 xgettext_record_flag ("appendFormat::1:objc-format");
392 init_flag_table_gcc_internal ()
394 xgettext_record_flag ("gettext:1:pass-gcc-internal-format");
395 xgettext_record_flag ("dgettext:2:pass-gcc-internal-format");
396 xgettext_record_flag ("dcgettext:2:pass-gcc-internal-format");
397 xgettext_record_flag ("ngettext:1:pass-gcc-internal-format");
398 xgettext_record_flag ("ngettext:2:pass-gcc-internal-format");
399 xgettext_record_flag ("dngettext:2:pass-gcc-internal-format");
400 xgettext_record_flag ("dngettext:3:pass-gcc-internal-format");
401 xgettext_record_flag ("dcngettext:2:pass-gcc-internal-format");
402 xgettext_record_flag ("dcngettext:3:pass-gcc-internal-format");
403 xgettext_record_flag ("gettext_noop:1:pass-gcc-internal-format");
404 xgettext_record_flag ("pgettext:2:pass-gcc-internal-format");
405 xgettext_record_flag ("dpgettext:3:pass-gcc-internal-format");
406 xgettext_record_flag ("dcpgettext:3:pass-gcc-internal-format");
407 xgettext_record_flag ("npgettext:2:pass-gcc-internal-format");
408 xgettext_record_flag ("npgettext:3:pass-gcc-internal-format");
409 xgettext_record_flag ("dnpgettext:3:pass-gcc-internal-format");
410 xgettext_record_flag ("dnpgettext:4:pass-gcc-internal-format");
411 xgettext_record_flag ("dcnpgettext:3:pass-gcc-internal-format");
412 xgettext_record_flag ("dcnpgettext:4:pass-gcc-internal-format");
413 #if 0 /* This should better be done inside GCC. */
414 /* grepping for ATTRIBUTE_PRINTF in gcc-3.3/gcc/?*.h */
416 xgettext_record_flag ("status_warning:2:gcc-internal-format");
418 xgettext_record_flag ("pedwarn_c99:1:pass-gcc-internal-format");
420 //xgettext_record_flag ("error:1:c-format"); // 3 different versions
421 xgettext_record_flag ("notice:1:c-format");
422 //xgettext_record_flag ("fatal:1:c-format"); // 2 different versions
423 xgettext_record_flag ("fatal_perror:1:c-format");
425 xgettext_record_flag ("cpp_error:3:c-format");
426 xgettext_record_flag ("cpp_error_with_line:5:c-format");
428 xgettext_record_flag ("diagnostic_set_info:2:pass-gcc-internal-format");
429 xgettext_record_flag ("output_printf:2:gcc-internal-format");
430 xgettext_record_flag ("output_verbatim:2:pass-gcc-internal-format");
431 xgettext_record_flag ("verbatim:1:gcc-internal-format");
432 xgettext_record_flag ("inform:1:pass-gcc-internal-format");
434 //xgettext_record_flag ("fatal:1:c-format"); // 2 different versions
435 //xgettext_record_flag ("error:1:c-format"); // 3 different versions
437 xgettext_record_flag ("attr_printf:2:pass-c-format");
439 xgettext_record_flag ("error_at_line:2:pass-c-format");
440 xgettext_record_flag ("xvasprintf:2:pass-c-format");
441 xgettext_record_flag ("xasprintf:1:pass-c-format");
442 xgettext_record_flag ("oprintf:2:pass-c-format");
444 xgettext_record_flag ("message_with_line:2:pass-c-format");
446 xgettext_record_flag ("output_operand_lossage:1:c-format");
448 xgettext_record_flag ("ra_debug_msg:2:pass-c-format");
450 xgettext_record_flag ("fnotice:2:c-format");
451 xgettext_record_flag ("fatal_io_error:2:gcc-internal-format");
452 xgettext_record_flag ("error_for_asm:2:pass-gcc-internal-format");
453 xgettext_record_flag ("warning_for_asm:2:pass-gcc-internal-format");
454 xgettext_record_flag ("error_with_file_and_line:3:pass-gcc-internal-format");
455 xgettext_record_flag ("error_with_decl:2:pass-gcc-internal-format");
456 xgettext_record_flag ("pedwarn:1:gcc-internal-format");
457 xgettext_record_flag ("pedwarn_with_file_and_line:3:gcc-internal-format");
458 xgettext_record_flag ("pedwarn_with_decl:2:gcc-internal-format");
459 xgettext_record_flag ("sorry:1:gcc-internal-format");
460 xgettext_record_flag ("error:1:pass-gcc-internal-format");
461 xgettext_record_flag ("fatal_error:1:pass-gcc-internal-format");
462 xgettext_record_flag ("internal_error:1:pass-gcc-internal-format");
463 xgettext_record_flag ("warning:1:pass-gcc-internal-format");
464 xgettext_record_flag ("warning_with_file_and_line:3:pass-gcc-internal-format");
465 xgettext_record_flag ("warning_with_decl:2:pass-gcc-internal-format");
467 xgettext_record_flag ("ffecom_get_invented_identifier:1:pass-c-format");
469 xgettext_record_flag ("ffests_printf:2:pass-c-format");
470 /* java/java-tree.h */
471 xgettext_record_flag ("parse_error_context:2:pass-c-format");
474 xgettext_record_flag ("gettext:1:pass-gfc-internal-format");
475 xgettext_record_flag ("dgettext:2:pass-gfc-internal-format");
476 xgettext_record_flag ("dcgettext:2:pass-gfc-internal-format");
477 xgettext_record_flag ("ngettext:1:pass-gfc-internal-format");
478 xgettext_record_flag ("ngettext:2:pass-gfc-internal-format");
479 xgettext_record_flag ("dngettext:2:pass-gfc-internal-format");
480 xgettext_record_flag ("dngettext:3:pass-gfc-internal-format");
481 xgettext_record_flag ("dcngettext:2:pass-gfc-internal-format");
482 xgettext_record_flag ("dcngettext:3:pass-gfc-internal-format");
483 xgettext_record_flag ("gettext_noop:1:pass-gfc-internal-format");
484 xgettext_record_flag ("pgettext:2:pass-gfc-internal-format");
485 xgettext_record_flag ("dpgettext:3:pass-gfc-internal-format");
486 xgettext_record_flag ("dcpgettext:3:pass-gfc-internal-format");
487 xgettext_record_flag ("npgettext:2:pass-gfc-internal-format");
488 xgettext_record_flag ("npgettext:3:pass-gfc-internal-format");
489 xgettext_record_flag ("dnpgettext:3:pass-gfc-internal-format");
490 xgettext_record_flag ("dnpgettext:4:pass-gfc-internal-format");
491 xgettext_record_flag ("dcnpgettext:3:pass-gfc-internal-format");
492 xgettext_record_flag ("dcnpgettext:4:pass-gfc-internal-format");
493 #if 0 /* This should better be done inside GCC. */
494 /* fortran/error.c */
495 xgettext_record_flag ("gfc_error:1:gfc-internal-format");
496 xgettext_record_flag ("gfc_error_now:1:gfc-internal-format");
497 xgettext_record_flag ("gfc_fatal_error:1:gfc-internal-format");
498 xgettext_record_flag ("gfc_internal_error:1:gfc-internal-format");
499 xgettext_record_flag ("gfc_notify_std:2:gfc-internal-format");
500 xgettext_record_flag ("gfc_warning:1:gfc-internal-format");
501 xgettext_record_flag ("gfc_warning_now:1:gfc-internal-format");
506 init_flag_table_kde ()
508 xgettext_record_flag ("i18n:1:kde-format");
509 xgettext_record_flag ("i18nc:2:kde-format");
510 xgettext_record_flag ("i18np:1:kde-format");
511 xgettext_record_flag ("i18ncp:2:kde-format");
512 xgettext_record_flag ("i18nd:2:kde-format");
513 xgettext_record_flag ("i18ndc:3:kde-format");
514 xgettext_record_flag ("i18ndp:2:kde-format");
515 xgettext_record_flag ("i18ndcp:3:kde-format");
516 xgettext_record_flag ("ki18n:1:kde-format");
517 xgettext_record_flag ("ki18nc:2:kde-format");
518 xgettext_record_flag ("ki18np:1:kde-format");
519 xgettext_record_flag ("ki18ncp:2:kde-format");
520 xgettext_record_flag ("ki18nd:2:kde-format");
521 xgettext_record_flag ("ki18ndc:3:kde-format");
522 xgettext_record_flag ("ki18ndp:2:kde-format");
523 xgettext_record_flag ("ki18ndcp:3:kde-format");
524 xgettext_record_flag ("I18N_NOOP:1:kde-format");
525 xgettext_record_flag ("I18NC_NOOP:2:kde-format");
526 xgettext_record_flag ("I18N_NOOP2:2:kde-format");
527 xgettext_record_flag ("I18N_NOOP2_NOSTRIP:2:kde-format");
528 xgettext_record_flag ("xi18n:1:kde-kuit-format");
529 xgettext_record_flag ("xi18nc:2:kde-kuit-format");
530 xgettext_record_flag ("xi18np:1:kde-kuit-format");
531 xgettext_record_flag ("xi18ncp:2:kde-kuit-format");
532 xgettext_record_flag ("xi18nd:2:kde-kuit-format");
533 xgettext_record_flag ("xi18ndc:3:kde-kuit-format");
534 xgettext_record_flag ("xi18ndp:2:kde-kuit-format");
535 xgettext_record_flag ("xi18ndcp:3:kde-kuit-format");
536 xgettext_record_flag ("kxi18n:1:kde-kuit-format");
537 xgettext_record_flag ("kxi18nc:2:kde-kuit-format");
538 xgettext_record_flag ("kxi18np:1:kde-kuit-format");
539 xgettext_record_flag ("kxi18ncp:2:kde-kuit-format");
540 xgettext_record_flag ("kxi18nd:2:kde-kuit-format");
541 xgettext_record_flag ("kxi18ndc:3:kde-kuit-format");
542 xgettext_record_flag ("kxi18ndp:2:kde-kuit-format");
543 xgettext_record_flag ("kxi18ndcp:3:kde-kuit-format");
544 xgettext_record_flag ("XI18N_NOOP:1:kde-kuit-format");
545 xgettext_record_flag ("XI18NC_NOOP:2:kde-kuit-format");
546 xgettext_record_flag ("XI18N_NOOP2:2:kde-kuit-format");
547 xgettext_record_flag ("XI18N_NOOP2_NOSTRIP:2:kde-kuit-format");
550 /* ======================== Reading of characters. ======================== */
552 /* Real filename, used in error messages about the input file. */
553 static const char *real_file_name;
555 /* Logical filename and line number, used to label the extracted messages. */
556 static char *logical_file_name;
557 static int line_number;
559 /* The input file stream. */
563 /* 0. Terminate line by \n, regardless of whether the external representation of
564 a line terminator is LF (Unix), CR (Mac) or CR/LF (DOS/Windows).
565 It is debatable whether supporting CR/LF line terminators in C sources
566 on Unix is ISO C or POSIX compliant, but since GCC 3.3 now supports it
567 unconditionally, it must be OK.
568 The so-called "text mode" in stdio on DOS/Windows translates CR/LF to \n
569 automatically, but here we also need this conversion on Unix. As a side
570 effect, on DOS/Windows we also parse CR/CR/LF into a single \n, but this
583 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
592 if (c1 != EOF && c1 != '\n')
595 /* Seen line terminator CR or CR/LF. */
603 /* Supports only one pushback character, and not '\n'. */
605 phase0_ungetc (int c)
612 /* 1. line_number handling. Combine backslash-newline to nothing. */
614 static unsigned char phase1_pushback[2];
615 static int phase1_pushback_length;
623 if (phase1_pushback_length)
625 c = phase1_pushback[--phase1_pushback_length];
656 /* Supports 2 characters of pushback. */
658 phase1_ungetc (int c)
670 if (phase1_pushback_length == SIZEOF (phase1_pushback))
672 phase1_pushback[phase1_pushback_length++] = c;
678 /* 2. Convert trigraphs to their single character equivalents. Most
679 sane human beings vomit copiously at the mention of trigraphs, which
680 is why they are an option. */
682 static unsigned char phase2_pushback[1];
683 static int phase2_pushback_length;
691 if (phase2_pushback_length)
692 return phase2_pushback[--phase2_pushback_length];
694 return phase1_getc ();
733 /* Supports only one pushback character. */
735 phase2_ungetc (int c)
739 if (phase2_pushback_length == SIZEOF (phase2_pushback))
741 phase2_pushback[phase2_pushback_length++] = c;
746 /* 3. Concatenate each line ending in backslash (\) with the following
747 line. Basically, all you need to do is elide "\\\n" sequences from
750 static unsigned char phase3_pushback[2];
751 static int phase3_pushback_length;
757 if (phase3_pushback_length)
758 return phase3_pushback[--phase3_pushback_length];
761 int c = phase2_getc ();
774 /* Supports 2 characters of pushback. */
776 phase3_ungetc (int c)
780 if (phase3_pushback_length == SIZEOF (phase3_pushback))
782 phase3_pushback[phase3_pushback_length++] = c;
787 /* Accumulating comments. */
790 static size_t bufmax;
791 static size_t buflen;
802 if (buflen >= bufmax)
804 bufmax = 2 * bufmax + 10;
805 buffer = xrealloc (buffer, bufmax);
807 buffer[buflen++] = c;
811 comment_line_end (size_t chars_to_remove)
813 buflen -= chars_to_remove;
815 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
817 if (chars_to_remove == 0 && buflen >= bufmax)
819 bufmax = 2 * bufmax + 10;
820 buffer = xrealloc (buffer, bufmax);
822 buffer[buflen] = '\0';
823 savable_comment_add (buffer);
827 /* These are for tracking whether comments count as immediately before
829 static int last_comment_line;
830 static int last_non_comment_line;
831 static int newline_count;
834 /* 4. Replace each comment that is not inside a character constant or
835 string literal with a space character. We need to remember the
836 comment for later, because it may be attached to a keyword string.
837 We also optionally understand C++ comments. */
858 last_was_star = false;
864 /* We skip all leading white space, but not EOLs. */
865 if (!(buflen == 0 && (c == ' ' || c == '\t')))
870 comment_line_end (1);
872 last_was_star = false;
876 last_was_star = true;
882 comment_line_end (2);
888 last_was_star = false;
893 last_comment_line = newline_count;
897 /* C++ or ISO C 99 comment. */
902 if (c == '\n' || c == EOF)
904 /* We skip all leading white space, but not EOLs. */
905 if (!(buflen == 0 && (c == ' ' || c == '\t')))
908 comment_line_end (0);
909 last_comment_line = newline_count;
915 /* Supports only one pushback character. */
917 phase4_ungetc (int c)
923 /* ========================== Reading of tokens. ========================== */
926 /* True if ObjectiveC extensions are recognized. */
927 static bool objc_extensions;
929 /* True if C++ extensions are recognized. */
930 static bool cxx_extensions;
934 token_type_character_constant, /* 'x' */
937 token_type_hash, /* # */
938 token_type_lparen, /* ( */
939 token_type_rparen, /* ) */
940 token_type_comma, /* , */
941 token_type_colon, /* : */
942 token_type_name, /* abc */
943 token_type_number, /* 2.7 */
944 token_type_string_literal, /* "abc" */
945 token_type_symbol, /* < > = etc. */
946 token_type_objc_special, /* @ */
947 token_type_white_space
949 typedef enum token_type_ty token_type_ty;
951 typedef struct token_ty token_ty;
955 char *string; /* for token_type_name, token_type_string_literal */
956 refcounted_string_list_ty *comment; /* for token_type_string_literal,
957 token_type_objc_special */
958 enum literalstring_escape_type escape; /* for token_type_string_literal */
964 /* Free the memory pointed to by a 'struct token_ty'. */
966 free_token (token_ty *tp)
968 if (tp->type == token_type_name || tp->type == token_type_string_literal)
970 if (tp->type == token_type_string_literal
971 || tp->type == token_type_objc_special)
972 drop_reference (tp->comment);
977 literalstring_parse (const char *string, lex_pos_ty *pos,
978 enum literalstring_escape_type type)
980 struct mixed_string_buffer *bp;
983 /* Start accumulating the string. */
984 bp = mixed_string_buffer_alloc (lc_string,
997 mixed_string_buffer_append_char (bp, c);
1001 if (!(type & LET_ANSI_C) && !(type & LET_UNICODE))
1003 mixed_string_buffer_append_char (bp, '\\');
1011 if (type & LET_ANSI_C)
1018 mixed_string_buffer_append_char (bp, c);
1022 mixed_string_buffer_append_char (bp, '\a');
1025 mixed_string_buffer_append_char (bp, '\b');
1028 /* The \e escape is peculiar to gcc, and assumes an ASCII
1029 character set (or superset). We don't provide support for it
1033 mixed_string_buffer_append_char (bp, '\f');
1036 mixed_string_buffer_append_char (bp, '\n');
1039 mixed_string_buffer_append_char (bp, '\r');
1042 mixed_string_buffer_append_char (bp, '\t');
1045 mixed_string_buffer_append_char (bp, '\v');
1055 mixed_string_buffer_append_char (bp, '\\');
1056 mixed_string_buffer_append_char (bp, 'x');
1057 mixed_string_buffer_append_char (bp, c);
1060 case '0': case '1': case '2': case '3': case '4':
1061 case '5': case '6': case '7': case '8': case '9':
1062 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1063 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1067 for (n = 0; ; c = *p++)
1074 case '0': case '1': case '2': case '3': case '4':
1075 case '5': case '6': case '7': case '8': case '9':
1076 n = n * 16 + c - '0';
1079 case 'A': case 'B': case 'C': case 'D': case 'E':
1081 n = n * 16 + 10 + c - 'A';
1084 case 'a': case 'b': case 'c': case 'd': case 'e':
1086 n = n * 16 + 10 + c - 'a';
1092 mixed_string_buffer_append_char (bp, n);
1099 case '0': case '1': case '2': case '3':
1100 case '4': case '5': case '6': case '7':
1104 for (n = 0, j = 0; j < 3; ++j)
1106 n = n * 8 + c - '0';
1113 case '0': case '1': case '2': case '3':
1114 case '4': case '5': case '6': case '7':
1120 mixed_string_buffer_append_char (bp, n);
1126 if (type & LET_UNICODE)
1131 unsigned char buf[8];
1133 int length = prefix == 'u' ? 4 : 8;
1136 for (n = 0, j = 0; j < length; j++)
1140 if (c >= '0' && c <= '9')
1141 n = (n << 4) + (c - '0');
1142 else if (c >= 'A' && c <= 'F')
1143 n = (n << 4) + (c - 'A' + 10);
1144 else if (c >= 'a' && c <= 'f')
1145 n = (n << 4) + (c - 'a' + 10);
1155 mixed_string_buffer_append_unicode (bp, n);
1158 error_with_progname = false;
1159 error_at_line (0, 0,
1160 pos->file_name, pos->line_number,
1162 warning: invalid Unicode character"));
1163 error_with_progname = true;
1170 mixed_string_buffer_append_char (bp, '\\');
1171 mixed_string_buffer_append_char (bp, prefix);
1173 for (i = 0; i < j; i++)
1174 mixed_string_buffer_append_char (bp, buf[i]);
1185 mixed_string_buffer_append_char (bp, c);
1188 return mixed_string_buffer_done (bp);
1191 struct literalstring_parser literalstring_c =
1197 /* 5. Parse each resulting logical line as preprocessing tokens and
1198 white space. Preprocessing tokens and C tokens don't always match. */
1200 static token_ty phase5_pushback[1];
1201 static int phase5_pushback_length;
/* Read the next preprocessing token into *TP.  A token stored by
   phase5_unget takes precedence.  For fresh input the current
   line_number is stored in the token and tp->type is set to one of:
   eof, eoln, white_space, name, symbol, number, character_constant,
   string_literal, lparen, rparen, comma, hash, colon or objc_special.
   Name and string literal tokens carry a freshly allocated copy of
   their text in tp->string.  */
1205 phase5_get (token_ty *tp)
/* Holds the characters of the token being assembled; static, and
   enlarged with xrealloc whenever bufpos reaches bufmax.  */
1207 static char *buffer;
1211 int last_was_backslash;
/* Set for C++ input when a raw string literal prefix has been
   recognized; it changes how the following string literal is scanned.  */
1212 bool raw_expected = false;
/* Raw strings only: buffer index of the '(' that ends the opening
   delimiter, and of the ')' that starts the closing delimiter.  Both
   are set to -1 before a string literal is scanned.  */
1213 int delimiter_left_end;
1214 int delimiter_right_start;
/* A pushed-back token takes precedence over new input.  */
1217 if (phase5_pushback_length)
1219 *tp = phase5_pushback[--phase5_pushback_length];
1224 tp->line_number = line_number;
1229 tp->type = token_type_eof;
1233 tp->type = token_type_eoln;
1255 tp->type = token_type_white_space;
1258 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1259 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1260 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1261 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1263 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1264 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1265 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1266 case 'v': case 'w': case 'x': case 'y': case 'z':
/* Make room for one more character before storing it.  */
1270 if (bufpos >= bufmax)
1272 bufmax = 2 * bufmax + 10;
1273 buffer = xrealloc (buffer, bufmax);
1275 buffer[bufpos++] = c;
1279 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1280 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1281 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1282 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1285 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1286 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1287 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1288 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1290 case '0': case '1': case '2': case '3': case '4':
1291 case '5': case '6': case '7': case '8': case '9':
1295 /* Recognize string literals prefixed by R, u8, u8R, u,
1296 uR, U, UR, L, or LR. These prefixes are defined in the C standard
1297 ISO/IEC 9899:201x and the C++ standard ISO/IEC
1298 14882:2011. The raw string literals prefixed by R,
1299 u8R, uR, UR, or LR are only valid in C++.
1301 Since gettext's argument is a byte sequence, we are
1302 only interested in u8, R, and u8R. */
1305 bool is_prefix = false;
/* raw_expected is only ever set when cxx_extensions is on; the tests
   below look at the length of the name read so far and at whether it
   ends in 'R'.  */
1310 if (cxx_extensions && bufpos == 1)
1313 raw_expected = true;
1323 if (cxx_extensions && bufpos == 2)
1326 raw_expected = true;
1332 else if (cxx_extensions
1333 && bufpos == 3 && buffer[2] == 'R')
1336 raw_expected = true;
1345 else if (cxx_extensions
1346 && bufpos == 2 && buffer[1] == 'R')
1349 raw_expected = true;
1362 if (bufpos >= bufmax)
1364 bufmax = 2 * bufmax + 10;
1365 buffer = xrealloc (buffer, bufmax);
/* The name token gets its own copy of the buffer contents.  */
1368 tp->string = xstrdup (buffer);
1369 tp->type = token_type_name;
1378 tp->type = token_type_symbol;
1381 case '0': case '1': case '2': case '3': case '4':
1382 case '5': case '6': case '7': case '8': case '9':
1388 case '0': case '1': case '2': case '3': case '4':
1389 case '5': case '6': case '7': case '8': case '9':
1390 /* The preprocessing number token is more "generous" than the C
1391 number tokens. This is mostly due to token pasting (another
1392 thing we can ignore here). */
1396 if (bufpos >= bufmax)
1398 bufmax = 2 * bufmax + 10;
1399 buffer = xrealloc (buffer, bufmax);
1401 buffer[bufpos++] = c;
1407 if (bufpos >= bufmax)
1409 bufmax = 2 * bufmax + 10;
1410 buffer = xrealloc (buffer, bufmax);
1412 buffer[bufpos++] = c;
1414 if (c != '+' && c != '-')
1421 case 'A': case 'B': case 'C': case 'D': case 'F':
1422 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1423 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1424 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1426 case 'a': case 'b': case 'c': case 'd': case 'f':
1427 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1428 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1429 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1431 case '0': case '1': case '2': case '3': case '4':
1432 case '5': case '6': case '7': case '8': case '9':
1442 if (bufpos >= bufmax)
1444 bufmax = 2 * bufmax + 10;
1445 buffer = xrealloc (buffer, bufmax);
1448 tp->type = token_type_number;
/* NOTE(review): atol performs no error or range checking on the
   spelling held in buffer.  */
1449 tp->number = atol (buffer);
1453 /* We could worry about the 'L' before wide character constants,
1454 but ignoring it has no effect unless one of the keywords is
1455 "L". Just pretend it won't happen. Also, we don't need to
1456 remember the character constant. */
1457 last_was_backslash = false;
1461 if (last_was_backslash)
1463 last_was_backslash = false;
1469 last_was_backslash = true;
/* error_with_progname is cleared around the call and restored
   afterwards -- presumably so that the message carries only the
   file:line prefix; confirm.  */
1474 error_with_progname = false;
1475 error (0, 0, _("%s:%d: warning: unterminated character constant"),
1476 logical_file_name, line_number - 1);
1477 error_with_progname = true;
/* Push the newline back with phase3_ungetc.  */
1478 phase3_ungetc ('\n');
1480 case EOF: case '\'':
1485 tp->type = token_type_character_constant;
1491 /* We could worry about the 'L' before wide string constants,
1492 but since gettext's argument is not a wide character string,
1493 let the compiler complain about the argument not matching the
1494 prototype. Just pretend it won't happen. */
1495 last_was_backslash = false;
1496 delimiter_left_end = -1;
1497 delimiter_right_start = -1;
/* After a backslash, and only outside raw strings, the character is
   stored in the buffer as is.  */
1503 if (last_was_backslash && !raw_expected)
1505 last_was_backslash = false;
1506 if (bufpos >= bufmax)
1508 bufmax = 2 * bufmax + 10;
1509 buffer = xrealloc (buffer, bufmax);
1511 buffer[bufpos++] = c;
1517 last_was_backslash = true;
/* Remember the first '(' and, once it has been seen, the most recent
   ')'.  */
1522 if (c == '(' && delimiter_left_end < 0)
1523 delimiter_left_end = bufpos;
1524 else if (c == ')' && delimiter_left_end >= 0)
1525 last_rparen = bufpos;
1529 error_with_progname = false;
1531 _("%s:%d: warning: unterminated string literal"),
1532 logical_file_name, line_number - 1);
1533 error_with_progname = true;
1534 phase3_ungetc ('\n');
1537 if (bufpos >= bufmax)
1539 bufmax = 2 * bufmax + 10;
1540 buffer = xrealloc (buffer, bufmax);
1542 buffer[bufpos++] = c;
/* Raw string whose opening '(' has been seen: the current character is
   stored as content when the text after the last ')' differs in length
   or content from the delimiter at the start of the buffer; otherwise
   last_rparen is recorded as delimiter_right_start.  */
1546 if (raw_expected && delimiter_left_end >= 0)
1549 || delimiter_left_end != bufpos - (last_rparen + 1)
1550 || strncmp (buffer, buffer + last_rparen + 1,
1551 delimiter_left_end) != 0)
1553 if (bufpos >= bufmax)
1555 bufmax = 2 * bufmax + 10;
1556 buffer = xrealloc (buffer, bufmax);
1558 buffer[bufpos++] = c;
1561 delimiter_right_start = last_rparen;
1570 if (bufpos >= bufmax)
1572 bufmax = 2 * bufmax + 10;
1573 buffer = xrealloc (buffer, bufmax);
/* A missing opening or closing delimiter position is reported as an
   unterminated string literal.  */
1579 if (delimiter_left_end < 0 || delimiter_right_start < 0)
1581 error_with_progname = false;
1582 error (0, 0, _("%s:%d: warning: unterminated string literal"),
1583 logical_file_name, line_number - 1);
1584 error_with_progname = true;
/* Cut the buffer at the closing ')' and copy only what follows the
   opening '('; the escape type recorded for this token is LET_NONE.  */
1588 buffer[delimiter_right_start] = '\0';
1589 tp->type = token_type_string_literal;
1590 tp->string = xstrdup (&buffer[delimiter_left_end + 1]);
1591 tp->escape = LET_NONE;
1592 tp->comment = add_reference (savable_comment);
/* Here the whole buffer becomes the string, and the escape flags are
   LET_ANSI_C | LET_UNICODE.  */
1596 tp->type = token_type_string_literal;
1597 tp->string = xstrdup (buffer);
1598 tp->escape = LET_ANSI_C | LET_UNICODE;
1599 tp->comment = add_reference (savable_comment);
1604 tp->type = token_type_lparen;
1608 tp->type = token_type_rparen;
1612 tp->type = token_type_comma;
1616 tp->type = token_type_hash;
1620 tp->type = token_type_colon;
/* An objc_special token, which also takes a reference on the current
   savable comment, is produced only when objc_extensions is on.  */
1624 if (objc_extensions)
1626 tp->type = token_type_objc_special;
1627 tp->comment = add_reference (savable_comment);
1633 /* We could carefully recognize each of the 2 and 3 character
1634 operators, but it is not necessary, as we only need to recognize
1635 gettext invocations. Don't bother. */
1636 tp->type = token_type_symbol;
1642 /* Supports only one pushback token. */
/* Store *TP in phase5_pushback so that the next phase5_get call hands it
   out again.  EOF tokens are not stored.  */
1644 phase5_unget (token_ty *tp)
1646 if (tp->type != token_type_eof)
/* Capacity check: true when the pushback array is already full.  */
1648 if (phase5_pushback_length == SIZEOF (phase5_pushback))
/* The token is copied by value into the pushback array.  */
1650 phase5_pushback[phase5_pushback_length++] = *tp;
1655 /* X. Recognize a leading # symbol. Leave leading hash as a hash, but
1656 turn hash in the middle of a line into a plain symbol token. This
1657 makes phase 6 easier. */
/* Filter placed on top of phase 5: *TP receives the next token, with
   hash tokens that are not at the start of a line retyped as symbols.  */
1660 phaseX_get (token_ty *tp)
1662 static bool middle; /* false at the beginning of a line, true otherwise. */
/* End of line and end of file are tested together here.  */
1666 if (tp->type == token_type_eoln || tp->type == token_type_eof)
1672 /* Turn hash in the middle of a line into a plain symbol token. */
1673 if (tp->type == token_type_hash)
1674 tp->type = token_type_symbol;
1678 /* When we see leading whitespace followed by a hash sign,
1679 discard the leading white space token. The hash is all
1680 phase 6 is interested in. */
1681 if (tp->type == token_type_white_space)
/* NEXT is a look-ahead token examined after the white space.  */
1686 if (next.type == token_type_hash)
/* Hand the look-ahead token back to phase 5.  */
1689 phase5_unget (&next);
1697 /* 6. Recognize and carry out directives (it also expands macros on
1698 non-directive lines, which we do not do here). The only directives
1699 we care about are the #line and #define directives. We throw all the others away. */
/* Pushback storage for phase 6, with room for two tokens; phase6_get
   checks it before anything else.  */
1702 static token_ty phase6_pushback[2];
/* Number of tokens currently held in phase6_pushback.  */
1703 static int phase6_pushback_length;
/* Read the next token into *TP, consuming preprocessor directives.
   A token stored by phase6_unget takes precedence.  After a hash token
   the tokens of the directive are collected in a growable static array;
   a directive of the form "line NUMBER STRING" or "NUMBER STRING" then
   updates logical_file_name and line_number.  The collected tokens are
   freed afterwards and the savable comments are reset.  */
1707 phase6_get (token_ty *tp)
1709 static token_ty *buf;
1714 if (phase6_pushback_length)
1716 *tp = phase6_pushback[--phase6_pushback_length];
1721 /* Get the next token. If it is not a '#' at the beginning of a
1722 line (ignoring whitespace), return immediately. */
1724 if (tp->type != token_type_hash)
1727 /* Accumulate the rest of the directive in a buffer, until the
1728 "define" keyword is seen or until end of line. */
1733 if (tp->type == token_type_eoln || tp->type == token_type_eof)
1736 /* Before the "define" keyword and inside other directives
1737 white space is irrelevant. So just throw it away. */
1738 if (tp->type != token_type_white_space)
1740 /* If it is a #define directive, return immediately,
1741 thus treating the body of the #define directive like
1744 && tp->type == token_type_name
1745 && strcmp (tp->string, "define") == 0)
1749 if (bufpos >= bufmax)
1751 bufmax = 2 * bufmax + 10;
1752 buf = xrealloc (buf, bufmax * sizeof (buf[0]));
1754 buf[bufpos++] = *tp;
1758 /* If it is a #line directive, with no macros to expand, act on
1759 it. Ignore all other directives. */
1760 if (bufpos >= 3 && buf[0].type == token_type_name
1761 && strcmp (buf[0].string, "line") == 0
1762 && buf[1].type == token_type_number
1763 && buf[2].type == token_type_string_literal)
/* The file name is duplicated because the token strings are released
   further down.  */
1765 logical_file_name = xstrdup (buf[2].string);
1766 line_number = buf[1].number;
/* Same information given without the "line" keyword: a number followed
   by a string literal.  */
1768 if (bufpos >= 2 && buf[0].type == token_type_number
1769 && buf[1].type == token_type_string_literal)
1771 logical_file_name = xstrdup (buf[1].string);
1772 line_number = buf[0].number;
1775 /* Release the storage held by the directive. */
1776 for (j = 0; j < bufpos; ++j)
1777 free_token (&buf[j]);
1779 /* We must reset the selected comments. */
1780 savable_comment_reset ();
1785 /* Supports 2 tokens of pushback. */
/* Store *TP in phase6_pushback so that a later phase6_get call hands it
   out again.  EOF tokens are not stored.  */
1787 phase6_unget (token_ty *tp)
1789 if (tp->type != token_type_eof)
/* Capacity check: true when both pushback slots are already in use.  */
1791 if (phase6_pushback_length == SIZEOF (phase6_pushback))
/* The token is copied by value into the pushback array.  */
1793 phase6_pushback[phase6_pushback_length++] = *tp;
1798 /* 8a. Convert ISO C 99 section 7.8.1 format string directives to string
1799 literal placeholders. */
1801 /* Test for an ISO C 99 section 7.8.1 format string directive. */
/* NAME is matched piece by piece against the pattern spelled out at the
   top of the body: the letters PRI, one conversion letter out of
   d i o u x X, and then either MAX, PTR, or LEAST / FAST / nothing
   followed by 8, 16, 32 or 64 and the end of the string.  */
1803 is_inttypes_macro (const char *name)
1806 P R I { d | i | o | u | x | X }
1807 { { | LEAST | FAST } { 8 | 16 | 32 | 64 } | MAX | PTR } */
1808 if (name[0] == 'P' && name[1] == 'R' && name[2] == 'I')
1811 if (name[0] == 'd' || name[0] == 'i' || name[0] == 'o' || name[0] == 'u'
1812 || name[0] == 'x' || name[0] == 'X')
1815 if (name[0] == 'M' && name[1] == 'A' && name[2] == 'X'
1818 if (name[0] == 'P' && name[1] == 'T' && name[2] == 'R'
1821 if (name[0] == 'L' && name[1] == 'E' && name[2] == 'A'
1822 && name[3] == 'S' && name[4] == 'T')
1824 else if (name[0] == 'F' && name[1] == 'A' && name[2] == 'S'
/* Width suffixes; each test also requires the string to end right
   after the digits.  */
1827 if (name[0] == '8' && name[1] == '\0')
1829 if (name[0] == '1' && name[1] == '6' && name[2] == '\0')
1831 if (name[0] == '3' && name[1] == '2' && name[2] == '\0')
1833 if (name[0] == '6' && name[1] == '4' && name[2] == '\0')
/* Phase 8a reader.  A name token accepted by is_inttypes_macro is
   rewritten in place into a string literal token whose text is the name
   wrapped in angle brackets.  */
1841 phase8a_get (token_ty *tp)
1844 if (tp->type == token_type_name && is_inttypes_macro (tp->string))
1846 /* Turn PRIdXXX into "<PRIdXXX>". */
1847 char *new_string = xasprintf ("<%s>", tp->string);
1849 tp->string = new_string;
/* Like the string literals produced in phase 5, the converted token
   takes a reference on the current savable comment and is flagged
   LET_ANSI_C | LET_UNICODE.  */
1850 tp->comment = add_reference (savable_comment);
1851 tp->type = token_type_string_literal;
1852 tp->escape = LET_ANSI_C | LET_UNICODE;
1856 /* Supports 2 tokens of pushback. */
/* NOTE(review): presumably hands TP on to phase6_unget, whose pushback
   array has two slots -- confirm against the function body.  */
1858 phase8a_unget (token_ty *tp)
1864 /* 8b. Drop whitespace. */
/* Phase 8b reader: white space and end-of-line tokens get special
   treatment here; at an end of line the savable comments may be reset,
   as explained below.  */
1866 phase8b_get (token_ty *tp)
1872 if (tp->type == token_type_white_space)
1874 if (tp->type == token_type_eoln)
1876 /* We have to track the last occurrence of a string. One
1877 mode of xgettext allows grouping an extracted message
1878 with a comment for documentation. The rule which states
1879 which comment is assumed to be grouped with the message
1880 says it should immediately precede it. Our
1881 interpretation: between the last line of the comment and
1882 the line in which the keyword is found must be no line
1883 with non-white space tokens. */
/* A non-comment token was seen on a later line than the last comment,
   so that comment no longer immediately precedes what follows.  */
1885 if (last_non_comment_line > last_comment_line)
1886 savable_comment_reset ();
1893 /* Supports 2 tokens of pushback. */
/* NOTE(review): the body is not visible here; presumably TP is passed
   down to the phase 8a pushback -- confirm.  */
1895 phase8b_unget (token_ty *tp)
1901 /* 8c. In ObjectiveC mode, drop '@' before a literal string. We need to
1902 do this before performing concatenation of adjacent string literals. */
/* Phase 8c reader: tokens other than objc_special are tested first; for
   an objc_special token the following token TMP is inspected.  */
1904 phase8c_get (token_ty *tp)
1909 if (tp->type != token_type_objc_special)
1912 if (tmp.type != token_type_string_literal)
/* Not a string literal after the '@': give the look-ahead token back to
   phase 8b.  */
1914 phase8b_unget (&tmp);
1917 /* Drop the '@' token and return immediately the following string. */
/* The string releases its own comment reference and takes over the one
   held by the '@' token.  */
1918 drop_reference (tmp.comment);
1919 tmp.comment = tp->comment;
1923 /* Supports only one pushback token. */
/* NOTE(review): the body is not visible here; phase8_get calls this
   function to return a look-ahead token that is not a string literal.  */
1925 phase8c_unget (token_ty *tp)
1931 /* 8. Concatenate adjacent string literals to form single string
1932 literals (because we don't expand macros, there are a few things we will miss).
1935 FIXME: handle the case when the string literals have different
1936 tp->escape setting. */
/* Phase 8 reader: a string literal token in *TP is extended with the
   text of the string literal tokens that directly follow it.  */
1939 phase8_get (token_ty *tp)
1942 if (tp->type != token_type_string_literal)
1950 if (tmp.type != token_type_string_literal)
/* The look-ahead token is not a string literal: hand it back.  */
1952 phase8c_unget (&tmp);
/* Append tmp.string to tp->string, after enlarging the latter to hold
   both texts plus the terminating NUL.  */
1955 len = strlen (tp->string);
1956 tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
1957 strcpy (tp->string + len, tmp.string);
1963 /* ===================== Reading of high-level tokens. ==================== */
/* Kinds of token that x_c_lex produces and on which
   extract_parenthesized dispatches.  */
1966 enum xgettext_token_type_ty
1968 xgettext_token_type_eof,
/* A name for which the keyword table lookup in x_c_lex yields call
   shapes.  */
1969 xgettext_token_type_keyword,
/* Any other name.  */
1970 xgettext_token_type_symbol,
1971 xgettext_token_type_lparen,
1972 xgettext_token_type_rparen,
1973 xgettext_token_type_comma,
1974 xgettext_token_type_colon,
1975 xgettext_token_type_string_literal,
1976 xgettext_token_type_other
1978 typedef enum xgettext_token_type_ty xgettext_token_type_ty;
/* High-level token as filled in by x_c_lex.  Which fields are
   meaningful depends on TYPE, as noted on each field.  */
1980 typedef struct xgettext_token_ty xgettext_token_ty;
1981 struct xgettext_token_ty
1983 xgettext_token_type_ty type;
1985 /* This field is used only for xgettext_token_type_keyword. */
1986 const struct callshapes *shapes;
1988 /* This field is used only for xgettext_token_type_string_literal,
1989 xgettext_token_type_keyword, xgettext_token_type_symbol. */
1992 /* This field is used only for xgettext_token_type_string_literal. */
1993 enum literalstring_escape_type escape;
1995 /* This field is used only for xgettext_token_type_string_literal. */
1996 refcounted_string_list_ty *comment;
1998 /* These fields are only for
1999 xgettext_token_type_keyword,
2000 xgettext_token_type_string_literal. */
2005 /* 9. Convert the remaining preprocessing tokens to C tokens and
2006 discards any white space from the translation unit. */
/* Fill *TP with the next high-level token.  One token is fetched with
   phase8_get and its type is mapped to the matching
   xgettext_token_type_* value.  For the token types other than EOF,
   newline_count is recorded in last_non_comment_line.  */
2009 x_c_lex (xgettext_token_ty *tp)
2014 void *keyword_value;
2016 phase8_get (&token);
2019 case token_type_eof:
2020 tp->type = xgettext_token_type_eof;
2023 case token_type_name:
2024 last_non_comment_line = newline_count;
/* Names are looked up in the keyword table; the Objective C table is
   used when objc_extensions is set.  */
2026 if (hash_find_entry (objc_extensions ? &objc_keywords : &c_keywords,
2027 token.string, strlen (token.string),
2031 tp->type = xgettext_token_type_keyword;
2032 tp->shapes = (const struct callshapes *) keyword_value;
2033 tp->pos.file_name = logical_file_name;
2034 tp->pos.line_number = token.line_number;
2037 tp->type = xgettext_token_type_symbol;
/* The string pointer is handed on as is, not copied.  */
2038 tp->string = token.string;
2041 case token_type_lparen:
2042 last_non_comment_line = newline_count;
2044 tp->type = xgettext_token_type_lparen;
2047 case token_type_rparen:
2048 last_non_comment_line = newline_count;
2050 tp->type = xgettext_token_type_rparen;
2053 case token_type_comma:
2054 last_non_comment_line = newline_count;
2056 tp->type = xgettext_token_type_comma;
2059 case token_type_colon:
2060 last_non_comment_line = newline_count;
2062 tp->type = xgettext_token_type_colon;
2065 case token_type_string_literal:
2066 last_non_comment_line = newline_count;
/* String, escape flags and comment reference are all taken over from
   the low-level token; the position combines logical_file_name with the
   token's line number.  */
2068 tp->type = xgettext_token_type_string_literal;
2069 tp->string = token.string;
2070 tp->escape = token.escape;
2071 tp->comment = token.comment;
2072 tp->pos.file_name = logical_file_name;
2073 tp->pos.line_number = token.line_number;
2076 case token_type_objc_special:
/* The comment reference held by the token is released here.  */
2077 drop_reference (token.comment);
2081 last_non_comment_line = newline_count;
2083 tp->type = xgettext_token_type_other;
2090 /* ========================= Extracting strings. ========================== */
2093 /* Context lookup table. */
/* Installed by extract_whole_file from its flag_table argument and
   queried by name in extract_parenthesized.  */
2094 static flag_context_list_table_ty *flag_context_list_table;
2097 /* The file is broken into tokens. Scan the token stream, looking for
2098 a keyword, followed by a left paren, followed by a string. When we
2099 see this sequence, we have something to remember. We assume we are
2100 looking at a valid C or C++ program, and leave the complaints about
2101 the grammar to the compiler.
2103 Normal handling: Look for
2104 keyword ( ... msgid ... )
2105 Plural handling: Look for
2106 keyword ( ... msgid ... msgid_plural ... )
2108 We use recursion because the arguments before msgid or between msgid
2109 and msgid_plural can contain subexpressions of the same form. */
2112 /* Extract messages until the next balanced closing parenthesis.
2113 Extracted messages are added to MLP.
2114 Return true upon eof, false upon closing parenthesis. */
/* OUTER_CONTEXT and CONTEXT_ITER together determine the flag context of
   each argument; ARGPARSER receives the string literals seen at this
   nesting level and is finished with arglist_parser_done at a closing
   parenthesis, at EOF, or when a nested call reports EOF.  */
2116 extract_parenthesized (message_list_ty *mlp,
2117 flag_context_ty outer_context,
2118 flag_context_list_iterator_ty context_iter,
2119 struct arglist_parser *argparser)
2121 /* Current argument number. */
2123 /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
2125 /* Parameters of the keyword just seen. Defined only in state 1. */
2126 const struct callshapes *next_shapes = NULL;
2127 /* Context iterator that will be used if the next token is a '('. */
2128 flag_context_list_iterator_ty next_context_iter =
2129 passthrough_context_list_iterator;
2130 /* Context iterator that will be used if the next token is a ':'.
2131 (Objective C selector syntax.) */
2132 flag_context_list_iterator_ty selectorcall_context_iter =
2133 passthrough_context_list_iterator;
2134 /* Current context. */
2135 flag_context_ty inner_context =
2136 inherited_context (outer_context,
2137 flag_context_list_iterator_advance (&context_iter));
2139 /* Start state is 0. */
2144 xgettext_token_ty token;
2149 case xgettext_token_type_keyword:
2150 next_shapes = token.shapes;
2152 goto keyword_or_symbol;
2154 case xgettext_token_type_symbol:
/* Look up the flag contexts registered under this name.  */
2158 flag_context_list_iterator (
2159 flag_context_list_table_lookup (
2160 flag_context_list_table,
2161 token.string, strlen (token.string)));
2162 if (objc_extensions)
/* Objective C: also look the name up with a ':' appended, for use when
   the next token is a ':'.  The string is enlarged in place for that.  */
2164 size_t token_string_len = strlen (token.string);
2165 token.string = xrealloc (token.string, token_string_len + 2);
2166 token.string[token_string_len] = ':';
2167 token.string[token_string_len + 1] = '\0';
2168 selectorcall_context_iter =
2169 flag_context_list_iterator (
2170 flag_context_list_table_lookup (
2171 flag_context_list_table,
2172 token.string, token_string_len + 1));
/* The name is no longer needed once the lookups are done.  */
2174 free (token.string);
2177 case xgettext_token_type_lparen:
/* Recurse for the nested parenthesized list with a freshly allocated
   argument parser, which gets the keyword's shapes only when a keyword
   immediately precedes the '('.  A true result means EOF was hit.  */
2178 if (extract_parenthesized (mlp, inner_context, next_context_iter,
2179 arglist_parser_alloc (mlp,
2180 state ? next_shapes : NULL)))
2182 arglist_parser_done (argparser, arg);
2185 next_context_iter = null_context_list_iterator;
2186 selectorcall_context_iter = null_context_list_iterator;
2190 case xgettext_token_type_rparen:
2191 arglist_parser_done (argparser, arg);
2194 case xgettext_token_type_comma:
/* A new argument starts: the context is derived afresh from
   OUTER_CONTEXT.  */
2197 inherited_context (outer_context,
2198 flag_context_list_iterator_advance (
2200 next_context_iter = passthrough_context_list_iterator;
2201 selectorcall_context_iter = passthrough_context_list_iterator;
2205 case xgettext_token_type_colon:
2206 if (objc_extensions)
/* Objective C: continue with the iterator found for the selector
   form of the preceding name.  */
2208 context_iter = selectorcall_context_iter;
2210 inherited_context (inner_context,
2211 flag_context_list_iterator_advance (
2213 next_context_iter = passthrough_context_list_iterator;
2214 selectorcall_context_iter = passthrough_context_list_iterator;
2218 next_context_iter = null_context_list_iterator;
2219 selectorcall_context_iter = null_context_list_iterator;
2224 case xgettext_token_type_string_literal:
2228 refcounted_string_list_ty *comment;
2229 const char *encoding;
/* Replace the raw token text by the result of literalstring_parse, and
   the comment, if any, by its converted counterpart.  */
2231 string = literalstring_parse (token.string, &token.pos,
2233 free (token.string);
2234 token.string = string;
2236 if (token.comment != NULL)
2238 comment = savable_comment_convert_encoding (token.comment,
2240 drop_reference (token.comment);
2241 token.comment = comment;
2244 /* token.string and token.comment are already converted
2245 to UTF-8. Prevent further conversion in
2246 remember_a_message. */
2247 encoding = xgettext_current_source_encoding;
2248 xgettext_current_source_encoding = po_charset_utf8;
2249 remember_a_message (mlp, NULL, token.string, inner_context,
2250 &token.pos, NULL, token.comment);
2251 xgettext_current_source_encoding = encoding;
/* Here the literal is recorded with the argument parser under the
   current argument number instead.  */
2254 arglist_parser_remember_literal (argparser, arg, token.string,
2256 token.pos.file_name,
2257 token.pos.line_number,
2260 drop_reference (token.comment);
2261 next_context_iter = null_context_list_iterator;
2262 selectorcall_context_iter = null_context_list_iterator;
2266 case xgettext_token_type_other:
2267 next_context_iter = null_context_list_iterator;
2268 selectorcall_context_iter = null_context_list_iterator;
2272 case xgettext_token_type_eof:
2273 arglist_parser_done (argparser, arg);
/* Driver shared by the language-specific entry points, which set
   objc_extensions and cxx_extensions before calling it.  Sets up the
   file name and line tracking globals, installs FLAG_TABLE as the
   context lookup table, and extracts into the message list of the first
   item of MDLP until end of file.  */
2284 extract_whole_file (FILE *f,
2285 const char *real_filename, const char *logical_filename,
2286 flag_context_list_table_ty *flag_table,
2287 msgdomain_list_ty *mdlp)
2289 message_list_ty *mlp = mdlp->item[0]->messages;
2292 real_file_name = real_filename;
/* The logical file name is stored as a private copy.  */
2293 logical_file_name = xstrdup (logical_filename);
2297 last_comment_line = -1;
2298 last_non_comment_line = -1;
2300 flag_context_list_table = flag_table;
2304 /* Eat tokens until eof is seen. When extract_parenthesized returns
2305 due to an unbalanced closing parenthesis, just restart it. */
2306 while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
2307 arglist_parser_alloc (mlp, NULL)))
2310 /* Close scanner. */
2312 real_file_name = NULL;
2313 logical_file_name = NULL;
2320 const char *real_filename, const char *logical_filename,
2321 flag_context_list_table_ty *flag_table,
2322 msgdomain_list_ty *mdlp)
/* Neither the Objective C nor the C++ extensions are enabled for this
   entry point; everything else is left to extract_whole_file.  */
2324 objc_extensions = false;
2325 cxx_extensions = false;
2326 extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
/* C++ entry point: enables cxx_extensions, disables objc_extensions,
   and passes all arguments on to extract_whole_file unchanged.  */
2330 extract_cxx (FILE *f,
2331 const char *real_filename, const char *logical_filename,
2332 flag_context_list_table_ty *flag_table,
2333 msgdomain_list_ty *mdlp)
2335 objc_extensions = false;
2336 cxx_extensions = true;
2337 extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
/* Objective C entry point: enables objc_extensions, disables
   cxx_extensions, and passes all arguments on to extract_whole_file
   unchanged.  */
2341 extract_objc (FILE *f,
2342 const char *real_filename, const char *logical_filename,
2343 flag_context_list_table_ty *flag_table,
2344 msgdomain_list_ty *mdlp)
2346 objc_extensions = true;
2347 cxx_extensions = false;
2348 extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);