1 /* xgettext common functions.
2 Copyright (C) 2001-2003, 2005-2006, 2008-2009, 2011, 2015 Free
3 Software Foundation, Inc.
4 Written by Peter Miller <millerp@canb.auug.org.au>
5 and Bruno Haible <haible@clisp.cons.org>, 2001.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
35 /* Declare 'line_comment' and 'input_syntax'. */
36 #include "read-catalog.h"
44 /* If true, omit the header entry.
45 If false, keep the header entry present in the input. */
46 extern int xgettext_omit_header;
48 extern bool substring_match;
51 /* Calling convention for a given keyword. */
54 int argnum1; /* argument number to use for msgid */
55 int argnum2; /* argument number to use for msgid_plural */
56 int argnumc; /* argument number to use for msgctxt */
57 bool argnum1_glib_context; /* argument argnum1 has the syntax "ctxt|msgid" */
58 bool argnum2_glib_context; /* argument argnum2 has the syntax "ctxt|msgid" */
59 int argtotal; /* total number of arguments */
60 string_list_ty xcomments; /* auto-extracted comments */
63 /* Split keyword spec SPEC into keyword, argnum1, argnum2, argnumc.
The parsed calling convention is stored in *SHAPEP.
NOTE(review): *ENDP presumably receives the end of the keyword within SPEC
-- confirm against the definition. */
64 extern void split_keywordspec (const char *spec, const char **endp,
65 struct callshape *shapep);
67 /* Set of alternative calling conventions for a given keyword. */
70 const char *keyword; /* the keyword, not NUL terminated */
71 size_t keyword_len; /* the keyword's length */
73 struct callshape shapes[1]; /* actually nshapes elements */
76 /* Insert a (keyword, callshape) pair into a hash table mapping keyword to
77 'struct callshapes *'. */
78 extern void insert_keyword_callshape (hash_table *table,
79 const char *keyword, size_t keyword_len,
80 const struct callshape *shape);
83 /* Context representing some flags. */
84 typedef struct flag_context_ty flag_context_ty;
85 struct flag_context_ty
87 /* Regarding the primary formatstring type. */
88 /*enum is_format*/ unsigned int is_format1 : 3;
89 /*bool*/ unsigned int pass_format1 : 1;
90 /* Regarding the secondary formatstring type. */
91 /*enum is_format*/ unsigned int is_format2 : 3;
92 /*bool*/ unsigned int pass_format2 : 1;
93 /* Regarding the tertiary formatstring type. */
94 /*enum is_format*/ unsigned int is_format3 : 3;
95 /*bool*/ unsigned int pass_format3 : 1;
98 extern flag_context_ty null_context;
99 /* Transparent context. */
100 extern flag_context_ty passthrough_context;
101 /* Compute an inherited context.
102 The outer_context is assumed to have all pass_format* flags = false.
103 The result will then also have all pass_format* flags = false. */
104 extern flag_context_ty
105 inherited_context (flag_context_ty outer_context,
106 flag_context_ty modifier_context);
108 /* Context representing some flags, for each possible argument number.
109 This is a linked list, sorted according to the argument number. */
110 typedef struct flag_context_list_ty flag_context_list_ty;
111 struct flag_context_list_ty
113 int argnum; /* current argument number, > 0 */
114 flag_context_ty flags; /* flags for current argument */
115 flag_context_list_ty *next;
118 /* Iterator through a flag_context_list_ty. */
119 typedef struct flag_context_list_iterator_ty flag_context_list_iterator_ty;
120 struct flag_context_list_iterator_ty
122 int argnum; /* current argument number, > 0 */
123 const flag_context_list_ty* head; /* remaining tail of the list, i.e. its first node */
125 extern flag_context_list_iterator_ty null_context_list_iterator;
126 extern flag_context_list_iterator_ty passthrough_context_list_iterator;
127 extern flag_context_list_iterator_ty
128 flag_context_list_iterator (flag_context_list_ty *list);
129 extern flag_context_ty
130 flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter);
132 /* For nearly each backend, we have a separate table mapping a keyword to
133 a flag_context_list_ty *. */
134 typedef hash_table /* char[] -> flag_context_list_ty * */
135 flag_context_list_table_ty;
136 extern flag_context_list_ty *
137 flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
138 const void *key, size_t keylen);
139 /* Record a flag in the appropriate backend's table. */
140 extern void xgettext_record_flag (const char *optionstring);
143 /* Context while building up lexical tokens. */
146 lc_outside, /* Initial context: outside of comments and strings. */
147 lc_comment, /* Inside a comment. */
148 lc_string, /* Inside a string literal. */
150 /* For embedded XML in programming code, like E4X in JavaScript. */
151 lc_xml_open_tag, /* Inside an opening tag of an XML element. */
152 lc_xml_close_tag, /* Inside a closing tag of an XML element. */
153 lc_xml_content /* Inside an XML text node. */
157 /* Error message about non-ASCII character in a specific lexical context. */
158 extern char *non_ascii_error_message (lexical_context_ty lcontext,
159 const char *file_name,
163 /* Canonicalized encoding name for all input files. */
164 extern const char *xgettext_global_source_encoding;
167 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
168 ASCII or UTF-8, when this conversion is a no-op). */
169 extern iconv_t xgettext_global_source_iconv;
172 /* Canonicalized encoding name for the current input file. */
173 extern const char *xgettext_current_source_encoding;
176 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
177 ASCII or UTF-8, when this conversion is a no-op). */
178 extern iconv_t xgettext_current_source_iconv;
181 /* Convert the given string from xgettext_current_source_encoding to
182 the output file encoding (i.e. ASCII or UTF-8).
183 The resulting string is either the argument string, or freshly allocated.
184 The lcontext, file_name and line_number are only used for error message
186 extern char *from_current_source_encoding (const char *string,
187 lexical_context_ty lcontext,
188 const char *file_name,
192 /* List of messages whose msgids must not be extracted, or NULL.
193 Used by remember_a_message(). */
194 extern message_list_ty *exclude;
197 /* Comment handling for backends which support combining adjacent strings
199 In these backends we cannot use the xgettext_comment* functions directly,
200 because in multiline string expressions like
203 the newline between "string1" and "string2" would cause a call to
204 xgettext_comment_reset(), thus destroying the accumulated comments
205 that we need a little later, when we have concatenated the two strings
206 and pass them to remember_a_message().
207 Instead, we do the bookkeeping of the accumulated comments directly,
208 and save a pointer to the accumulated comments when we read "string1".
209 In order to avoid excessive copying of strings, we use reference
212 typedef struct refcounted_string_list_ty refcounted_string_list_ty;
213 struct refcounted_string_list_ty
215 unsigned int refcount; /* reference count, see add_reference/drop_reference */
216 struct string_list_ty contents; /* the accumulated comment strings */
219 static inline refcounted_string_list_ty *
220 add_reference (refcounted_string_list_ty *rslp)
228 drop_reference (refcounted_string_list_ty *rslp)
232 if (rslp->refcount > 1)
236 string_list_destroy (&rslp->contents);
242 extern refcounted_string_list_ty *savable_comment;
243 extern void savable_comment_add (const char *str);
244 extern void savable_comment_reset (void);
246 /* Convert character encoding of COMMENT according to the current
247 source encoding. Returns a new refcounted_string_list_ty. */
248 extern refcounted_string_list_ty *
249 savable_comment_convert_encoding (refcounted_string_list_ty *comment,
253 enum literalstring_escape_type
260 struct literalstring_parser
262 char * (*parse) (const char *string, lex_pos_ty *pos,
263 enum literalstring_escape_type type);
266 /* Add a message to the list of extracted messages.
267 MSGCTXT must be either NULL or a malloc()ed string; its ownership is passed
269 MSGID must be a malloc()ed string; its ownership is passed to the callee.
270 POS->file_name must be allocated with indefinite extent.
271 EXTRACTED_COMMENT is a comment that needs to be copied into the POT file,
273 COMMENT may be savable_comment, or it may be a saved copy of savable_comment
274 (then add_reference must be used when saving it, and drop_reference while
275 dropping it). Clear savable_comment.
276 Return the new or found message, or NULL if the message is excluded. */
277 extern message_ty *remember_a_message (message_list_ty *mlp,
280 flag_context_ty context,
282 const char *extracted_comment,
283 refcounted_string_list_ty *comment);
285 /* Add an msgid_plural to a message previously returned by
287 STRING must be a malloc()ed string; its ownership is passed to the callee.
288 POS->file_name must be allocated with indefinite extent.
289 COMMENT may be savable_comment, or it may be a saved copy of savable_comment
290 (then add_reference must be used when saving it, and drop_reference while
291 dropping it). Clear savable_comment. */
292 extern void remember_a_message_plural (message_ty *mp,
294 flag_context_ty context,
296 refcounted_string_list_ty *comment);
298 /* Represents the progressive parsing of an argument list w.r.t. a single
299 'struct callshape'. */
302 int argnumc; /* number of context argument, 0 when seen */
303 int argnum1; /* number of singular argument, 0 when seen */
304 int argnum2; /* number of plural argument, 0 when seen */
305 bool argnum1_glib_context; /* argument argnum1 has the syntax "ctxt|msgid" */
306 bool argnum2_glib_context; /* argument argnum2 has the syntax "ctxt|msgid" */
307 int argtotal; /* total number of arguments, 0 if unspecified */
308 string_list_ty xcomments; /* auto-extracted comments */
309 char *msgctxt; /* context - owned string, or NULL */
310 enum literalstring_escape_type msgctxt_escape;
311 lex_pos_ty msgctxt_pos;
312 char *msgid; /* msgid - owned string, or NULL */
313 enum literalstring_escape_type msgid_escape;
314 flag_context_ty msgid_context;
315 lex_pos_ty msgid_pos;
316 refcounted_string_list_ty *msgid_comment;
317 char *msgid_plural; /* msgid_plural - owned string, or NULL */
318 enum literalstring_escape_type msgid_plural_escape;
319 flag_context_ty msgid_plural_context;
320 lex_pos_ty msgid_plural_pos;
323 /* Represents the progressive parsing of an argument list w.r.t. an entire
324 'struct callshapes'. */
325 struct arglist_parser
327 message_list_ty *mlp; /* list where the message shall be added */
328 const char *keyword; /* the keyword, not NUL terminated */
329 size_t keyword_len; /* the keyword's length */
330 size_t nalternatives; /* number of partial_call alternatives */
331 struct partial_call alternative[1]; /* partial_call alternatives; NOTE(review): presumably nalternatives elements -- confirm */
334 /* Creates a fresh arglist_parser recognizing calls.
335 You can pass shapes = NULL for a parser not recognizing any calls. */
336 extern struct arglist_parser * arglist_parser_alloc (message_list_ty *mlp,
337 const struct callshapes *shapes);
338 /* Clones an arglist_parser. */
339 extern struct arglist_parser * arglist_parser_clone (struct arglist_parser *ap);
340 /* Adds a string argument to an arglist_parser. ARGNUM must be > 0.
341 STRING must be a malloc()ed string; its ownership is passed to the callee.
342 FILE_NAME must be allocated with indefinite extent.
343 COMMENT may be savable_comment, or it may be a saved copy of savable_comment
344 (then add_reference must be used when saving it, and drop_reference while
345 dropping it). Clear savable_comment. */
346 extern void arglist_parser_remember (struct arglist_parser *ap,
347 int argnum, char *string,
348 flag_context_ty context,
349 char *file_name, size_t line_number,
350 refcounted_string_list_ty *comment);
351 /* Adds an uninterpreted string argument to an arglist_parser. ARGNUM
353 STRING must be a malloc()ed string; its ownership is passed to the callee.
354 FILE_NAME must be allocated with indefinite extent.
355 COMMENT may be savable_comment, or it may be a saved copy of savable_comment
356 (then add_reference must be used when saving it, and drop_reference while
357 dropping it). Clear savable_comment. */
358 extern void arglist_parser_remember_literal (struct arglist_parser *ap,
359 int argnum, char *string,
360 flag_context_ty context,
361 char *file_name, size_t line_number,
362 refcounted_string_list_ty *comment,
363 enum literalstring_escape_type type);
364 /* Tests whether an arglist_parser is not waiting for more arguments after
366 extern bool arglist_parser_decidedp (struct arglist_parser *ap, int argnum);
367 /* Terminates the processing of an arglist_parser after argument ARGNUM and
369 extern void arglist_parser_done (struct arglist_parser *ap, int argnum);
372 /* A string buffer type that allows appending bytes (in the
373 xgettext_current_source_encoding) or Unicode characters.
374 Returns the entire string in UTF-8 encoding. */
376 struct mixed_string_buffer
378 /* The part of the string that has already been converted to UTF-8. */
381 size_t utf8_allocated;
382 /* The first half of a UTF-16 surrogate pair. */
383 unsigned short utf16_surr;
384 /* The part of the string that is still in the source encoding. */
387 size_t curr_allocated;
388 /* The lexical context. Used only for error message purposes. */
389 lexical_context_ty lcontext;
390 const char *logical_file_name;
394 /* Creates a fresh mixed_string_buffer. */
395 extern struct mixed_string_buffer *
396 mixed_string_buffer_alloc (lexical_context_ty lcontext,
397 const char *logical_file_name,
400 /* Appends a character to a mixed_string_buffer. */
401 extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp,
404 /* Appends a Unicode character to a mixed_string_buffer. */
405 extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
408 /* Frees mixed_string_buffer and returns the accumulated string in UTF-8. */
409 extern char * mixed_string_buffer_done (struct mixed_string_buffer *bp);
417 #endif /* _XGETTEXT_H */