Change LGPL-2.1+ to LGPL-2.1-or-later
[platform/upstream/glib.git] / glib / gscanner.c
1 /* GLIB - Library of useful routines for C programming
2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3  *
4  * GScanner: Flexible lexical scanner for general purpose.
5  * Copyright (C) 1997, 1998 Tim Janik
6  *
7  * SPDX-License-Identifier: LGPL-2.1-or-later
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  */
22
23 /*
24  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
25  * file for a list of people on the GLib Team.  See the ChangeLog
26  * files for a list of changes.  These files are distributed with
27  * GLib at ftp://ftp.gtk.org/pub/gtk/.
28  */
29
30 /*
31  * MT safe
32  */
33
34 #include "config.h"
35
36 #include <errno.h>
37 #include <stdlib.h>
38 #include <stdarg.h>
39 #include <string.h>
40 #include <stdio.h>
41
42 #include "gscanner.h"
43
44 #include "gprintfint.h"
45 #include "gstrfuncs.h"
46 #include "gstring.h"
47 #include "gtestutils.h"
48
49 #ifdef G_OS_UNIX
50 #include <unistd.h>
51 #endif
52 #ifdef G_OS_WIN32
53 #include <io.h>
54 #endif
55
56
57 /**
58  * SECTION:scanner
59  * @title: Lexical Scanner
60  * @short_description: a general purpose lexical scanner
61  *
62  * The #GScanner and its associated functions provide a
63  * general purpose lexical scanner.
64  */
65
66 /**
67  * GScannerMsgFunc:
68  * @scanner: a #GScanner
69  * @message: the message
70  * @error: %TRUE if the message signals an error,
71  *     %FALSE if it signals a warning.
72  *
73  * Specifies the type of the message handler function.
74  */
75
76 /**
77  * G_CSET_a_2_z:
78  *
79  * The set of lowercase ASCII alphabet characters.
80  * Used for specifying valid identifier characters
81  * in #GScannerConfig.
82  */
83
84 /**
85  * G_CSET_A_2_Z:
86  *
87  * The set of uppercase ASCII alphabet characters.
88  * Used for specifying valid identifier characters
89  * in #GScannerConfig.
90  */
91
92 /**
93  * G_CSET_DIGITS:
94  *
95  * The set of ASCII digits.
96  * Used for specifying valid identifier characters
97  * in #GScannerConfig.
98  */
99
100 /**
101  * G_CSET_LATINC:
102  *
103  * The set of uppercase ISO 8859-1 alphabet characters
104  * which are not ASCII characters.
105  * Used for specifying valid identifier characters
106  * in #GScannerConfig.
107  */
108
109 /**
110  * G_CSET_LATINS:
111  *
112  * The set of lowercase ISO 8859-1 alphabet characters
113  * which are not ASCII characters.
114  * Used for specifying valid identifier characters
115  * in #GScannerConfig.
116  */
117
118 /**
119  * GTokenType:
120  * @G_TOKEN_EOF: the end of the file
121  * @G_TOKEN_LEFT_PAREN: a '(' character
122  * @G_TOKEN_LEFT_CURLY: a '{' character
123  * @G_TOKEN_LEFT_BRACE: a '[' character
124  * @G_TOKEN_RIGHT_CURLY: a '}' character
125  * @G_TOKEN_RIGHT_PAREN: a ')' character
126  * @G_TOKEN_RIGHT_BRACE: a ']' character
127  * @G_TOKEN_EQUAL_SIGN: a '=' character
128  * @G_TOKEN_COMMA: a ',' character
129  * @G_TOKEN_NONE: not a token
130  * @G_TOKEN_ERROR: an error occurred
131  * @G_TOKEN_CHAR: a character
132  * @G_TOKEN_BINARY: a binary integer
133  * @G_TOKEN_OCTAL: an octal integer
134  * @G_TOKEN_INT: an integer
135  * @G_TOKEN_HEX: a hex integer
136  * @G_TOKEN_FLOAT: a floating point number
137  * @G_TOKEN_STRING: a string
138  * @G_TOKEN_SYMBOL: a symbol
139  * @G_TOKEN_IDENTIFIER: an identifier
140  * @G_TOKEN_IDENTIFIER_NULL: a null identifier
141  * @G_TOKEN_COMMENT_SINGLE: one line comment
142  * @G_TOKEN_COMMENT_MULTI: multi line comment
143  *
144  * The possible types of token returned from each
145  * g_scanner_get_next_token() call.
146  */
147
148 /**
149  * GTokenValue:
150  * @v_symbol: token symbol value
151  * @v_identifier: token identifier value
152  * @v_binary: token binary integer value
153  * @v_octal: octal integer value
154  * @v_int: integer value
155  * @v_int64: 64-bit integer value
156  * @v_float: floating point value
157  * @v_hex: hex integer value
158  * @v_string: string value
159  * @v_comment: comment value
160  * @v_char: character value
161  * @v_error: error value
162  *
163  * A union holding the value of the token.
164  */
165
166 /**
167  * GErrorType:
168  * @G_ERR_UNKNOWN: unknown error
169  * @G_ERR_UNEXP_EOF: unexpected end of file
170  * @G_ERR_UNEXP_EOF_IN_STRING: unterminated string constant
171  * @G_ERR_UNEXP_EOF_IN_COMMENT: unterminated comment
172  * @G_ERR_NON_DIGIT_IN_CONST: non-digit character in a number
173  * @G_ERR_DIGIT_RADIX: digit beyond radix in a number
174  * @G_ERR_FLOAT_RADIX: non-decimal floating point number
175  * @G_ERR_FLOAT_MALFORMED: malformed floating point number
176  *
177  * The possible errors, used in the @v_error field
178  * of #GTokenValue, when the token is a %G_TOKEN_ERROR.
179  */
180
181 /**
182  * GScanner:
183  * @user_data: unused
184  * @max_parse_errors: unused
185  * @parse_errors: g_scanner_error() increments this field
186  * @input_name: name of input stream, featured by the default message handler
187  * @qdata: quarked data
188  * @config: link into the scanner configuration
189  * @token: token parsed by the last g_scanner_get_next_token()
190  * @value: value of the last token from g_scanner_get_next_token()
191  * @line: line number of the last token from g_scanner_get_next_token()
192  * @position: char number of the last token from g_scanner_get_next_token()
193  * @next_token: token parsed by the last g_scanner_peek_next_token()
194  * @next_value: value of the last token from g_scanner_peek_next_token()
195  * @next_line: line number of the last token from g_scanner_peek_next_token()
196  * @next_position: char number of the last token from g_scanner_peek_next_token()
197  * @msg_handler: handler function for _warn and _error
198  *
199  * The data structure representing a lexical scanner.
200  *
201  * You should set @input_name after creating the scanner, since
202  * it is used by the default message handler when displaying
203  * warnings and errors. If you are scanning a file, the filename
204  * would be a good choice.
205  *
206  * The @user_data and @max_parse_errors fields are not used.
207  * If you need to associate extra data with the scanner you
208  * can place them here.
209  *
210  * If you want to use your own message handler you can set the
211  * @msg_handler field. The type of the message handler function
212  * is declared by #GScannerMsgFunc.
213  */
214
215 /**
216  * GScannerConfig:
217  * @cset_skip_characters: specifies which characters should be skipped
218  *     by the scanner (the default is the whitespace characters: space,
219  *     tab, carriage-return and line-feed).
220  * @cset_identifier_first: specifies the characters which can start
221  *     identifiers (the default is %G_CSET_a_2_z, "_", and %G_CSET_A_2_Z).
222  * @cset_identifier_nth: specifies the characters which can be used
223  *     in identifiers, after the first character (the default is
224  *     %G_CSET_a_2_z, "_0123456789", %G_CSET_A_2_Z, %G_CSET_LATINS,
225  *     %G_CSET_LATINC).
226  * @cpair_comment_single: specifies the characters at the start and
227  *     end of single-line comments. The default is "#\n" which means
228  *     that single-line comments start with a '#' and continue until
229  *     a '\n' (end of line).
230  * @case_sensitive: specifies if symbols are case sensitive (the
231  *     default is %FALSE).
232  * @skip_comment_multi: specifies if multi-line comments are skipped
233  *     and not returned as tokens (the default is %TRUE).
234  * @skip_comment_single: specifies if single-line comments are skipped
235  *     and not returned as tokens (the default is %TRUE).
236  * @scan_comment_multi: specifies if multi-line comments are recognized
237  *     (the default is %TRUE).
238  * @scan_identifier: specifies if identifiers are recognized (the
239  *     default is %TRUE).
240  * @scan_identifier_1char: specifies if single-character
241  *     identifiers are recognized (the default is %FALSE).
242  * @scan_identifier_NULL: specifies if %NULL is reported as
243  *     %G_TOKEN_IDENTIFIER_NULL (the default is %FALSE).
244  * @scan_symbols: specifies if symbols are recognized (the default
245  *     is %TRUE).
246  * @scan_binary: specifies if binary numbers are recognized (the
247  *     default is %FALSE).
248  * @scan_octal: specifies if octal numbers are recognized (the
249  *     default is %TRUE).
250  * @scan_float: specifies if floating point numbers are recognized
251  *     (the default is %TRUE).
252  * @scan_hex: specifies if hexadecimal numbers are recognized (the
253  *     default is %TRUE).
254  * @scan_hex_dollar: specifies if '$' is recognized as a prefix for
255  *     hexadecimal numbers (the default is %FALSE).
256  * @scan_string_sq: specifies if strings can be enclosed in single
257  *     quotes (the default is %TRUE).
258  * @scan_string_dq: specifies if strings can be enclosed in double
259  *     quotes (the default is %TRUE).
260  * @numbers_2_int: specifies if binary, octal and hexadecimal numbers
261  *     are reported as %G_TOKEN_INT (the default is %TRUE).
262  * @int_2_float: specifies if all numbers are reported as %G_TOKEN_FLOAT
263  *     (the default is %FALSE).
264  * @identifier_2_string: specifies if identifiers are reported as strings
265  *     (the default is %FALSE).
266  * @char_2_token: specifies if characters are reported by setting
267  *     `token = ch` or as %G_TOKEN_CHAR (the default is %TRUE).
268  * @symbol_2_token: specifies if symbols are reported by setting
269  *     `token = v_symbol` or as %G_TOKEN_SYMBOL (the default is %FALSE).
270  * @scope_0_fallback: specifies if a symbol is searched for in the
271  *     default scope in addition to the current scope (the default is %FALSE).
272  * @store_int64: use value.v_int64 rather than v_int
273  *
274  * Specifies the #GScanner parser configuration. Most settings can
275  * be changed during the parsing phase and will affect the lexical
276  * parsing of the next unpeeked token.
277  */
278
279 /* --- defines --- */
/* Lowercases a single byte.  For 'A'-'Z' and for the byte ranges 192-214
 * and 216-222 the corresponding case offset is OR-ed into the value; every
 * other byte is passed through unchanged.  The argument is expanded more
 * than once, so it must not have side effects.
 */
#define to_lower(c) \
  ((guchar) (((guchar) (c)) | \
             ((((guchar) (c)) >= 'A' && ((guchar) (c)) <= 'Z') * ('a' - 'A')) | \
             ((((guchar) (c)) >= 192 && ((guchar) (c)) <= 214) * (224 - 192)) | \
             ((((guchar) (c)) >= 216 && ((guchar) (c)) <= 222) * (248 - 216))))

/* NOTE(review): presumably the chunk size used when reading from an input
 * file descriptor; the code using it is outside this part of the file.
 */
#define READ_BUFFER_SIZE        (4000)
289
290
291 /* --- typedefs --- */
292 typedef struct  _GScannerKey    GScannerKey;
293
294 struct  _GScannerKey
295 {
296   guint          scope_id;
297   gchar         *symbol;
298   gpointer       value;
299 };
300
301
302 /* --- variables --- */
303 static const GScannerConfig g_scanner_config_template =
304 {
305   (
306    " \t\r\n"
307    )                    /* cset_skip_characters */,
308   (
309    G_CSET_a_2_z
310    "_"
311    G_CSET_A_2_Z
312    )                    /* cset_identifier_first */,
313   (
314    G_CSET_a_2_z
315    "_"
316    G_CSET_A_2_Z
317    G_CSET_DIGITS
318    G_CSET_LATINS
319    G_CSET_LATINC
320    )                    /* cset_identifier_nth */,
321   ( "#\n" )             /* cpair_comment_single */,
322   
323   FALSE                 /* case_sensitive */,
324   
325   TRUE                  /* skip_comment_multi */,
326   TRUE                  /* skip_comment_single */,
327   TRUE                  /* scan_comment_multi */,
328   TRUE                  /* scan_identifier */,
329   FALSE                 /* scan_identifier_1char */,
330   FALSE                 /* scan_identifier_NULL */,
331   TRUE                  /* scan_symbols */,
332   FALSE                 /* scan_binary */,
333   TRUE                  /* scan_octal */,
334   TRUE                  /* scan_float */,
335   TRUE                  /* scan_hex */,
336   FALSE                 /* scan_hex_dollar */,
337   TRUE                  /* scan_string_sq */,
338   TRUE                  /* scan_string_dq */,
339   TRUE                  /* numbers_2_int */,
340   FALSE                 /* int_2_float */,
341   FALSE                 /* identifier_2_string */,
342   TRUE                  /* char_2_token */,
343   FALSE                 /* symbol_2_token */,
344   FALSE                 /* scope_0_fallback */,
345   FALSE                 /* store_int64 */,
346   0                     /* padding_dummy */
347 };
348
349
350 /* --- prototypes --- */
351 static inline
352 GScannerKey*    g_scanner_lookup_internal (GScanner     *scanner,
353                                            guint         scope_id,
354                                            const gchar  *symbol);
355 static gboolean g_scanner_key_equal       (gconstpointer v1,
356                                            gconstpointer v2);
357 static guint    g_scanner_key_hash        (gconstpointer v);
358 static void     g_scanner_get_token_ll    (GScanner     *scanner,
359                                            GTokenType   *token_p,
360                                            GTokenValue  *value_p,
361                                            guint        *line_p,
362                                            guint        *position_p);
363 static void     g_scanner_get_token_i     (GScanner     *scanner,
364                                            GTokenType   *token_p,
365                                            GTokenValue  *value_p,
366                                            guint        *line_p,
367                                            guint        *position_p);
368
369 static guchar   g_scanner_peek_next_char  (GScanner     *scanner);
370 static guchar   g_scanner_get_char        (GScanner     *scanner,
371                                            guint        *line_p,
372                                            guint        *position_p);
373 static void     g_scanner_msg_handler     (GScanner     *scanner,
374                                            gchar        *message,
375                                            gboolean      is_error);
376
377
378 /* --- functions --- */
379 static inline gint
380 g_scanner_char_2_num (guchar    c,
381                       guchar    base)
382 {
383   if (c >= '0' && c <= '9')
384     c -= '0';
385   else if (c >= 'A' && c <= 'Z')
386     c -= 'A' - 10;
387   else if (c >= 'a' && c <= 'z')
388     c -= 'a' - 10;
389   else
390     return -1;
391   
392   if (c < base)
393     return c;
394   
395   return -1;
396 }
397
398 /**
399  * g_scanner_new:
400  * @config_templ: the initial scanner settings
401  *
402  * Creates a new #GScanner.
403  *
404  * The @config_templ structure specifies the initial settings
405  * of the scanner, which are copied into the #GScanner
406  * @config field. If you pass %NULL then the default settings
407  * are used.
408  *
409  * Returns: the new #GScanner
410  */
411 GScanner *
412 g_scanner_new (const GScannerConfig *config_templ)
413 {
414   GScanner *scanner;
415   
416   if (!config_templ)
417     config_templ = &g_scanner_config_template;
418   
419   scanner = g_new0 (GScanner, 1);
420   
421   scanner->user_data = NULL;
422   scanner->max_parse_errors = 1;
423   scanner->parse_errors = 0;
424   scanner->input_name = NULL;
425   g_datalist_init (&scanner->qdata);
426   
427   scanner->config = g_new0 (GScannerConfig, 1);
428   
429   scanner->config->case_sensitive        = config_templ->case_sensitive;
430   scanner->config->cset_skip_characters  = config_templ->cset_skip_characters;
431   if (!scanner->config->cset_skip_characters)
432     scanner->config->cset_skip_characters = "";
433   scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
434   scanner->config->cset_identifier_nth   = config_templ->cset_identifier_nth;
435   scanner->config->cpair_comment_single  = config_templ->cpair_comment_single;
436   scanner->config->skip_comment_multi    = config_templ->skip_comment_multi;
437   scanner->config->skip_comment_single   = config_templ->skip_comment_single;
438   scanner->config->scan_comment_multi    = config_templ->scan_comment_multi;
439   scanner->config->scan_identifier       = config_templ->scan_identifier;
440   scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
441   scanner->config->scan_identifier_NULL  = config_templ->scan_identifier_NULL;
442   scanner->config->scan_symbols          = config_templ->scan_symbols;
443   scanner->config->scan_binary           = config_templ->scan_binary;
444   scanner->config->scan_octal            = config_templ->scan_octal;
445   scanner->config->scan_float            = config_templ->scan_float;
446   scanner->config->scan_hex              = config_templ->scan_hex;
447   scanner->config->scan_hex_dollar       = config_templ->scan_hex_dollar;
448   scanner->config->scan_string_sq        = config_templ->scan_string_sq;
449   scanner->config->scan_string_dq        = config_templ->scan_string_dq;
450   scanner->config->numbers_2_int         = config_templ->numbers_2_int;
451   scanner->config->int_2_float           = config_templ->int_2_float;
452   scanner->config->identifier_2_string   = config_templ->identifier_2_string;
453   scanner->config->char_2_token          = config_templ->char_2_token;
454   scanner->config->symbol_2_token        = config_templ->symbol_2_token;
455   scanner->config->scope_0_fallback      = config_templ->scope_0_fallback;
456   scanner->config->store_int64           = config_templ->store_int64;
457   
458   scanner->token = G_TOKEN_NONE;
459   scanner->value.v_int64 = 0;
460   scanner->line = 1;
461   scanner->position = 0;
462   
463   scanner->next_token = G_TOKEN_NONE;
464   scanner->next_value.v_int64 = 0;
465   scanner->next_line = 1;
466   scanner->next_position = 0;
467   
468   scanner->symbol_table = g_hash_table_new (g_scanner_key_hash, g_scanner_key_equal);
469   scanner->input_fd = -1;
470   scanner->text = NULL;
471   scanner->text_end = NULL;
472   scanner->buffer = NULL;
473   scanner->scope_id = 0;
474   
475   scanner->msg_handler = g_scanner_msg_handler;
476   
477   return scanner;
478 }
479
480 static inline void
481 g_scanner_free_value (GTokenType     *token_p,
482                       GTokenValue     *value_p)
483 {
484   switch (*token_p)
485     {
486     case G_TOKEN_STRING:
487     case G_TOKEN_IDENTIFIER:
488     case G_TOKEN_IDENTIFIER_NULL:
489     case G_TOKEN_COMMENT_SINGLE:
490     case G_TOKEN_COMMENT_MULTI:
491       g_free (value_p->v_string);
492       break;
493       
494     default:
495       break;
496     }
497   
498   *token_p = G_TOKEN_NONE;
499 }
500
501 static void
502 g_scanner_destroy_symbol_table_entry (gpointer _key,
503                                       gpointer _value,
504                                       gpointer _data)
505 {
506   GScannerKey *key = _key;
507   
508   g_free (key->symbol);
509   g_free (key);
510 }
511
512 /**
513  * g_scanner_destroy:
514  * @scanner: a #GScanner
515  *
516  * Frees all memory used by the #GScanner.
517  */
518 void
519 g_scanner_destroy (GScanner *scanner)
520 {
521   g_return_if_fail (scanner != NULL);
522   
523   g_datalist_clear (&scanner->qdata);
524   g_hash_table_foreach (scanner->symbol_table, 
525                         g_scanner_destroy_symbol_table_entry, NULL);
526   g_hash_table_destroy (scanner->symbol_table);
527   g_scanner_free_value (&scanner->token, &scanner->value);
528   g_scanner_free_value (&scanner->next_token, &scanner->next_value);
529   g_free (scanner->config);
530   g_free (scanner->buffer);
531   g_free (scanner);
532 }
533
534 static void
535 g_scanner_msg_handler (GScanner         *scanner,
536                        gchar            *message,
537                        gboolean         is_error)
538 {
539   g_return_if_fail (scanner != NULL);
540   
541   _g_fprintf (stderr, "%s:%d: ",
542               scanner->input_name ? scanner->input_name : "<memory>",
543               scanner->line);
544   if (is_error)
545     _g_fprintf (stderr, "error: ");
546   _g_fprintf (stderr, "%s\n", message);
547 }
548
549 /**
550  * g_scanner_error:
551  * @scanner: a #GScanner
552  * @format: the message format. See the printf() documentation
553  * @...: the parameters to insert into the format string
554  *
555  * Outputs an error message, via the #GScanner message handler.
556  */
557 void
558 g_scanner_error (GScanner       *scanner,
559                  const gchar    *format,
560                  ...)
561 {
562   g_return_if_fail (scanner != NULL);
563   g_return_if_fail (format != NULL);
564   
565   scanner->parse_errors++;
566   
567   if (scanner->msg_handler)
568     {
569       va_list args;
570       gchar *string;
571       
572       va_start (args, format);
573       string = g_strdup_vprintf (format, args);
574       va_end (args);
575       
576       scanner->msg_handler (scanner, string, TRUE);
577       
578       g_free (string);
579     }
580 }
581
582 /**
583  * g_scanner_warn:
584  * @scanner: a #GScanner
585  * @format: the message format. See the printf() documentation
586  * @...: the parameters to insert into the format string
587  *
588  * Outputs a warning message, via the #GScanner message handler.
589  */
590 void
591 g_scanner_warn (GScanner       *scanner,
592                 const gchar    *format,
593                 ...)
594 {
595   g_return_if_fail (scanner != NULL);
596   g_return_if_fail (format != NULL);
597   
598   if (scanner->msg_handler)
599     {
600       va_list args;
601       gchar *string;
602       
603       va_start (args, format);
604       string = g_strdup_vprintf (format, args);
605       va_end (args);
606       
607       scanner->msg_handler (scanner, string, FALSE);
608       
609       g_free (string);
610     }
611 }
612
613 static gboolean
614 g_scanner_key_equal (gconstpointer v1,
615                      gconstpointer v2)
616 {
617   const GScannerKey *key1 = v1;
618   const GScannerKey *key2 = v2;
619   
620   return (key1->scope_id == key2->scope_id) && (strcmp (key1->symbol, key2->symbol) == 0);
621 }
622
623 static guint
624 g_scanner_key_hash (gconstpointer v)
625 {
626   const GScannerKey *key = v;
627   gchar *c;
628   guint h;
629   
630   h = key->scope_id;
631   for (c = key->symbol; *c; c++)
632     h = (h << 5) - h + *c;
633   
634   return h;
635 }
636
637 static inline GScannerKey*
638 g_scanner_lookup_internal (GScanner     *scanner,
639                            guint         scope_id,
640                            const gchar  *symbol)
641 {
642   GScannerKey   *key_p;
643   GScannerKey key;
644   
645   key.scope_id = scope_id;
646   
647   if (!scanner->config->case_sensitive)
648     {
649       gchar *d;
650       const gchar *c;
651       
652       key.symbol = g_new (gchar, strlen (symbol) + 1);
653       for (d = key.symbol, c = symbol; *c; c++, d++)
654         *d = to_lower (*c);
655       *d = 0;
656       key_p = g_hash_table_lookup (scanner->symbol_table, &key);
657       g_free (key.symbol);
658     }
659   else
660     {
661       key.symbol = (gchar*) symbol;
662       key_p = g_hash_table_lookup (scanner->symbol_table, &key);
663     }
664   
665   return key_p;
666 }
667
668 /**
669  * g_scanner_add_symbol:
670  * @scanner: a #GScanner
671  * @symbol: the symbol to add
672  * @value: the value of the symbol
673  *
674  * Adds a symbol to the default scope.
675  *
676  * Deprecated: 2.2: Use g_scanner_scope_add_symbol() instead.
677  */
678
679 /**
680  * g_scanner_scope_add_symbol:
681  * @scanner: a #GScanner
682  * @scope_id: the scope id
683  * @symbol: the symbol to add
684  * @value: the value of the symbol
685  *
686  * Adds a symbol to the given scope.
687  */
688 void
689 g_scanner_scope_add_symbol (GScanner    *scanner,
690                             guint        scope_id,
691                             const gchar *symbol,
692                             gpointer     value)
693 {
694   GScannerKey   *key;
695   
696   g_return_if_fail (scanner != NULL);
697   g_return_if_fail (symbol != NULL);
698   
699   key = g_scanner_lookup_internal (scanner, scope_id, symbol);
700   
701   if (!key)
702     {
703       key = g_new (GScannerKey, 1);
704       key->scope_id = scope_id;
705       key->symbol = g_strdup (symbol);
706       key->value = value;
707       if (!scanner->config->case_sensitive)
708         {
709           gchar *c;
710           
711           c = key->symbol;
712           while (*c != 0)
713             {
714               *c = to_lower (*c);
715               c++;
716             }
717         }
718       g_hash_table_add (scanner->symbol_table, key);
719     }
720   else
721     key->value = value;
722 }
723
724 /**
725  * g_scanner_remove_symbol:
726  * @scanner: a #GScanner
727  * @symbol: the symbol to remove
728  *
729  * Removes a symbol from the default scope.
730  *
731  * Deprecated: 2.2: Use g_scanner_scope_remove_symbol() instead.
732  */
733
734 /**
735  * g_scanner_scope_remove_symbol:
736  * @scanner: a #GScanner
737  * @scope_id: the scope id
738  * @symbol: the symbol to remove
739  *
740  * Removes a symbol from a scope.
741  */
742 void
743 g_scanner_scope_remove_symbol (GScanner    *scanner,
744                                guint        scope_id,
745                                const gchar *symbol)
746 {
747   GScannerKey   *key;
748   
749   g_return_if_fail (scanner != NULL);
750   g_return_if_fail (symbol != NULL);
751   
752   key = g_scanner_lookup_internal (scanner, scope_id, symbol);
753   
754   if (key)
755     {
756       g_hash_table_remove (scanner->symbol_table, key);
757       g_free (key->symbol);
758       g_free (key);
759     }
760 }
761
762 /**
763  * g_scanner_freeze_symbol_table:
764  * @scanner: a #GScanner
765  *
766  * There is no reason to use this macro, since it does nothing.
767  *
768  * Deprecated: 2.2: This macro does nothing.
769  */
770
771 /**
772  * g_scanner_thaw_symbol_table:
773  * @scanner: a #GScanner
774  *
775  * There is no reason to use this macro, since it does nothing.
776  *
777  * Deprecated: 2.2: This macro does nothing.
778  */
779
780 /**
781  * g_scanner_lookup_symbol:
782  * @scanner: a #GScanner
783  * @symbol: the symbol to look up
784  *
785  * Looks up a symbol in the current scope and return its value.
786  * If the symbol is not bound in the current scope, %NULL is
787  * returned.
788  *
789  * Returns: the value of @symbol in the current scope, or %NULL
790  *     if @symbol is not bound in the current scope
791  */
792 gpointer
793 g_scanner_lookup_symbol (GScanner       *scanner,
794                          const gchar    *symbol)
795 {
796   GScannerKey   *key;
797   guint scope_id;
798   
799   g_return_val_if_fail (scanner != NULL, NULL);
800   
801   if (!symbol)
802     return NULL;
803   
804   scope_id = scanner->scope_id;
805   key = g_scanner_lookup_internal (scanner, scope_id, symbol);
806   if (!key && scope_id && scanner->config->scope_0_fallback)
807     key = g_scanner_lookup_internal (scanner, 0, symbol);
808   
809   if (key)
810     return key->value;
811   else
812     return NULL;
813 }
814
815 /**
816  * g_scanner_scope_lookup_symbol:
817  * @scanner: a #GScanner
818  * @scope_id: the scope id
819  * @symbol: the symbol to look up
820  *
821  * Looks up a symbol in a scope and return its value. If the
822  * symbol is not bound in the scope, %NULL is returned.
823  *
824  * Returns: the value of @symbol in the given scope, or %NULL
825  *     if @symbol is not bound in the given scope.
826  *
827  */
828 gpointer
829 g_scanner_scope_lookup_symbol (GScanner       *scanner,
830                                guint           scope_id,
831                                const gchar    *symbol)
832 {
833   GScannerKey   *key;
834   
835   g_return_val_if_fail (scanner != NULL, NULL);
836   
837   if (!symbol)
838     return NULL;
839   
840   key = g_scanner_lookup_internal (scanner, scope_id, symbol);
841   
842   if (key)
843     return key->value;
844   else
845     return NULL;
846 }
847
848 /**
849  * g_scanner_set_scope:
850  * @scanner: a #GScanner
851  * @scope_id: the new scope id
852  *
853  * Sets the current scope.
854  *
855  * Returns: the old scope id
856  */
857 guint
858 g_scanner_set_scope (GScanner       *scanner,
859                      guint           scope_id)
860 {
861   guint old_scope_id;
862   
863   g_return_val_if_fail (scanner != NULL, 0);
864   
865   old_scope_id = scanner->scope_id;
866   scanner->scope_id = scope_id;
867   
868   return old_scope_id;
869 }
870
871 static void
872 g_scanner_foreach_internal (gpointer  _key,
873                             gpointer  _value,
874                             gpointer  _user_data)
875 {
876   GScannerKey *key;
877   gpointer *d;
878   GHFunc func;
879   gpointer user_data;
880   guint *scope_id;
881   
882   d = _user_data;
883   func = (GHFunc) d[0];
884   user_data = d[1];
885   scope_id = d[2];
886   key = _value;
887   
888   if (key->scope_id == *scope_id)
889     func (key->symbol, key->value, user_data);
890 }
891
892 /**
893  * g_scanner_foreach_symbol:
894  * @scanner: a #GScanner
895  * @func: the function to call with each symbol
896  * @data: data to pass to the function
897  *
898  * Calls a function for each symbol in the default scope.
899  *
900  * Deprecated: 2.2: Use g_scanner_scope_foreach_symbol() instead.
901  */
902
903 /**
904  * g_scanner_scope_foreach_symbol:
905  * @scanner: a #GScanner
906  * @scope_id: the scope id
907  * @func: the function to call for each symbol/value pair
908  * @user_data: user data to pass to the function
909  *
910  * Calls the given function for each of the symbol/value pairs
911  * in the given scope of the #GScanner. The function is passed
912  * the symbol and value of each pair, and the given @user_data
913  * parameter.
914  */
915 void
916 g_scanner_scope_foreach_symbol (GScanner       *scanner,
917                                 guint           scope_id,
918                                 GHFunc          func,
919                                 gpointer        user_data)
920 {
921   gpointer d[3];
922   
923   g_return_if_fail (scanner != NULL);
924   
925   d[0] = (gpointer) func;
926   d[1] = user_data;
927   d[2] = &scope_id;
928   
929   g_hash_table_foreach (scanner->symbol_table, g_scanner_foreach_internal, d);
930 }
931
932 /**
933  * g_scanner_peek_next_token:
934  * @scanner: a #GScanner
935  *
936  * Parses the next token, without removing it from the input stream.
937  * The token data is placed in the @next_token, @next_value, @next_line,
938  * and @next_position fields of the #GScanner structure.
939  *
940  * Note that, while the token is not removed from the input stream
941  * (i.e. the next call to g_scanner_get_next_token() will return the
942  * same token), it will not be reevaluated. This can lead to surprising
943  * results when changing scope or the scanner configuration after peeking
944  * the next token. Getting the next token after switching the scope or
945  * configuration will return whatever was peeked before, regardless of
946  * any symbols that may have been added or removed in the new scope.
947  *
948  * Returns: the type of the token
949  */
950 GTokenType
951 g_scanner_peek_next_token (GScanner     *scanner)
952 {
953   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
954   
955   if (scanner->next_token == G_TOKEN_NONE)
956     {
957       scanner->next_line = scanner->line;
958       scanner->next_position = scanner->position;
959       g_scanner_get_token_i (scanner,
960                              &scanner->next_token,
961                              &scanner->next_value,
962                              &scanner->next_line,
963                              &scanner->next_position);
964     }
965   
966   return scanner->next_token;
967 }
968
969 /**
970  * g_scanner_get_next_token:
971  * @scanner: a #GScanner
972  *
973  * Parses the next token just like g_scanner_peek_next_token()
974  * and also removes it from the input stream. The token data is
975  * placed in the @token, @value, @line, and @position fields of
976  * the #GScanner structure.
977  *
978  * Returns: the type of the token
979  */
980 GTokenType
981 g_scanner_get_next_token (GScanner      *scanner)
982 {
983   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
984   
985   if (scanner->next_token != G_TOKEN_NONE)
986     {
987       g_scanner_free_value (&scanner->token, &scanner->value);
988       
989       scanner->token = scanner->next_token;
990       scanner->value = scanner->next_value;
991       scanner->line = scanner->next_line;
992       scanner->position = scanner->next_position;
993       scanner->next_token = G_TOKEN_NONE;
994     }
995   else
996     g_scanner_get_token_i (scanner,
997                            &scanner->token,
998                            &scanner->value,
999                            &scanner->line,
1000                            &scanner->position);
1001   
1002   return scanner->token;
1003 }
1004
1005 /**
1006  * g_scanner_cur_token:
1007  * @scanner: a #GScanner
1008  *
1009  * Gets the current token type. This is simply the @token
1010  * field in the #GScanner structure.
1011  *
1012  * Returns: the current token type
1013  */
1014 GTokenType
1015 g_scanner_cur_token (GScanner *scanner)
1016 {
1017   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
1018   
1019   return scanner->token;
1020 }
1021
1022 /**
1023  * g_scanner_cur_value:
1024  * @scanner: a #GScanner
1025  *
1026  * Gets the current token value. This is simply the @value
1027  * field in the #GScanner structure.
1028  *
1029  * Returns: the current token value
1030  */
1031 GTokenValue
1032 g_scanner_cur_value (GScanner *scanner)
1033 {
1034   GTokenValue v;
1035   
1036   v.v_int64 = 0;
1037   
1038   g_return_val_if_fail (scanner != NULL, v);
1039
1040   /* MSC isn't capable of handling return scanner->value; ? */
1041
1042   v = scanner->value;
1043
1044   return v;
1045 }
1046
1047 /**
1048  * g_scanner_cur_line:
1049  * @scanner: a #GScanner
1050  *
1051  * Returns the current line in the input stream (counting
1052  * from 1). This is the line of the last token parsed via
1053  * g_scanner_get_next_token().
1054  *
1055  * Returns: the current line
1056  */
1057 guint
1058 g_scanner_cur_line (GScanner *scanner)
1059 {
1060   g_return_val_if_fail (scanner != NULL, 0);
1061   
1062   return scanner->line;
1063 }
1064
1065 /**
1066  * g_scanner_cur_position:
1067  * @scanner: a #GScanner
1068  *
1069  * Returns the current position in the current line (counting
1070  * from 0). This is the position of the last token parsed via
1071  * g_scanner_get_next_token().
1072  *
1073  * Returns: the current position on the line
1074  */
1075 guint
1076 g_scanner_cur_position (GScanner *scanner)
1077 {
1078   g_return_val_if_fail (scanner != NULL, 0);
1079   
1080   return scanner->position;
1081 }
1082
1083 /**
1084  * g_scanner_eof:
1085  * @scanner: a #GScanner
1086  *
1087  * Returns %TRUE if the scanner has reached the end of
1088  * the file or text buffer.
1089  *
1090  * Returns: %TRUE if the scanner has reached the end of
1091  *     the file or text buffer
1092  */
1093 gboolean
1094 g_scanner_eof (GScanner *scanner)
1095 {
1096   g_return_val_if_fail (scanner != NULL, TRUE);
1097   
1098   return scanner->token == G_TOKEN_EOF || scanner->token == G_TOKEN_ERROR;
1099 }
1100
1101 /**
1102  * g_scanner_input_file:
1103  * @scanner: a #GScanner
1104  * @input_fd: a file descriptor
1105  *
1106  * Prepares to scan a file.
1107  */
1108 void
1109 g_scanner_input_file (GScanner *scanner,
1110                       gint      input_fd)
1111 {
1112   g_return_if_fail (scanner != NULL);
1113   g_return_if_fail (input_fd >= 0);
1114
1115   if (scanner->input_fd >= 0)
1116     g_scanner_sync_file_offset (scanner);
1117
1118   scanner->token = G_TOKEN_NONE;
1119   scanner->value.v_int64 = 0;
1120   scanner->line = 1;
1121   scanner->position = 0;
1122   scanner->next_token = G_TOKEN_NONE;
1123
1124   scanner->input_fd = input_fd;
1125   scanner->text = NULL;
1126   scanner->text_end = NULL;
1127
1128   if (!scanner->buffer)
1129     scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1);
1130 }
1131
1132 /**
1133  * g_scanner_input_text:
1134  * @scanner: a #GScanner
1135  * @text: the text buffer to scan
1136  * @text_len: the length of the text buffer
1137  *
1138  * Prepares to scan a text buffer.
1139  */
1140 void
1141 g_scanner_input_text (GScanner    *scanner,
1142                       const gchar *text,
1143                       guint        text_len)
1144 {
1145   g_return_if_fail (scanner != NULL);
1146   if (text_len)
1147     g_return_if_fail (text != NULL);
1148   else
1149     text = NULL;
1150
1151   if (scanner->input_fd >= 0)
1152     g_scanner_sync_file_offset (scanner);
1153
1154   scanner->token = G_TOKEN_NONE;
1155   scanner->value.v_int64 = 0;
1156   scanner->line = 1;
1157   scanner->position = 0;
1158   scanner->next_token = G_TOKEN_NONE;
1159
1160   scanner->input_fd = -1;
1161   scanner->text = text;
1162   scanner->text_end = text + text_len;
1163
1164   if (scanner->buffer)
1165     {
1166       g_free (scanner->buffer);
1167       scanner->buffer = NULL;
1168     }
1169 }
1170
1171 static guchar
1172 g_scanner_peek_next_char (GScanner *scanner)
1173 {
1174   if (scanner->text < scanner->text_end)
1175     {
1176       return *scanner->text;
1177     }
1178   else if (scanner->input_fd >= 0)
1179     {
1180       gint count;
1181       gchar *buffer;
1182
1183       buffer = scanner->buffer;
1184       do
1185         {
1186           count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
1187         }
1188       while (count == -1 && (errno == EINTR || errno == EAGAIN));
1189
1190       if (count < 1)
1191         {
1192           scanner->input_fd = -1;
1193
1194           return 0;
1195         }
1196       else
1197         {
1198           scanner->text = buffer;
1199           scanner->text_end = buffer + count;
1200
1201           return *buffer;
1202         }
1203     }
1204   else
1205     return 0;
1206 }
1207
1208 /**
1209  * g_scanner_sync_file_offset:
1210  * @scanner: a #GScanner
1211  *
1212  * Rewinds the filedescriptor to the current buffer position
1213  * and blows the file read ahead buffer. This is useful for
1214  * third party uses of the scanners filedescriptor, which hooks
1215  * onto the current scanning position.
1216  */
1217 void
1218 g_scanner_sync_file_offset (GScanner *scanner)
1219 {
1220   g_return_if_fail (scanner != NULL);
1221
1222   /* for file input, rewind the filedescriptor to the current
1223    * buffer position and blow the file read ahead buffer. useful
1224    * for third party uses of our file descriptor, which hooks 
1225    * onto the current scanning position.
1226    */
1227
1228   if (scanner->input_fd >= 0 && scanner->text_end > scanner->text)
1229     {
1230       gint buffered;
1231
1232       buffered = scanner->text_end - scanner->text;
1233       if (lseek (scanner->input_fd, - buffered, SEEK_CUR) >= 0)
1234         {
1235           /* we succeeded, blow our buffer's contents now */
1236           scanner->text = NULL;
1237           scanner->text_end = NULL;
1238         }
1239       else
1240         errno = 0;
1241     }
1242 }
1243
1244 static guchar
1245 g_scanner_get_char (GScanner    *scanner,
1246                     guint       *line_p,
1247                     guint       *position_p)
1248 {
1249   guchar fchar;
1250
1251   if (scanner->text < scanner->text_end)
1252     fchar = *(scanner->text++);
1253   else if (scanner->input_fd >= 0)
1254     {
1255       gint count;
1256       gchar *buffer;
1257
1258       buffer = scanner->buffer;
1259       do
1260         {
1261           count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
1262         }
1263       while (count == -1 && (errno == EINTR || errno == EAGAIN));
1264
1265       if (count < 1)
1266         {
1267           scanner->input_fd = -1;
1268           fchar = 0;
1269         }
1270       else
1271         {
1272           scanner->text = buffer + 1;
1273           scanner->text_end = buffer + count;
1274           fchar = *buffer;
1275           if (!fchar)
1276             {
1277               g_scanner_sync_file_offset (scanner);
1278               scanner->text_end = scanner->text;
1279               scanner->input_fd = -1;
1280             }
1281         }
1282     }
1283   else
1284     fchar = 0;
1285   
1286   if (fchar == '\n')
1287     {
1288       (*position_p) = 0;
1289       (*line_p)++;
1290     }
1291   else if (fchar)
1292     {
1293       (*position_p)++;
1294     }
1295   
1296   return fchar;
1297 }
1298
1299 /**
1300  * g_scanner_unexp_token:
1301  * @scanner: a #GScanner
1302  * @expected_token: the expected token
1303  * @identifier_spec: a string describing how the scanner's user
1304  *     refers to identifiers (%NULL defaults to "identifier").
1305  *     This is used if @expected_token is %G_TOKEN_IDENTIFIER or
1306  *     %G_TOKEN_IDENTIFIER_NULL.
1307  * @symbol_spec: a string describing how the scanner's user refers
1308  *     to symbols (%NULL defaults to "symbol"). This is used if
1309  *     @expected_token is %G_TOKEN_SYMBOL or any token value greater
1310  *     than %G_TOKEN_LAST.
1311  * @symbol_name: the name of the symbol, if the scanner's current
1312  *     token is a symbol.
1313  * @message: a message string to output at the end of the
1314  *     warning/error, or %NULL.
1315  * @is_error: if %TRUE it is output as an error. If %FALSE it is
1316  *     output as a warning.
1317  *
1318  * Outputs a message through the scanner's msg_handler,
1319  * resulting from an unexpected token in the input stream.
1320  * Note that you should not call g_scanner_peek_next_token()
1321  * followed by g_scanner_unexp_token() without an intermediate
1322  * call to g_scanner_get_next_token(), as g_scanner_unexp_token()
1323  * evaluates the scanner's current token (not the peeked token)
1324  * to construct part of the message.
1325  */
1326 void
1327 g_scanner_unexp_token (GScanner         *scanner,
1328                        GTokenType        expected_token,
1329                        const gchar      *identifier_spec,
1330                        const gchar      *symbol_spec,
1331                        const gchar      *symbol_name,
1332                        const gchar      *message,
1333                        gint              is_error)
1334 {
1335   gchar *token_string;
1336   guint token_string_len;
1337   gchar *expected_string;
1338   guint expected_string_len;
1339   gchar *message_prefix;
1340   gboolean print_unexp;
1341   void (*msg_handler)   (GScanner*, const gchar*, ...);
1342   
1343   g_return_if_fail (scanner != NULL);
1344   
1345   if (is_error)
1346     msg_handler = g_scanner_error;
1347   else
1348     msg_handler = g_scanner_warn;
1349   
1350   if (!identifier_spec)
1351     identifier_spec = "identifier";
1352   if (!symbol_spec)
1353     symbol_spec = "symbol";
1354   
1355   token_string_len = 56;
1356   token_string = g_new (gchar, token_string_len + 1);
1357   expected_string_len = 64;
1358   expected_string = g_new (gchar, expected_string_len + 1);
1359   print_unexp = TRUE;
1360   
1361   switch (scanner->token)
1362     {
1363     case G_TOKEN_EOF:
1364       _g_snprintf (token_string, token_string_len, "end of file");
1365       break;
1366       
1367     default:
1368       if (scanner->token >= 1 && scanner->token <= 255)
1369         {
1370           if ((scanner->token >= ' ' && scanner->token <= '~') ||
1371               strchr (scanner->config->cset_identifier_first, scanner->token) ||
1372               strchr (scanner->config->cset_identifier_nth, scanner->token))
1373             _g_snprintf (token_string, token_string_len, "character '%c'", scanner->token);
1374           else
1375             _g_snprintf (token_string, token_string_len, "character '\\%o'", scanner->token);
1376           break;
1377         }
1378       else if (!scanner->config->symbol_2_token)
1379         {
1380           _g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
1381           break;
1382         }
1383       G_GNUC_FALLTHROUGH;
1384     case G_TOKEN_SYMBOL:
1385       if (expected_token == G_TOKEN_SYMBOL ||
1386           (scanner->config->symbol_2_token &&
1387            expected_token > G_TOKEN_LAST))
1388         print_unexp = FALSE;
1389       if (symbol_name)
1390         _g_snprintf (token_string,
1391                      token_string_len,
1392                      "%s%s '%s'",
1393                      print_unexp ? "" : "invalid ",
1394                      symbol_spec,
1395                      symbol_name);
1396       else
1397         _g_snprintf (token_string,
1398                      token_string_len,
1399                      "%s%s",
1400                      print_unexp ? "" : "invalid ",
1401                      symbol_spec);
1402       break;
1403       
1404     case G_TOKEN_ERROR:
1405       print_unexp = FALSE;
1406       expected_token = G_TOKEN_NONE;
1407       switch (scanner->value.v_error)
1408         {
1409         case G_ERR_UNEXP_EOF:
1410           _g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
1411           break;
1412           
1413         case G_ERR_UNEXP_EOF_IN_STRING:
1414           _g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
1415           break;
1416           
1417         case G_ERR_UNEXP_EOF_IN_COMMENT:
1418           _g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
1419           break;
1420           
1421         case G_ERR_NON_DIGIT_IN_CONST:
1422           _g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
1423           break;
1424           
1425         case G_ERR_FLOAT_RADIX:
1426           _g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
1427           break;
1428           
1429         case G_ERR_FLOAT_MALFORMED:
1430           _g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
1431           break;
1432           
1433         case G_ERR_DIGIT_RADIX:
1434           _g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
1435           break;
1436           
1437         case G_ERR_UNKNOWN:
1438         default:
1439           _g_snprintf (token_string, token_string_len, "scanner: unknown error");
1440           break;
1441         }
1442       break;
1443       
1444     case G_TOKEN_CHAR:
1445       _g_snprintf (token_string, token_string_len, "character '%c'", scanner->value.v_char);
1446       break;
1447       
1448     case G_TOKEN_IDENTIFIER:
1449     case G_TOKEN_IDENTIFIER_NULL:
1450       if (expected_token == G_TOKEN_IDENTIFIER ||
1451           expected_token == G_TOKEN_IDENTIFIER_NULL)
1452         print_unexp = FALSE;
1453       _g_snprintf (token_string,
1454                   token_string_len,
1455                   "%s%s '%s'",
1456                   print_unexp ? "" : "invalid ",
1457                   identifier_spec,
1458                   scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
1459       break;
1460       
1461     case G_TOKEN_BINARY:
1462     case G_TOKEN_OCTAL:
1463     case G_TOKEN_INT:
1464     case G_TOKEN_HEX:
1465       if (scanner->config->store_int64)
1466         _g_snprintf (token_string, token_string_len, "number '%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
1467       else
1468         _g_snprintf (token_string, token_string_len, "number '%lu'", scanner->value.v_int);
1469       break;
1470       
1471     case G_TOKEN_FLOAT:
1472       _g_snprintf (token_string, token_string_len, "number '%.3f'", scanner->value.v_float);
1473       break;
1474       
1475     case G_TOKEN_STRING:
1476       if (expected_token == G_TOKEN_STRING)
1477         print_unexp = FALSE;
1478       _g_snprintf (token_string,
1479                    token_string_len,
1480                    "%s%sstring constant \"%s\"",
1481                    print_unexp ? "" : "invalid ",
1482                    scanner->value.v_string[0] == 0 ? "empty " : "",
1483                    scanner->value.v_string);
1484       token_string[token_string_len - 2] = '"';
1485       token_string[token_string_len - 1] = 0;
1486       break;
1487       
1488     case G_TOKEN_COMMENT_SINGLE:
1489     case G_TOKEN_COMMENT_MULTI:
1490       _g_snprintf (token_string, token_string_len, "comment");
1491       break;
1492       
1493     case G_TOKEN_NONE:
1494       /* somehow the user's parsing code is screwed, there isn't much
1495        * we can do about it.
1496        * Note, a common case to trigger this is
1497        * g_scanner_peek_next_token(); g_scanner_unexp_token();
1498        * without an intermediate g_scanner_get_next_token().
1499        */
1500       g_assert_not_reached ();
1501       break;
1502     }
1503   
1504   
1505   switch (expected_token)
1506     {
1507       gboolean need_valid;
1508       gchar *tstring;
1509     case G_TOKEN_EOF:
1510       _g_snprintf (expected_string, expected_string_len, "end of file");
1511       break;
1512     default:
1513       if (expected_token >= 1 && expected_token <= 255)
1514         {
1515           if ((expected_token >= ' ' && expected_token <= '~') ||
1516               strchr (scanner->config->cset_identifier_first, expected_token) ||
1517               strchr (scanner->config->cset_identifier_nth, expected_token))
1518             _g_snprintf (expected_string, expected_string_len, "character '%c'", expected_token);
1519           else
1520             _g_snprintf (expected_string, expected_string_len, "character '\\%o'", expected_token);
1521           break;
1522         }
1523       else if (!scanner->config->symbol_2_token)
1524         {
1525           _g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
1526           break;
1527         }
1528       G_GNUC_FALLTHROUGH;
1529     case G_TOKEN_SYMBOL:
1530       need_valid = (scanner->token == G_TOKEN_SYMBOL ||
1531                     (scanner->config->symbol_2_token &&
1532                      scanner->token > G_TOKEN_LAST));
1533       _g_snprintf (expected_string,
1534                    expected_string_len,
1535                    "%s%s",
1536                    need_valid ? "valid " : "",
1537                    symbol_spec);
1538       /* FIXME: should we attempt to look up the symbol_name for symbol_2_token? */
1539       break;
1540     case G_TOKEN_CHAR:
1541       _g_snprintf (expected_string, expected_string_len, "%scharacter",
1542                    scanner->token == G_TOKEN_CHAR ? "valid " : "");
1543       break;
1544     case G_TOKEN_BINARY:
1545       tstring = "binary";
1546       _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1547                    scanner->token == expected_token ? "valid " : "", tstring);
1548       break;
1549     case G_TOKEN_OCTAL:
1550       tstring = "octal";
1551       _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1552                    scanner->token == expected_token ? "valid " : "", tstring);
1553       break;
1554     case G_TOKEN_INT:
1555       tstring = "integer";
1556       _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1557                    scanner->token == expected_token ? "valid " : "", tstring);
1558       break;
1559     case G_TOKEN_HEX:
1560       tstring = "hexadecimal";
1561       _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1562                    scanner->token == expected_token ? "valid " : "", tstring);
1563       break;
1564     case G_TOKEN_FLOAT:
1565       tstring = "float";
1566       _g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1567                    scanner->token == expected_token ? "valid " : "", tstring);
1568       break;
1569     case G_TOKEN_STRING:
1570       _g_snprintf (expected_string,
1571                    expected_string_len,
1572                    "%sstring constant",
1573                    scanner->token == G_TOKEN_STRING ? "valid " : "");
1574       break;
1575     case G_TOKEN_IDENTIFIER:
1576     case G_TOKEN_IDENTIFIER_NULL:
1577       need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
1578                     scanner->token == G_TOKEN_IDENTIFIER);
1579       _g_snprintf (expected_string,
1580                    expected_string_len,
1581                    "%s%s",
1582                    need_valid ? "valid " : "",
1583                    identifier_spec);
1584       break;
1585     case G_TOKEN_COMMENT_SINGLE:
1586       tstring = "single-line";
1587       _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1588                    scanner->token == expected_token ? "valid " : "", tstring);
1589       break;
1590     case G_TOKEN_COMMENT_MULTI:
1591       tstring = "multi-line";
1592       _g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1593                    scanner->token == expected_token ? "valid " : "", tstring);
1594       break;
1595     case G_TOKEN_NONE:
1596     case G_TOKEN_ERROR:
1597       /* this is handled upon printout */
1598       break;
1599     }
1600   
1601   if (message && message[0] != 0)
1602     message_prefix = " - ";
1603   else
1604     {
1605       message_prefix = "";
1606       message = "";
1607     }
1608   if (expected_token == G_TOKEN_ERROR)
1609     {
1610       msg_handler (scanner,
1611                    "failure around %s%s%s",
1612                    token_string,
1613                    message_prefix,
1614                    message);
1615     }
1616   else if (expected_token == G_TOKEN_NONE)
1617     {
1618       if (print_unexp)
1619         msg_handler (scanner,
1620                      "unexpected %s%s%s",
1621                      token_string,
1622                      message_prefix,
1623                      message);
1624       else
1625         msg_handler (scanner,
1626                      "%s%s%s",
1627                      token_string,
1628                      message_prefix,
1629                      message);
1630     }
1631   else
1632     {
1633       if (print_unexp)
1634         msg_handler (scanner,
1635                      "unexpected %s, expected %s%s%s",
1636                      token_string,
1637                      expected_string,
1638                      message_prefix,
1639                      message);
1640       else
1641         msg_handler (scanner,
1642                      "%s, expected %s%s%s",
1643                      token_string,
1644                      expected_string,
1645                      message_prefix,
1646                      message);
1647     }
1648   
1649   g_free (token_string);
1650   g_free (expected_string);
1651 }
1652
1653 static void
1654 g_scanner_get_token_i (GScanner *scanner,
1655                        GTokenType       *token_p,
1656                        GTokenValue      *value_p,
1657                        guint            *line_p,
1658                        guint            *position_p)
1659 {
1660   do
1661     {
1662       g_scanner_free_value (token_p, value_p);
1663       g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
1664     }
1665   while (((*token_p > 0 && *token_p < 256) &&
1666           strchr (scanner->config->cset_skip_characters, *token_p)) ||
1667          (*token_p == G_TOKEN_CHAR &&
1668           strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
1669          (*token_p == G_TOKEN_COMMENT_MULTI &&
1670           scanner->config->skip_comment_multi) ||
1671          (*token_p == G_TOKEN_COMMENT_SINGLE &&
1672           scanner->config->skip_comment_single));
1673   
1674   switch (*token_p)
1675     {
1676     case G_TOKEN_IDENTIFIER:
1677       if (scanner->config->identifier_2_string)
1678         *token_p = G_TOKEN_STRING;
1679       break;
1680       
1681     case G_TOKEN_SYMBOL:
1682       if (scanner->config->symbol_2_token)
1683         *token_p = (GTokenType) ((size_t) value_p->v_symbol);
1684       break;
1685       
1686     case G_TOKEN_BINARY:
1687     case G_TOKEN_OCTAL:
1688     case G_TOKEN_HEX:
1689       if (scanner->config->numbers_2_int)
1690         *token_p = G_TOKEN_INT;
1691       break;
1692       
1693     default:
1694       break;
1695     }
1696   
1697   if (*token_p == G_TOKEN_INT &&
1698       scanner->config->int_2_float)
1699     {
1700       *token_p = G_TOKEN_FLOAT;
1701
1702       /* Have to assign through a temporary variable to avoid undefined behaviour
1703        * by copying between potentially-overlapping union members. */
1704       if (scanner->config->store_int64)
1705         {
1706           gint64 temp = value_p->v_int64;
1707           value_p->v_float = temp;
1708         }
1709       else
1710         {
1711           gint temp = value_p->v_int;
1712           value_p->v_float = temp;
1713         }
1714     }
1715   
1716   errno = 0;
1717 }
1718
1719 static void
1720 g_scanner_get_token_ll  (GScanner       *scanner,
1721                          GTokenType     *token_p,
1722                          GTokenValue    *value_p,
1723                          guint          *line_p,
1724                          guint          *position_p)
1725 {
1726   GScannerConfig *config;
1727   GTokenType       token;
1728   gboolean         in_comment_multi;
1729   gboolean         in_comment_single;
1730   gboolean         in_string_sq;
1731   gboolean         in_string_dq;
1732   GString         *gstring;
1733   GTokenValue      value;
1734   guchar           ch;
1735   
1736   config = scanner->config;
1737   (*value_p).v_int64 = 0;
1738   
1739   if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
1740       scanner->token == G_TOKEN_EOF)
1741     {
1742       *token_p = G_TOKEN_EOF;
1743       return;
1744     }
1745   
1746   in_comment_multi = FALSE;
1747   in_comment_single = FALSE;
1748   in_string_sq = FALSE;
1749   in_string_dq = FALSE;
1750   gstring = NULL;
1751   
1752   do /* while (ch != 0) */
1753     {
1754       gboolean dotted_float = FALSE;
1755       
1756       ch = g_scanner_get_char (scanner, line_p, position_p);
1757       
1758       value.v_int64 = 0;
1759       token = G_TOKEN_NONE;
1760       
1761       /* this is *evil*, but needed ;(
1762        * we first check for identifier first character, because  it
1763        * might interfere with other key chars like slashes or numbers
1764        */
1765       if (config->scan_identifier &&
1766           ch && strchr (config->cset_identifier_first, ch))
1767         goto identifier_precedence;
1768       
1769       switch (ch)
1770         {
1771         case 0:
1772           token = G_TOKEN_EOF;
1773           (*position_p)++;
1774           /* ch = 0; */
1775           break;
1776           
1777         case '/':
1778           if (!config->scan_comment_multi ||
1779               g_scanner_peek_next_char (scanner) != '*')
1780             goto default_case;
1781           g_scanner_get_char (scanner, line_p, position_p);
1782           token = G_TOKEN_COMMENT_MULTI;
1783           in_comment_multi = TRUE;
1784           gstring = g_string_new (NULL);
1785           while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1786             {
1787               if (ch == '*' && g_scanner_peek_next_char (scanner) == '/')
1788                 {
1789                   g_scanner_get_char (scanner, line_p, position_p);
1790                   in_comment_multi = FALSE;
1791                   break;
1792                 }
1793               else
1794                 gstring = g_string_append_c (gstring, ch);
1795             }
1796           ch = 0;
1797           break;
1798           
1799         case '\'':
1800           if (!config->scan_string_sq)
1801             goto default_case;
1802           token = G_TOKEN_STRING;
1803           in_string_sq = TRUE;
1804           gstring = g_string_new (NULL);
1805           while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1806             {
1807               if (ch == '\'')
1808                 {
1809                   in_string_sq = FALSE;
1810                   break;
1811                 }
1812               else
1813                 gstring = g_string_append_c (gstring, ch);
1814             }
1815           ch = 0;
1816           break;
1817           
1818         case '"':
1819           if (!config->scan_string_dq)
1820             goto default_case;
1821           token = G_TOKEN_STRING;
1822           in_string_dq = TRUE;
1823           gstring = g_string_new (NULL);
1824           while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0)
1825             {
1826               if (ch == '"')
1827                 {
1828                   in_string_dq = FALSE;
1829                   break;
1830                 }
1831               else
1832                 {
1833                   if (ch == '\\')
1834                     {
1835                       ch = g_scanner_get_char (scanner, line_p, position_p);
1836                       switch (ch)
1837                         {
1838                           guint i;
1839                           guint fchar;
1840                           
1841                         case 0:
1842                           break;
1843                           
1844                         case '\\':
1845                           gstring = g_string_append_c (gstring, '\\');
1846                           break;
1847                           
1848                         case 'n':
1849                           gstring = g_string_append_c (gstring, '\n');
1850                           break;
1851                           
1852                         case 't':
1853                           gstring = g_string_append_c (gstring, '\t');
1854                           break;
1855                           
1856                         case 'r':
1857                           gstring = g_string_append_c (gstring, '\r');
1858                           break;
1859                           
1860                         case 'b':
1861                           gstring = g_string_append_c (gstring, '\b');
1862                           break;
1863                           
1864                         case 'f':
1865                           gstring = g_string_append_c (gstring, '\f');
1866                           break;
1867                           
1868                         case '0':
1869                         case '1':
1870                         case '2':
1871                         case '3':
1872                         case '4':
1873                         case '5':
1874                         case '6':
1875                         case '7':
1876                           i = ch - '0';
1877                           fchar = g_scanner_peek_next_char (scanner);
1878                           if (fchar >= '0' && fchar <= '7')
1879                             {
1880                               ch = g_scanner_get_char (scanner, line_p, position_p);
1881                               i = i * 8 + ch - '0';
1882                               fchar = g_scanner_peek_next_char (scanner);
1883                               if (fchar >= '0' && fchar <= '7')
1884                                 {
1885                                   ch = g_scanner_get_char (scanner, line_p, position_p);
1886                                   i = i * 8 + ch - '0';
1887                                 }
1888                             }
1889                           gstring = g_string_append_c (gstring, i);
1890                           break;
1891                           
1892                         default:
1893                           gstring = g_string_append_c (gstring, ch);
1894                           break;
1895                         }
1896                     }
1897                   else
1898                     gstring = g_string_append_c (gstring, ch);
1899                 }
1900             }
1901           ch = 0;
1902           break;
1903           
1904         case '.':
1905           if (!config->scan_float)
1906             goto default_case;
1907           token = G_TOKEN_FLOAT;
1908           dotted_float = TRUE;
1909           ch = g_scanner_get_char (scanner, line_p, position_p);
1910           goto number_parsing;
1911           
1912         case '$':
1913           if (!config->scan_hex_dollar)
1914             goto default_case;
1915           token = G_TOKEN_HEX;
1916           ch = g_scanner_get_char (scanner, line_p, position_p);
1917           goto number_parsing;
1918           
1919         case '0':
1920           if (config->scan_octal)
1921             token = G_TOKEN_OCTAL;
1922           else
1923             token = G_TOKEN_INT;
1924           ch = g_scanner_peek_next_char (scanner);
1925           if (config->scan_hex && (ch == 'x' || ch == 'X'))
1926             {
1927               token = G_TOKEN_HEX;
1928               g_scanner_get_char (scanner, line_p, position_p);
1929               ch = g_scanner_get_char (scanner, line_p, position_p);
1930               if (ch == 0)
1931                 {
1932                   token = G_TOKEN_ERROR;
1933                   value.v_error = G_ERR_UNEXP_EOF;
1934                   (*position_p)++;
1935                   break;
1936                 }
1937               if (g_scanner_char_2_num (ch, 16) < 0)
1938                 {
1939                   token = G_TOKEN_ERROR;
1940                   value.v_error = G_ERR_DIGIT_RADIX;
1941                   ch = 0;
1942                   break;
1943                 }
1944             }
1945           else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1946             {
1947               token = G_TOKEN_BINARY;
1948               g_scanner_get_char (scanner, line_p, position_p);
1949               ch = g_scanner_get_char (scanner, line_p, position_p);
1950               if (ch == 0)
1951                 {
1952                   token = G_TOKEN_ERROR;
1953                   value.v_error = G_ERR_UNEXP_EOF;
1954                   (*position_p)++;
1955                   break;
1956                 }
1957               if (g_scanner_char_2_num (ch, 10) < 0)
1958                 {
1959                   token = G_TOKEN_ERROR;
1960                   value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1961                   ch = 0;
1962                   break;
1963                 }
1964             }
1965           else
1966             ch = '0';
1967           G_GNUC_FALLTHROUGH;
1968         case '1':
1969         case '2':
1970         case '3':
1971         case '4':
1972         case '5':
1973         case '6':
1974         case '7':
1975         case '8':
1976         case '9':
1977         number_parsing:
1978         {
1979           gboolean in_number = TRUE;
1980           gchar *endptr;
1981           
1982           if (token == G_TOKEN_NONE)
1983             token = G_TOKEN_INT;
1984           
1985           gstring = g_string_new (dotted_float ? "0." : "");
1986           gstring = g_string_append_c (gstring, ch);
1987           
1988           do /* while (in_number) */
1989             {
1990               gboolean is_E;
1991               
1992               is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1993               
1994               ch = g_scanner_peek_next_char (scanner);
1995               
1996               if (g_scanner_char_2_num (ch, 36) >= 0 ||
1997                   (config->scan_float && ch == '.') ||
1998                   (is_E && (ch == '+' || ch == '-')))
1999                 {
2000                   ch = g_scanner_get_char (scanner, line_p, position_p);
2001                   
2002                   switch (ch)
2003                     {
2004                     case '.':
2005                       if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
2006                         {
2007                           value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
2008                           token = G_TOKEN_ERROR;
2009                           in_number = FALSE;
2010                         }
2011                       else
2012                         {
2013                           token = G_TOKEN_FLOAT;
2014                           gstring = g_string_append_c (gstring, ch);
2015                         }
2016                       break;
2017                       
2018                     case '0':
2019                     case '1':
2020                     case '2':
2021                     case '3':
2022                     case '4':
2023                     case '5':
2024                     case '6':
2025                     case '7':
2026                     case '8':
2027                     case '9':
2028                       gstring = g_string_append_c (gstring, ch);
2029                       break;
2030                       
2031                     case '-':
2032                     case '+':
2033                       if (token != G_TOKEN_FLOAT)
2034                         {
2035                           token = G_TOKEN_ERROR;
2036                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2037                           in_number = FALSE;
2038                         }
2039                       else
2040                         gstring = g_string_append_c (gstring, ch);
2041                       break;
2042                       
2043                     case 'e':
2044                     case 'E':
2045                       if ((token != G_TOKEN_HEX && !config->scan_float) ||
2046                           (token != G_TOKEN_HEX &&
2047                            token != G_TOKEN_OCTAL &&
2048                            token != G_TOKEN_FLOAT &&
2049                            token != G_TOKEN_INT))
2050                         {
2051                           token = G_TOKEN_ERROR;
2052                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2053                           in_number = FALSE;
2054                         }
2055                       else
2056                         {
2057                           if (token != G_TOKEN_HEX)
2058                             token = G_TOKEN_FLOAT;
2059                           gstring = g_string_append_c (gstring, ch);
2060                         }
2061                       break;
2062                       
2063                     default:
2064                       if (token != G_TOKEN_HEX)
2065                         {
2066                           token = G_TOKEN_ERROR;
2067                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2068                           in_number = FALSE;
2069                         }
2070                       else
2071                         gstring = g_string_append_c (gstring, ch);
2072                       break;
2073                     }
2074                 }
2075               else
2076                 in_number = FALSE;
2077             }
2078           while (in_number);
2079           
2080           endptr = NULL;
2081           if (token == G_TOKEN_FLOAT)
2082             value.v_float = g_strtod (gstring->str, &endptr);
2083           else
2084             {
2085               guint64 ui64 = 0;
2086               switch (token)
2087                 {
2088                 case G_TOKEN_BINARY:
2089                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
2090                   break;
2091                 case G_TOKEN_OCTAL:
2092                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
2093                   break;
2094                 case G_TOKEN_INT:
2095                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
2096                   break;
2097                 case G_TOKEN_HEX:
2098                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
2099                   break;
2100                 default: ;
2101                 }
2102               if (scanner->config->store_int64)
2103                 value.v_int64 = ui64;
2104               else
2105                 value.v_int = ui64;
2106             }
2107           if (endptr && *endptr)
2108             {
2109               token = G_TOKEN_ERROR;
2110               if (*endptr == 'e' || *endptr == 'E')
2111                 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
2112               else
2113                 value.v_error = G_ERR_DIGIT_RADIX;
2114             }
2115           g_string_free (gstring, TRUE);
2116           gstring = NULL;
2117           ch = 0;
2118         } /* number_parsing:... */
2119         break;
2120         
2121         default:
2122         default_case:
2123         {
2124           if (config->cpair_comment_single &&
2125               ch == config->cpair_comment_single[0])
2126             {
2127               token = G_TOKEN_COMMENT_SINGLE;
2128               in_comment_single = TRUE;
2129               gstring = g_string_new (NULL);
2130               ch = g_scanner_get_char (scanner, line_p, position_p);
2131               while (ch != 0)
2132                 {
2133                   if (ch == config->cpair_comment_single[1])
2134                     {
2135                       in_comment_single = FALSE;
2136                       ch = 0;
2137                       break;
2138                     }
2139                   
2140                   gstring = g_string_append_c (gstring, ch);
2141                   ch = g_scanner_get_char (scanner, line_p, position_p);
2142                 }
2143               /* ignore a missing newline at EOF for single line comments */
2144               if (in_comment_single &&
2145                   config->cpair_comment_single[1] == '\n')
2146                 in_comment_single = FALSE;
2147             }
2148           else if (config->scan_identifier && ch &&
2149                    strchr (config->cset_identifier_first, ch))
2150             {
2151             identifier_precedence:
2152               
2153               if (config->cset_identifier_nth && ch &&
2154                   strchr (config->cset_identifier_nth,
2155                           g_scanner_peek_next_char (scanner)))
2156                 {
2157                   token = G_TOKEN_IDENTIFIER;
2158                   gstring = g_string_new (NULL);
2159                   gstring = g_string_append_c (gstring, ch);
2160                   do
2161                     {
2162                       ch = g_scanner_get_char (scanner, line_p, position_p);
2163                       gstring = g_string_append_c (gstring, ch);
2164                       ch = g_scanner_peek_next_char (scanner);
2165                     }
2166                   while (ch && strchr (config->cset_identifier_nth, ch));
2167                   ch = 0;
2168                 }
2169               else if (config->scan_identifier_1char)
2170                 {
2171                   token = G_TOKEN_IDENTIFIER;
2172                   value.v_identifier = g_new0 (gchar, 2);
2173                   value.v_identifier[0] = ch;
2174                   ch = 0;
2175                 }
2176             }
2177           if (ch)
2178             {
2179               if (config->char_2_token)
2180                 token = ch;
2181               else
2182                 {
2183                   token = G_TOKEN_CHAR;
2184                   value.v_char = ch;
2185                 }
2186               ch = 0;
2187             }
2188         } /* default_case:... */
2189         break;
2190         }
2191       g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
2192     }
2193   while (ch != 0);
2194   
2195   if (in_comment_multi || in_comment_single ||
2196       in_string_sq || in_string_dq)
2197     {
2198       token = G_TOKEN_ERROR;
2199       if (gstring)
2200         {
2201           g_string_free (gstring, TRUE);
2202           gstring = NULL;
2203         }
2204       (*position_p)++;
2205       if (in_comment_multi || in_comment_single)
2206         value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
2207       else /* (in_string_sq || in_string_dq) */
2208         value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
2209     }
2210   
2211   if (gstring)
2212     {
2213       value.v_string = g_string_free (gstring, FALSE);
2214       gstring = NULL;
2215     }
2216   
2217   if (token == G_TOKEN_IDENTIFIER)
2218     {
2219       if (config->scan_symbols)
2220         {
2221           GScannerKey *key;
2222           guint scope_id;
2223           
2224           scope_id = scanner->scope_id;
2225           key = g_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
2226           if (!key && scope_id && scanner->config->scope_0_fallback)
2227             key = g_scanner_lookup_internal (scanner, 0, value.v_identifier);
2228           
2229           if (key)
2230             {
2231               g_free (value.v_identifier);
2232               token = G_TOKEN_SYMBOL;
2233               value.v_symbol = key->value;
2234             }
2235         }
2236       
2237       if (token == G_TOKEN_IDENTIFIER &&
2238           config->scan_identifier_NULL &&
2239           strlen (value.v_identifier) == 4)
2240         {
2241           gchar *null_upper = "NULL";
2242           gchar *null_lower = "null";
2243           
2244           if (scanner->config->case_sensitive)
2245             {
2246               if (value.v_identifier[0] == null_upper[0] &&
2247                   value.v_identifier[1] == null_upper[1] &&
2248                   value.v_identifier[2] == null_upper[2] &&
2249                   value.v_identifier[3] == null_upper[3])
2250                 token = G_TOKEN_IDENTIFIER_NULL;
2251             }
2252           else
2253             {
2254               if ((value.v_identifier[0] == null_upper[0] ||
2255                    value.v_identifier[0] == null_lower[0]) &&
2256                   (value.v_identifier[1] == null_upper[1] ||
2257                    value.v_identifier[1] == null_lower[1]) &&
2258                   (value.v_identifier[2] == null_upper[2] ||
2259                    value.v_identifier[2] == null_lower[2]) &&
2260                   (value.v_identifier[3] == null_upper[3] ||
2261                    value.v_identifier[3] == null_lower[3]))
2262                 token = G_TOKEN_IDENTIFIER_NULL;
2263             }
2264         }
2265     }
2266   
2267   *token_p = token;
2268   *value_p = value;
2269 }