Imported Upstream version 1.2.4
[platform/upstream/json-glib.git] / json-glib / json-scanner.c
1 /* json-scanner.c: Tokenizer for JSON
2  * Copyright (C) 2008 OpenedHand
3  *
4  * Based on JsonScanner: Flexible lexical scanner for general purpose.
5  * Copyright (C) 1997, 1998 Tim Janik
6  *
7  * Modified by Emmanuele Bassi <ebassi@openedhand.com>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21  */
22
23 #include "config.h"
24
25 #include <errno.h>
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <string.h>
29 #include <stdio.h>
30 #ifdef HAVE_UNISTD_H
31 #include <unistd.h>
32 #endif
33
34 #include <glib.h>
35 #include <glib/gprintf.h>
36
37 #include "json-scanner.h"
38
39 #ifdef G_OS_WIN32
40 #include <io.h> /* For _read() */
41 #endif
42
/* Scanner configuration: character sets plus one-bit switches.
 *
 * json_scanner_new() fills a private copy of this structure from
 * json_scanner_config_template.  That template is a positional
 * initializer, so the order of the fields below must not change
 * without updating it as well.
 */
struct _JsonScannerConfig
{
  /* Character sets
   */
  gchar *cset_skip_characters; /* template value: " \t\r\n" */
  gchar *cset_identifier_first;
  gchar *cset_identifier_nth;
  gchar *cpair_comment_single; /* template value: "//\n" */
  
  /* Should symbol lookup work case sensitive? */
  guint case_sensitive : 1;
  
  /* Boolean values to be adjusted "on the fly"
   * to configure scanning behaviour.
   */
  guint skip_comment_multi : 1;  /* C like comment */
  guint skip_comment_single : 1; /* single line comment */
  guint scan_comment_multi : 1;  /* scan multi line comments? */
  guint scan_identifier : 1;
  guint scan_identifier_1char : 1;
  guint scan_identifier_NULL : 1;
  guint scan_symbols : 1;
  guint scan_binary : 1;
  guint scan_octal : 1;
  guint scan_float : 1;
  guint scan_hex : 1;            /* `0x0ff0' */
  guint scan_hex_dollar : 1;     /* `$0ff0' */
  guint scan_string_sq : 1;      /* string: 'anything' */
  guint scan_string_dq : 1;      /* string: "\\-escapes!\n" */
  guint numbers_2_int : 1;       /* bin, octal, hex => int */
  guint int_2_float : 1;         /* int => G_TOKEN_FLOAT? */
  guint identifier_2_string : 1;
  guint char_2_token : 1;        /* return G_TOKEN_CHAR? */
  guint symbol_2_token : 1;
  guint scope_0_fallback : 1;    /* try scope 0 on lookups? */
  guint store_int64 : 1;         /* use value.v_int64 rather than v_int */
  guint padding_dummy;           /* never set explicitly in this file */
};
81
/* Default configuration; json_scanner_new() copies these values into
 * the config of every scanner it creates.
 *
 * The initializers are positional: each value is matched to a field of
 * struct _JsonScannerConfig by order, as named in the trailing comments.
 * The last field (padding_dummy) has no initializer and is therefore
 * zero.
 */
static JsonScannerConfig json_scanner_config_template =
{
  ( " \t\r\n" )         /* cset_skip_characters */,
  (
   "_"
   G_CSET_a_2_z
   G_CSET_A_2_Z
  )                     /* cset_identifier_first */,
  (
   G_CSET_DIGITS
   "-_"
   G_CSET_a_2_z
   G_CSET_A_2_Z
  )                     /* cset_identifier_nth */,
  ( "//\n" )            /* cpair_comment_single */,
  TRUE                  /* case_sensitive */,
  TRUE                  /* skip_comment_multi */,
  TRUE                  /* skip_comment_single */,
  FALSE                 /* scan_comment_multi */,
  TRUE                  /* scan_identifier */,
  TRUE                  /* scan_identifier_1char */,
  FALSE                 /* scan_identifier_NULL */,
  TRUE                  /* scan_symbols */,
  TRUE                  /* scan_binary */,
  TRUE                  /* scan_octal */,
  TRUE                  /* scan_float */,
  TRUE                  /* scan_hex */,
  TRUE                  /* scan_hex_dollar */,
  TRUE                  /* scan_string_sq */,
  TRUE                  /* scan_string_dq */,
  TRUE                  /* numbers_2_int */,
  FALSE                 /* int_2_float */,
  FALSE                 /* identifier_2_string */,
  TRUE                  /* char_2_token */,
  TRUE                  /* symbol_2_token */,
  FALSE                 /* scope_0_fallback */,
  TRUE                  /* store_int64 */
};
120
121 /* --- defines --- */
/* Lower-case a single byte.
 *
 * Each range test evaluates to 0 or 1 and is multiplied by the distance
 * to the lower-case range (32 in all three cases); the result is OR-ed
 * into the byte.  Bytes 'A'..'Z', 192..214 and 216..222 are mapped, all
 * other bytes are returned unchanged.  The two high ranges presumably
 * correspond to ISO-8859-1 upper-case letters -- TODO confirm.
 *
 * NOTE: the argument is evaluated several times, so it must not have
 * side effects.
 */
#define to_lower(c)                             ( \
        (guchar) (                                                      \
          ( (((guchar)(c))>='A' && ((guchar)(c))<='Z') * ('a'-'A') ) |  \
          ( (((guchar)(c))>=192 && ((guchar)(c))<=214) * (224-192) ) |  \
          ( (((guchar)(c))>=216 && ((guchar)(c))<=222) * (248-216) ) |  \
          ((guchar)(c))                                                 \
        )                                                               \
)
130
131 #define READ_BUFFER_SIZE        (4000)
132
133 /* --- typedefs --- */
/* Entry of the scanner's symbol table.
 *
 * Entries are identified by the (scope_id, symbol) pair: that is what
 * json_scanner_key_equal() compares and json_scanner_key_hash() hashes.
 * json_scanner_scope_add_symbol() inserts the same allocation as both
 * the hash table key and its value.
 */
typedef struct  _JsonScannerKey JsonScannerKey;

struct  _JsonScannerKey
{
  guint scope_id;   /* scope the symbol was registered in */
  gchar *symbol;    /* heap copy of the symbol name, freed on destroy */
  gpointer value;   /* caller supplied value, stored as-is */
};
142
143 /* --- prototypes --- */
144 static gboolean json_scanner_key_equal (gconstpointer v1,
145                                         gconstpointer v2);
146 static guint    json_scanner_key_hash  (gconstpointer v);
147
148 static inline
149 JsonScannerKey *json_scanner_lookup_internal (JsonScanner *scanner,
150                                               guint        scope_id,
151                                               const gchar *symbol);
152 static void     json_scanner_get_token_ll    (JsonScanner *scanner,
153                                               GTokenType  *token_p,
154                                               GTokenValue *value_p,
155                                               guint       *line_p,
156                                               guint       *position_p);
157 static void     json_scanner_get_token_i     (JsonScanner *scanner,
158                                               GTokenType  *token_p,
159                                               GTokenValue *value_p,
160                                               guint       *line_p,
161                                               guint       *position_p);
162
163 static guchar   json_scanner_peek_next_char  (JsonScanner *scanner);
164 static guchar   json_scanner_get_char        (JsonScanner *scanner,
165                                               guint       *line_p,
166                                               guint       *position_p);
167 static gunichar json_scanner_get_unichar     (JsonScanner *scanner,
168                                               guint       *line_p,
169                                               guint       *position_p);
170
171 /* --- functions --- */
172 static inline gint
173 json_scanner_char_2_num (guchar c,
174                          guchar base)
175 {
176   if (c >= '0' && c <= '9')
177     c -= '0';
178   else if (c >= 'A' && c <= 'Z')
179     c -= 'A' - 10;
180   else if (c >= 'a' && c <= 'z')
181     c -= 'a' - 10;
182   else
183     return -1;
184   
185   if (c < base)
186     return c;
187   
188   return -1;
189 }
190
191 JsonScanner *
192 json_scanner_new (void)
193 {
194   JsonScanner *scanner;
195   JsonScannerConfig *config_templ;
196   
197   config_templ = &json_scanner_config_template;
198   
199   scanner = g_new0 (JsonScanner, 1);
200   
201   scanner->user_data = NULL;
202   scanner->max_parse_errors = 1;
203   scanner->parse_errors = 0;
204   scanner->input_name = NULL;
205   g_datalist_init (&scanner->qdata);
206   
207   scanner->config = g_new0 (JsonScannerConfig, 1);
208   
209   scanner->config->case_sensitive        = config_templ->case_sensitive;
210   scanner->config->cset_skip_characters  = config_templ->cset_skip_characters;
211   if (!scanner->config->cset_skip_characters)
212     scanner->config->cset_skip_characters = "";
213   scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
214   scanner->config->cset_identifier_nth   = config_templ->cset_identifier_nth;
215   scanner->config->cpair_comment_single  = config_templ->cpair_comment_single;
216   scanner->config->skip_comment_multi    = config_templ->skip_comment_multi;
217   scanner->config->skip_comment_single   = config_templ->skip_comment_single;
218   scanner->config->scan_comment_multi    = config_templ->scan_comment_multi;
219   scanner->config->scan_identifier       = config_templ->scan_identifier;
220   scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
221   scanner->config->scan_identifier_NULL  = config_templ->scan_identifier_NULL;
222   scanner->config->scan_symbols          = config_templ->scan_symbols;
223   scanner->config->scan_binary           = config_templ->scan_binary;
224   scanner->config->scan_octal            = config_templ->scan_octal;
225   scanner->config->scan_float            = config_templ->scan_float;
226   scanner->config->scan_hex              = config_templ->scan_hex;
227   scanner->config->scan_hex_dollar       = config_templ->scan_hex_dollar;
228   scanner->config->scan_string_sq        = config_templ->scan_string_sq;
229   scanner->config->scan_string_dq        = config_templ->scan_string_dq;
230   scanner->config->numbers_2_int         = config_templ->numbers_2_int;
231   scanner->config->int_2_float           = config_templ->int_2_float;
232   scanner->config->identifier_2_string   = config_templ->identifier_2_string;
233   scanner->config->char_2_token          = config_templ->char_2_token;
234   scanner->config->symbol_2_token        = config_templ->symbol_2_token;
235   scanner->config->scope_0_fallback      = config_templ->scope_0_fallback;
236   scanner->config->store_int64           = config_templ->store_int64;
237   
238   scanner->token = G_TOKEN_NONE;
239   scanner->value.v_int64 = 0;
240   scanner->line = 1;
241   scanner->position = 0;
242   
243   scanner->next_token = G_TOKEN_NONE;
244   scanner->next_value.v_int64 = 0;
245   scanner->next_line = 1;
246   scanner->next_position = 0;
247   
248   scanner->symbol_table = g_hash_table_new (json_scanner_key_hash,
249                                             json_scanner_key_equal);
250   scanner->text = NULL;
251   scanner->text_end = NULL;
252   scanner->buffer = NULL;
253   scanner->scope_id = 0;
254   
255   return scanner;
256 }
257
258 static inline void
259 json_scanner_free_value (GTokenType  *token_p,
260                          GTokenValue *value_p)
261 {
262   switch (*token_p)
263     {
264     case G_TOKEN_STRING:
265     case G_TOKEN_IDENTIFIER:
266     case G_TOKEN_IDENTIFIER_NULL:
267     case G_TOKEN_COMMENT_SINGLE:
268     case G_TOKEN_COMMENT_MULTI:
269       g_free (value_p->v_string);
270       break;
271       
272     default:
273       break;
274     }
275   
276   *token_p = G_TOKEN_NONE;
277 }
278
279 static void
280 json_scanner_destroy_symbol_table_entry (gpointer _key,
281                                          gpointer _value,
282                                          gpointer _data)
283 {
284   JsonScannerKey *key = _key;
285   
286   g_free (key->symbol);
287   g_slice_free (JsonScannerKey, key);
288 }
289
290 void
291 json_scanner_destroy (JsonScanner *scanner)
292 {
293   g_return_if_fail (scanner != NULL);
294   
295   g_datalist_clear (&scanner->qdata);
296   g_hash_table_foreach (scanner->symbol_table, 
297                         json_scanner_destroy_symbol_table_entry,
298                         NULL);
299   g_hash_table_destroy (scanner->symbol_table);
300   json_scanner_free_value (&scanner->token, &scanner->value);
301   json_scanner_free_value (&scanner->next_token, &scanner->next_value);
302   g_free (scanner->config);
303   g_free (scanner->buffer);
304   g_free (scanner);
305 }
306
307 void
308 json_scanner_error (JsonScanner *scanner,
309                     const gchar *format,
310                     ...)
311 {
312   g_return_if_fail (scanner != NULL);
313   g_return_if_fail (format != NULL);
314   
315   scanner->parse_errors++;
316   
317   if (scanner->msg_handler)
318     {
319       va_list args;
320       gchar *string;
321       
322       va_start (args, format);
323       string = g_strdup_vprintf (format, args);
324       va_end (args);
325       
326       scanner->msg_handler (scanner, string);
327       
328       g_free (string);
329     }
330 }
331
332 static gboolean
333 json_scanner_key_equal (gconstpointer v1,
334                         gconstpointer v2)
335 {
336   const JsonScannerKey *key1 = v1;
337   const JsonScannerKey *key2 = v2;
338   
339   return (key1->scope_id == key2->scope_id) &&
340          (strcmp (key1->symbol, key2->symbol) == 0);
341 }
342
343 static guint
344 json_scanner_key_hash (gconstpointer v)
345 {
346   const JsonScannerKey *key = v;
347   gchar *c;
348   guint h;
349   
350   h = key->scope_id;
351   for (c = key->symbol; *c; c++)
352     h = (h << 5) - h + *c;
353   
354   return h;
355 }
356
357 static inline JsonScannerKey *
358 json_scanner_lookup_internal (JsonScanner *scanner,
359                               guint        scope_id,
360                               const gchar *symbol)
361 {
362   JsonScannerKey *key_p;
363   JsonScannerKey key;
364   
365   key.scope_id = scope_id;
366   
367   if (!scanner->config->case_sensitive)
368     {
369       gchar *d;
370       const gchar *c;
371       
372       key.symbol = g_new (gchar, strlen (symbol) + 1);
373       for (d = key.symbol, c = symbol; *c; c++, d++)
374         *d = to_lower (*c);
375       *d = 0;
376       key_p = g_hash_table_lookup (scanner->symbol_table, &key);
377       g_free (key.symbol);
378     }
379   else
380     {
381       key.symbol = (gchar*) symbol;
382       key_p = g_hash_table_lookup (scanner->symbol_table, &key);
383     }
384   
385   return key_p;
386 }
387
388 void
389 json_scanner_scope_add_symbol (JsonScanner *scanner,
390                                guint        scope_id,
391                                const gchar *symbol,
392                                gpointer     value)
393 {
394   JsonScannerKey *key;
395
396   g_return_if_fail (scanner != NULL);
397   g_return_if_fail (symbol != NULL);
398
399   key = json_scanner_lookup_internal (scanner, scope_id, symbol);
400   if (!key)
401     {
402       key = g_slice_new (JsonScannerKey);
403       key->scope_id = scope_id;
404       key->symbol = g_strdup (symbol);
405       key->value = value;
406       if (!scanner->config->case_sensitive)
407         {
408           gchar *c;
409
410           c = key->symbol;
411           while (*c != 0)
412             {
413               *c = to_lower (*c);
414               c++;
415             }
416         }
417
418       g_hash_table_insert (scanner->symbol_table, key, key);
419     }
420   else
421     key->value = value;
422 }
423
424 GTokenType
425 json_scanner_peek_next_token (JsonScanner *scanner)
426 {
427   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
428
429   if (scanner->next_token == G_TOKEN_NONE)
430     {
431       scanner->next_line = scanner->line;
432       scanner->next_position = scanner->position;
433       json_scanner_get_token_i (scanner,
434                                 &scanner->next_token,
435                                 &scanner->next_value,
436                                 &scanner->next_line,
437                                 &scanner->next_position);
438     }
439
440   return scanner->next_token;
441 }
442
443 GTokenType
444 json_scanner_get_next_token (JsonScanner *scanner)
445 {
446   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
447
448   if (scanner->next_token != G_TOKEN_NONE)
449     {
450       json_scanner_free_value (&scanner->token, &scanner->value);
451
452       scanner->token = scanner->next_token;
453       scanner->value = scanner->next_value;
454       scanner->line = scanner->next_line;
455       scanner->position = scanner->next_position;
456       scanner->next_token = G_TOKEN_NONE;
457     }
458   else
459     json_scanner_get_token_i (scanner,
460                               &scanner->token,
461                               &scanner->value,
462                               &scanner->line,
463                               &scanner->position);
464
465   return scanner->token;
466 }
467
468 void
469 json_scanner_input_text (JsonScanner *scanner,
470                          const gchar *text,
471                          guint        text_len)
472 {
473   g_return_if_fail (scanner != NULL);
474   if (text_len)
475     g_return_if_fail (text != NULL);
476   else
477     text = NULL;
478
479   scanner->token = G_TOKEN_NONE;
480   scanner->value.v_int64 = 0;
481   scanner->line = 1;
482   scanner->position = 0;
483   scanner->next_token = G_TOKEN_NONE;
484
485   scanner->text = text;
486   scanner->text_end = text + text_len;
487
488   if (scanner->buffer)
489     {
490       g_free (scanner->buffer);
491       scanner->buffer = NULL;
492     }
493 }
494
495 static guchar
496 json_scanner_peek_next_char (JsonScanner *scanner)
497 {
498   if (scanner->text < scanner->text_end)
499     return *scanner->text;
500   else
501     return 0;
502 }
503
504 static guchar
505 json_scanner_get_char (JsonScanner *scanner,
506                        guint       *line_p,
507                        guint       *position_p)
508 {
509   guchar fchar;
510
511   if (scanner->text < scanner->text_end)
512     fchar = *(scanner->text++);
513   else
514     fchar = 0;
515   
516   if (fchar == '\n')
517     {
518       (*position_p) = 0;
519       (*line_p)++;
520     }
521   else if (fchar)
522     {
523       (*position_p)++;
524     }
525   
526   return fchar;
527 }
528
/* is_hex_digit: true for '0'..'9', 'a'..'f' and 'A'..'F'.
 *
 * to_hex_digit: numeric value of a hexadecimal digit.  Only meaningful
 * when is_hex_digit (c) holds: anything above '9' is treated as a
 * letter whose low three bits (1..6 for a..f / A..F) plus 9 give
 * 10..15.
 *
 * Both macros evaluate their argument more than once.
 */
#define is_hex_digit(c)         (((c) >= '0' && (c) <= '9') || \
                                 ((c) >= 'a' && (c) <= 'f') || \
                                 ((c) >= 'A' && (c) <= 'F'))
#define to_hex_digit(c)         (((c) <= '9') ? (c) - '0' : ((c) & 7) + 9)
533
534 static gunichar
535 json_scanner_get_unichar (JsonScanner *scanner,
536                           guint       *line_p,
537                           guint       *position_p)
538 {
539   gunichar uchar;
540   gchar ch;
541   gint i;
542
543   uchar = 0;
544   for (i = 0; i < 4; i++)
545     {
546       ch = json_scanner_get_char (scanner, line_p, position_p);
547
548       if (is_hex_digit (ch))
549         uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4));
550       else
551         break;
552     }
553
554   g_assert (g_unichar_validate (uchar) || g_unichar_type (uchar) == G_UNICODE_SURROGATE);
555
556   return uchar;
557 }
558
559 /*
560  * decode_utf16_surrogate_pair:
561  * @units: (array length=2): a pair of UTF-16 code points
562  *
563  * Decodes a surrogate pair of UTF-16 code points into the equivalent
564  * Unicode code point.
565  *
566  * Returns: the Unicode code point equivalent to the surrogate pair
567  */
568 static inline gunichar
569 decode_utf16_surrogate_pair (const gunichar units[2])
570 {
571   gunichar ucs;
572
573   g_assert (0xd800 <= units[0] && units[0] <= 0xdbff);
574   g_assert (0xdc00 <= units[1] && units[1] <= 0xdfff);
575
576   ucs = 0x10000;
577   ucs += (units[0] & 0x3ff) << 10;
578   ucs += (units[1] & 0x3ff);
579
580   return ucs;
581 }
582
583 void
584 json_scanner_unexp_token (JsonScanner *scanner,
585                           GTokenType   expected_token,
586                           const gchar *identifier_spec,
587                           const gchar *symbol_spec,
588                           const gchar *symbol_name,
589                           const gchar *message)
590 {
591   gchar *token_string;
592   guint token_string_len;
593   gchar *expected_string;
594   guint expected_string_len;
595   gchar *message_prefix;
596   gboolean print_unexp;
597   
598   g_return_if_fail (scanner != NULL);
599   
600   if (!identifier_spec)
601     identifier_spec = "identifier";
602   if (!symbol_spec)
603     symbol_spec = "symbol";
604   
605   token_string_len = 56;
606   token_string = g_new (gchar, token_string_len + 1);
607   expected_string_len = 64;
608   expected_string = g_new (gchar, expected_string_len + 1);
609   print_unexp = TRUE;
610   
611   switch (scanner->token)
612     {
613     case G_TOKEN_EOF:
614       g_snprintf (token_string, token_string_len, "end of file");
615       break;
616       
617     default:
618       if (scanner->token >= 1 && scanner->token <= 255)
619         {
620           if ((scanner->token >= ' ' && scanner->token <= '~') ||
621               strchr (scanner->config->cset_identifier_first, scanner->token) ||
622               strchr (scanner->config->cset_identifier_nth, scanner->token))
623             g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
624           else
625             g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
626           break;
627         }
628       else if (!scanner->config->symbol_2_token)
629         {
630           g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
631           break;
632         }
633       /* fall through */
634     case G_TOKEN_SYMBOL:
635       if (expected_token == G_TOKEN_SYMBOL ||
636           (scanner->config->symbol_2_token &&
637            expected_token > G_TOKEN_LAST))
638         print_unexp = FALSE;
639       if (symbol_name)
640         g_snprintf (token_string, token_string_len,
641                     "%s%s `%s'",
642                     print_unexp ? "" : "invalid ",
643                     symbol_spec,
644                     symbol_name);
645       else
646         g_snprintf (token_string, token_string_len,
647                     "%s%s",
648                     print_unexp ? "" : "invalid ",
649                     symbol_spec);
650       break;
651  
652     case G_TOKEN_ERROR:
653       print_unexp = FALSE;
654       expected_token = G_TOKEN_NONE;
655       switch (scanner->value.v_error)
656         {
657         case G_ERR_UNEXP_EOF:
658           g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
659           break;
660           
661         case G_ERR_UNEXP_EOF_IN_STRING:
662           g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
663           break;
664           
665         case G_ERR_UNEXP_EOF_IN_COMMENT:
666           g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
667           break;
668           
669         case G_ERR_NON_DIGIT_IN_CONST:
670           g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
671           break;
672           
673         case G_ERR_FLOAT_RADIX:
674           g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
675           break;
676           
677         case G_ERR_FLOAT_MALFORMED:
678           g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
679           break;
680           
681         case G_ERR_DIGIT_RADIX:
682           g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
683           break;
684           
685         case G_ERR_UNKNOWN:
686         default:
687           g_snprintf (token_string, token_string_len, "scanner: unknown error");
688           break;
689         }
690       break;
691       
692     case G_TOKEN_CHAR:
693       g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
694       break;
695       
696     case G_TOKEN_IDENTIFIER:
697     case G_TOKEN_IDENTIFIER_NULL:
698       if (expected_token == G_TOKEN_IDENTIFIER ||
699           expected_token == G_TOKEN_IDENTIFIER_NULL)
700         print_unexp = FALSE;
701       g_snprintf (token_string, token_string_len,
702                   "%s%s `%s'",
703                   print_unexp ? "" : "invalid ",
704                   identifier_spec,
705                   scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
706       break;
707       
708     case G_TOKEN_BINARY:
709     case G_TOKEN_OCTAL:
710     case G_TOKEN_INT:
711     case G_TOKEN_HEX:
712       if (scanner->config->store_int64)
713         g_snprintf (token_string, token_string_len, "number `%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
714       else
715         g_snprintf (token_string, token_string_len, "number `%lu'", scanner->value.v_int);
716       break;
717       
718     case G_TOKEN_FLOAT:
719       g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
720       break;
721       
722     case G_TOKEN_STRING:
723       if (expected_token == G_TOKEN_STRING)
724         print_unexp = FALSE;
725       g_snprintf (token_string, token_string_len,
726                   "%s%sstring constant \"%s\"",
727                   print_unexp ? "" : "invalid ",
728                   scanner->value.v_string[0] == 0 ? "empty " : "",
729                   scanner->value.v_string);
730       token_string[token_string_len - 2] = '"';
731       token_string[token_string_len - 1] = 0;
732       break;
733       
734     case G_TOKEN_COMMENT_SINGLE:
735     case G_TOKEN_COMMENT_MULTI:
736       g_snprintf (token_string, token_string_len, "comment");
737       break;
738       
739     case G_TOKEN_NONE:
740       /* somehow the user's parsing code is screwed, there isn't much
741        * we can do about it.
742        * Note, a common case to trigger this is
743        * json_scanner_peek_next_token(); json_scanner_unexp_token();
744        * without an intermediate json_scanner_get_next_token().
745        */
746       g_assert_not_reached ();
747       break;
748     }
749   
750   
751   switch (expected_token)
752     {
753       gboolean need_valid;
754       gchar *tstring;
755     case G_TOKEN_EOF:
756       g_snprintf (expected_string, expected_string_len, "end of file");
757       break;
758     default:
759       if (expected_token >= 1 && expected_token <= 255)
760         {
761           if ((expected_token >= ' ' && expected_token <= '~') ||
762               strchr (scanner->config->cset_identifier_first, expected_token) ||
763               strchr (scanner->config->cset_identifier_nth, expected_token))
764             g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
765           else
766             g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
767           break;
768         }
769       else if (!scanner->config->symbol_2_token)
770         {
771           g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
772           break;
773         }
774       /* fall through */
775     case G_TOKEN_SYMBOL:
776       need_valid = (scanner->token == G_TOKEN_SYMBOL ||
777                     (scanner->config->symbol_2_token &&
778                      scanner->token > G_TOKEN_LAST));
779       g_snprintf (expected_string, expected_string_len,
780                   "%s%s",
781                   need_valid ? "valid " : "",
782                   symbol_spec);
783       /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
784       break;
785     case G_TOKEN_CHAR:
786       g_snprintf (expected_string, expected_string_len, "%scharacter",
787                   scanner->token == G_TOKEN_CHAR ? "valid " : "");
788       break;
789     case G_TOKEN_BINARY:
790       tstring = "binary";
791       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
792                   scanner->token == expected_token ? "valid " : "", tstring);
793       break;
794     case G_TOKEN_OCTAL:
795       tstring = "octal";
796       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
797                   scanner->token == expected_token ? "valid " : "", tstring);
798       break;
799     case G_TOKEN_INT:
800       tstring = "integer";
801       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
802                   scanner->token == expected_token ? "valid " : "", tstring);
803       break;
804     case G_TOKEN_HEX:
805       tstring = "hexadecimal";
806       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
807                   scanner->token == expected_token ? "valid " : "", tstring);
808       break;
809     case G_TOKEN_FLOAT:
810       tstring = "float";
811       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
812                   scanner->token == expected_token ? "valid " : "", tstring);
813       break;
814     case G_TOKEN_STRING:
815       g_snprintf (expected_string,
816                   expected_string_len,
817                   "%sstring constant",
818                   scanner->token == G_TOKEN_STRING ? "valid " : "");
819       break;
820     case G_TOKEN_IDENTIFIER:
821     case G_TOKEN_IDENTIFIER_NULL:
822       need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
823                     scanner->token == G_TOKEN_IDENTIFIER);
824       g_snprintf (expected_string,
825                   expected_string_len,
826                   "%s%s",
827                   need_valid ? "valid " : "",
828                   identifier_spec);
829       break;
830     case G_TOKEN_COMMENT_SINGLE:
831       tstring = "single-line";
832       g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
833                   scanner->token == expected_token ? "valid " : "", tstring);
834       break;
835     case G_TOKEN_COMMENT_MULTI:
836       tstring = "multi-line";
837       g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
838                   scanner->token == expected_token ? "valid " : "", tstring);
839       break;
840     case G_TOKEN_NONE:
841     case G_TOKEN_ERROR:
842       /* this is handled upon printout */
843       break;
844     }
845   
846   if (message && message[0] != 0)
847     message_prefix = " - ";
848   else
849     {
850       message_prefix = "";
851       message = "";
852     }
853   if (expected_token == G_TOKEN_ERROR)
854     {
855       json_scanner_error (scanner,
856                           "failure around %s%s%s",
857                           token_string,
858                           message_prefix,
859                           message);
860     }
861   else if (expected_token == G_TOKEN_NONE)
862     {
863       if (print_unexp)
864         json_scanner_error (scanner,
865                             "unexpected %s%s%s",
866                             token_string,
867                             message_prefix,
868                             message);
869       else
870         json_scanner_error (scanner,
871                             "%s%s%s",
872                             token_string,
873                             message_prefix,
874                             message);
875     }
876   else
877     {
878       if (print_unexp)
879         json_scanner_error (scanner,
880                             "unexpected %s, expected %s%s%s",
881                             token_string,
882                             expected_string,
883                             message_prefix,
884                             message);
885       else
886         json_scanner_error (scanner,
887                             "%s, expected %s%s%s",
888                             token_string,
889                             expected_string,
890                             message_prefix,
891                             message);
892     }
893   
894   g_free (token_string);
895   g_free (expected_string);
896 }
897
898 static void
899 json_scanner_get_token_i (JsonScanner   *scanner,
900                           GTokenType    *token_p,
901                           GTokenValue   *value_p,
902                           guint         *line_p,
903                           guint         *position_p)
904 {
905   do
906     {
907       json_scanner_free_value (token_p, value_p);
908       json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
909     }
910   while (((*token_p > 0 && *token_p < 256) &&
911           strchr (scanner->config->cset_skip_characters, *token_p)) ||
912          (*token_p == G_TOKEN_CHAR &&
913           strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
914          (*token_p == G_TOKEN_COMMENT_MULTI &&
915           scanner->config->skip_comment_multi) ||
916          (*token_p == G_TOKEN_COMMENT_SINGLE &&
917           scanner->config->skip_comment_single));
918   
919   switch (*token_p)
920     {
921     case G_TOKEN_IDENTIFIER:
922       if (scanner->config->identifier_2_string)
923         *token_p = G_TOKEN_STRING;
924       break;
925       
926     case G_TOKEN_SYMBOL:
927       if (scanner->config->symbol_2_token)
928         *token_p = (GTokenType) value_p->v_symbol;
929       break;
930       
931     case G_TOKEN_BINARY:
932     case G_TOKEN_OCTAL:
933     case G_TOKEN_HEX:
934       if (scanner->config->numbers_2_int)
935         *token_p = G_TOKEN_INT;
936       break;
937       
938     default:
939       break;
940     }
941   
942   if (*token_p == G_TOKEN_INT &&
943       scanner->config->int_2_float)
944     {
945       *token_p = G_TOKEN_FLOAT;
946       if (scanner->config->store_int64)
947         {
948 #ifdef _MSC_VER
949           /* work around error C2520, see gvaluetransform.c */
950           value_p->v_float = (__int64)value_p->v_int64;
951 #else
952           value_p->v_float = value_p->v_int64;
953 #endif
954         }
955       else
956         value_p->v_float = value_p->v_int;
957     }
958   
959   errno = 0;
960 }
961
962 static void
963 json_scanner_get_token_ll (JsonScanner *scanner,
964                            GTokenType  *token_p,
965                            GTokenValue *value_p,
966                            guint       *line_p,
967                            guint       *position_p)
968 {
969   JsonScannerConfig *config;
970   GTokenType       token;
971   gboolean         in_comment_multi;
972   gboolean         in_comment_single;
973   gboolean         in_string_sq;
974   gboolean         in_string_dq;
975   GString         *gstring;
976   GTokenValue      value;
977   guchar           ch;
978   
979   config = scanner->config;
980   (*value_p).v_int64 = 0;
981   
982   if (scanner->text >= scanner->text_end ||
983       scanner->token == G_TOKEN_EOF)
984     {
985       *token_p = G_TOKEN_EOF;
986       return;
987     }
988   
989   in_comment_multi = FALSE;
990   in_comment_single = FALSE;
991   in_string_sq = FALSE;
992   in_string_dq = FALSE;
993   gstring = NULL;
994   
995   do /* while (ch != 0) */
996     {
997       gboolean dotted_float = FALSE;
998       
999       ch = json_scanner_get_char (scanner, line_p, position_p);
1000       
1001       value.v_int64 = 0;
1002       token = G_TOKEN_NONE;
1003       
1004       /* this is *evil*, but needed ;(
1005        * we first check for identifier first character, because  it
1006        * might interfere with other key chars like slashes or numbers
1007        */
1008       if (config->scan_identifier &&
1009           ch && strchr (config->cset_identifier_first, ch))
1010         goto identifier_precedence;
1011       
1012       switch (ch)
1013         {
1014         case 0:
1015           token = G_TOKEN_EOF;
1016           (*position_p)++;
1017           /* ch = 0; */
1018           break;
1019           
1020         case '/':
1021           if (!config->scan_comment_multi ||
1022               json_scanner_peek_next_char (scanner) != '*')
1023             goto default_case;
1024           json_scanner_get_char (scanner, line_p, position_p);
1025           token = G_TOKEN_COMMENT_MULTI;
1026           in_comment_multi = TRUE;
1027           gstring = g_string_new (NULL);
1028           while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1029             {
1030               if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
1031                 {
1032                   json_scanner_get_char (scanner, line_p, position_p);
1033                   in_comment_multi = FALSE;
1034                   break;
1035                 }
1036               else
1037                 gstring = g_string_append_c (gstring, ch);
1038             }
1039           ch = 0;
1040           break;
1041           
1042         case '\'':
1043           if (!config->scan_string_sq)
1044             goto default_case;
1045           token = G_TOKEN_STRING;
1046           in_string_sq = TRUE;
1047           gstring = g_string_new (NULL);
1048           while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1049             {
1050               if (ch == '\'')
1051                 {
1052                   in_string_sq = FALSE;
1053                   break;
1054                 }
1055               else
1056                 gstring = g_string_append_c (gstring, ch);
1057             }
1058           ch = 0;
1059           break;
1060           
1061         case '"':
1062           if (!config->scan_string_dq)
1063             goto default_case;
1064           token = G_TOKEN_STRING;
1065           in_string_dq = TRUE;
1066           gstring = g_string_new (NULL);
1067           while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1068             {
1069               if (ch == '"')
1070                 {
1071                   in_string_dq = FALSE;
1072                   break;
1073                 }
1074               else
1075                 {
1076                   if (ch == '\\')
1077                     {
1078                       ch = json_scanner_get_char (scanner, line_p, position_p);
1079                       switch (ch)
1080                         {
1081                           guint i;
1082                           guint fchar;
1083                           
1084                         case 0:
1085                           break;
1086                           
1087                         case '\\':
1088                           gstring = g_string_append_c (gstring, '\\');
1089                           break;
1090                           
1091                         case 'n':
1092                           gstring = g_string_append_c (gstring, '\n');
1093                           break;
1094                           
1095                         case 't':
1096                           gstring = g_string_append_c (gstring, '\t');
1097                           break;
1098                           
1099                         case 'r':
1100                           gstring = g_string_append_c (gstring, '\r');
1101                           break;
1102                           
1103                         case 'b':
1104                           gstring = g_string_append_c (gstring, '\b');
1105                           break;
1106                           
1107                         case 'f':
1108                           gstring = g_string_append_c (gstring, '\f');
1109                           break;
1110
1111                         case 'u':
1112                           fchar = json_scanner_peek_next_char (scanner);
1113                           if (is_hex_digit (fchar))
1114                             {
1115                               gunichar ucs;
1116
1117                               ucs = json_scanner_get_unichar (scanner, line_p, position_p);
1118
1119                               /* resolve UTF-16 surrogates for Unicode characters not in the BMP,
1120                                 * as per ECMA 404, Â§ 9, "String"
1121                                 */
1122                               if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
1123                                 {
1124                                   /* read next surrogate */
1125                                   if ('\\' == json_scanner_get_char (scanner, line_p, position_p) &&
1126                                       'u' == json_scanner_get_char (scanner, line_p, position_p))
1127                                     {
1128                                       gunichar units[2];
1129
1130                                       units[0] = ucs;
1131                                       units[1] = json_scanner_get_unichar (scanner, line_p, position_p);
1132
1133                                       ucs = decode_utf16_surrogate_pair (units);
1134                                       g_assert (g_unichar_validate (ucs));
1135                                     }
1136                                 }
1137
1138                               gstring = g_string_append_unichar (gstring, ucs);
1139                             }
1140                           break;
1141                           
1142                         case '0':
1143                         case '1':
1144                         case '2':
1145                         case '3':
1146                         case '4':
1147                         case '5':
1148                         case '6':
1149                         case '7':
1150                           i = ch - '0';
1151                           fchar = json_scanner_peek_next_char (scanner);
1152                           if (fchar >= '0' && fchar <= '7')
1153                             {
1154                               ch = json_scanner_get_char (scanner, line_p, position_p);
1155                               i = i * 8 + ch - '0';
1156                               fchar = json_scanner_peek_next_char (scanner);
1157                               if (fchar >= '0' && fchar <= '7')
1158                                 {
1159                                   ch = json_scanner_get_char (scanner, line_p, position_p);
1160                                   i = i * 8 + ch - '0';
1161                                 }
1162                             }
1163                           gstring = g_string_append_c (gstring, i);
1164                           break;
1165                           
1166                         default:
1167                           gstring = g_string_append_c (gstring, ch);
1168                           break;
1169                         }
1170                     }
1171                   else
1172                     gstring = g_string_append_c (gstring, ch);
1173                 }
1174             }
1175           ch = 0;
1176           break;
1177           
1178         case '.':
1179           if (!config->scan_float)
1180             goto default_case;
1181           token = G_TOKEN_FLOAT;
1182           dotted_float = TRUE;
1183           ch = json_scanner_get_char (scanner, line_p, position_p);
1184           goto number_parsing;
1185           
1186         case '$':
1187           if (!config->scan_hex_dollar)
1188             goto default_case;
1189           token = G_TOKEN_HEX;
1190           ch = json_scanner_get_char (scanner, line_p, position_p);
1191           goto number_parsing;
1192           
1193         case '0':
1194           if (config->scan_octal)
1195             token = G_TOKEN_OCTAL;
1196           else
1197             token = G_TOKEN_INT;
1198           ch = json_scanner_peek_next_char (scanner);
1199           if (config->scan_hex && (ch == 'x' || ch == 'X'))
1200             {
1201               token = G_TOKEN_HEX;
1202               json_scanner_get_char (scanner, line_p, position_p);
1203               ch = json_scanner_get_char (scanner, line_p, position_p);
1204               if (ch == 0)
1205                 {
1206                   token = G_TOKEN_ERROR;
1207                   value.v_error = G_ERR_UNEXP_EOF;
1208                   (*position_p)++;
1209                   break;
1210                 }
1211               if (json_scanner_char_2_num (ch, 16) < 0)
1212                 {
1213                   token = G_TOKEN_ERROR;
1214                   value.v_error = G_ERR_DIGIT_RADIX;
1215                   ch = 0;
1216                   break;
1217                 }
1218             }
1219           else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1220             {
1221               token = G_TOKEN_BINARY;
1222               json_scanner_get_char (scanner, line_p, position_p);
1223               ch = json_scanner_get_char (scanner, line_p, position_p);
1224               if (ch == 0)
1225                 {
1226                   token = G_TOKEN_ERROR;
1227                   value.v_error = G_ERR_UNEXP_EOF;
1228                   (*position_p)++;
1229                   break;
1230                 }
1231               if (json_scanner_char_2_num (ch, 10) < 0)
1232                 {
1233                   token = G_TOKEN_ERROR;
1234                   value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1235                   ch = 0;
1236                   break;
1237                 }
1238             }
1239           else
1240             ch = '0';
1241           /* fall through */
1242         case '1':
1243         case '2':
1244         case '3':
1245         case '4':
1246         case '5':
1247         case '6':
1248         case '7':
1249         case '8':
1250         case '9':
1251         number_parsing:
1252         {
1253           gboolean in_number = TRUE;
1254           gchar *endptr;
1255           
1256           if (token == G_TOKEN_NONE)
1257             token = G_TOKEN_INT;
1258           
1259           gstring = g_string_new (dotted_float ? "0." : "");
1260           gstring = g_string_append_c (gstring, ch);
1261           
1262           do /* while (in_number) */
1263             {
1264               gboolean is_E;
1265               
1266               is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1267               
1268               ch = json_scanner_peek_next_char (scanner);
1269               
1270               if (json_scanner_char_2_num (ch, 36) >= 0 ||
1271                   (config->scan_float && ch == '.') ||
1272                   (is_E && (ch == '+' || ch == '-')))
1273                 {
1274                   ch = json_scanner_get_char (scanner, line_p, position_p);
1275                   
1276                   switch (ch)
1277                     {
1278                     case '.':
1279                       if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
1280                         {
1281                           value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
1282                           token = G_TOKEN_ERROR;
1283                           in_number = FALSE;
1284                         }
1285                       else
1286                         {
1287                           token = G_TOKEN_FLOAT;
1288                           gstring = g_string_append_c (gstring, ch);
1289                         }
1290                       break;
1291                       
1292                     case '0':
1293                     case '1':
1294                     case '2':
1295                     case '3':
1296                     case '4':
1297                     case '5':
1298                     case '6':
1299                     case '7':
1300                     case '8':
1301                     case '9':
1302                       gstring = g_string_append_c (gstring, ch);
1303                       break;
1304                       
1305                     case '-':
1306                     case '+':
1307                       if (token != G_TOKEN_FLOAT)
1308                         {
1309                           token = G_TOKEN_ERROR;
1310                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1311                           in_number = FALSE;
1312                         }
1313                       else
1314                         gstring = g_string_append_c (gstring, ch);
1315                       break;
1316                       
1317                     case 'e':
1318                     case 'E':
1319                       if ((token != G_TOKEN_HEX && !config->scan_float) ||
1320                           (token != G_TOKEN_HEX &&
1321                            token != G_TOKEN_OCTAL &&
1322                            token != G_TOKEN_FLOAT &&
1323                            token != G_TOKEN_INT))
1324                         {
1325                           token = G_TOKEN_ERROR;
1326                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1327                           in_number = FALSE;
1328                         }
1329                       else
1330                         {
1331                           if (token != G_TOKEN_HEX)
1332                             token = G_TOKEN_FLOAT;
1333                           gstring = g_string_append_c (gstring, ch);
1334                         }
1335                       break;
1336                       
1337                     default:
1338                       if (token != G_TOKEN_HEX)
1339                         {
1340                           token = G_TOKEN_ERROR;
1341                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1342                           in_number = FALSE;
1343                         }
1344                       else
1345                         gstring = g_string_append_c (gstring, ch);
1346                       break;
1347                     }
1348                 }
1349               else
1350                 in_number = FALSE;
1351             }
1352           while (in_number);
1353           
1354           endptr = NULL;
1355           if (token == G_TOKEN_FLOAT)
1356             value.v_float = g_strtod (gstring->str, &endptr);
1357           else
1358             {
1359               guint64 ui64 = 0;
1360               switch (token)
1361                 {
1362                 case G_TOKEN_BINARY:
1363                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
1364                   break;
1365                 case G_TOKEN_OCTAL:
1366                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
1367                   break;
1368                 case G_TOKEN_INT:
1369                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
1370                   break;
1371                 case G_TOKEN_HEX:
1372                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
1373                   break;
1374                 default: ;
1375                 }
1376               if (scanner->config->store_int64)
1377                 value.v_int64 = ui64;
1378               else
1379                 value.v_int = ui64;
1380             }
1381           if (endptr && *endptr)
1382             {
1383               token = G_TOKEN_ERROR;
1384               if (*endptr == 'e' || *endptr == 'E')
1385                 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1386               else
1387                 value.v_error = G_ERR_DIGIT_RADIX;
1388             }
1389           g_string_free (gstring, TRUE);
1390           gstring = NULL;
1391           ch = 0;
1392         } /* number_parsing:... */
1393         break;
1394         
1395         default:
1396         default_case:
1397         {
1398           if (config->cpair_comment_single &&
1399               ch == config->cpair_comment_single[0])
1400             {
1401               token = G_TOKEN_COMMENT_SINGLE;
1402               in_comment_single = TRUE;
1403               gstring = g_string_new (NULL);
1404               ch = json_scanner_get_char (scanner, line_p, position_p);
1405               while (ch != 0)
1406                 {
1407                   if (ch == config->cpair_comment_single[1])
1408                     {
1409                       in_comment_single = FALSE;
1410                       ch = 0;
1411                       break;
1412                     }
1413                   
1414                   gstring = g_string_append_c (gstring, ch);
1415                   ch = json_scanner_get_char (scanner, line_p, position_p);
1416                 }
1417               /* ignore a missing newline at EOF for single line comments */
1418               if (in_comment_single &&
1419                   config->cpair_comment_single[1] == '\n')
1420                 in_comment_single = FALSE;
1421             }
1422           else if (config->scan_identifier && ch &&
1423                    strchr (config->cset_identifier_first, ch))
1424             {
1425             identifier_precedence:
1426               
1427               if (config->cset_identifier_nth && ch &&
1428                   strchr (config->cset_identifier_nth,
1429                           json_scanner_peek_next_char (scanner)))
1430                 {
1431                   token = G_TOKEN_IDENTIFIER;
1432                   gstring = g_string_new (NULL);
1433                   gstring = g_string_append_c (gstring, ch);
1434                   do
1435                     {
1436                       ch = json_scanner_get_char (scanner, line_p, position_p);
1437                       gstring = g_string_append_c (gstring, ch);
1438                       ch = json_scanner_peek_next_char (scanner);
1439                     }
1440                   while (ch && strchr (config->cset_identifier_nth, ch));
1441                   ch = 0;
1442                 }
1443               else if (config->scan_identifier_1char)
1444                 {
1445                   token = G_TOKEN_IDENTIFIER;
1446                   value.v_identifier = g_new0 (gchar, 2);
1447                   value.v_identifier[0] = ch;
1448                   ch = 0;
1449                 }
1450             }
1451           if (ch)
1452             {
1453               if (config->char_2_token)
1454                 token = ch;
1455               else
1456                 {
1457                   token = G_TOKEN_CHAR;
1458                   value.v_char = ch;
1459                 }
1460               ch = 0;
1461             }
1462         } /* default_case:... */
1463         break;
1464         }
1465       g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
1466     }
1467   while (ch != 0);
1468   
1469   if (in_comment_multi || in_comment_single ||
1470       in_string_sq || in_string_dq)
1471     {
1472       token = G_TOKEN_ERROR;
1473       if (gstring)
1474         {
1475           g_string_free (gstring, TRUE);
1476           gstring = NULL;
1477         }
1478       (*position_p)++;
1479       if (in_comment_multi || in_comment_single)
1480         value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
1481       else /* (in_string_sq || in_string_dq) */
1482         value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
1483     }
1484   
1485   if (gstring)
1486     {
1487       value.v_string = g_string_free (gstring, FALSE);
1488       gstring = NULL;
1489     }
1490   
1491   if (token == G_TOKEN_IDENTIFIER)
1492     {
1493       if (config->scan_symbols)
1494         {
1495           JsonScannerKey *key;
1496           guint scope_id;
1497           
1498           scope_id = scanner->scope_id;
1499           key = json_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
1500           if (!key && scope_id && scanner->config->scope_0_fallback)
1501             key = json_scanner_lookup_internal (scanner, 0, value.v_identifier);
1502           
1503           if (key)
1504             {
1505               g_free (value.v_identifier);
1506               token = G_TOKEN_SYMBOL;
1507               value.v_symbol = key->value;
1508             }
1509         }
1510       
1511       if (token == G_TOKEN_IDENTIFIER &&
1512           config->scan_identifier_NULL &&
1513           strlen (value.v_identifier) == 4)
1514         {
1515           gchar *null_upper = "NULL";
1516           gchar *null_lower = "null";
1517           
1518           if (scanner->config->case_sensitive)
1519             {
1520               if (value.v_identifier[0] == null_upper[0] &&
1521                   value.v_identifier[1] == null_upper[1] &&
1522                   value.v_identifier[2] == null_upper[2] &&
1523                   value.v_identifier[3] == null_upper[3])
1524                 token = G_TOKEN_IDENTIFIER_NULL;
1525             }
1526           else
1527             {
1528               if ((value.v_identifier[0] == null_upper[0] ||
1529                    value.v_identifier[0] == null_lower[0]) &&
1530                   (value.v_identifier[1] == null_upper[1] ||
1531                    value.v_identifier[1] == null_lower[1]) &&
1532                   (value.v_identifier[2] == null_upper[2] ||
1533                    value.v_identifier[2] == null_lower[2]) &&
1534                   (value.v_identifier[3] == null_upper[3] ||
1535                    value.v_identifier[3] == null_lower[3]))
1536                 token = G_TOKEN_IDENTIFIER_NULL;
1537             }
1538         }
1539     }
1540   
1541   *token_p = token;
1542   *value_p = value;
1543 }