Initial Import
[profile/ivi/json-glib.git] / json-glib / json-scanner.c
1 /* json-scanner.c: Tokenizer for JSON
2  * Copyright (C) 2008 OpenedHand
3  *
4  * Based on JsonScanner: Flexible lexical scanner for general purpose.
5  * Copyright (C) 1997, 1998 Tim Janik
6  *
7  * Modified by Emmanuele Bassi <ebassi@openedhand.com>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the
21  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22  * Boston, MA 02111-1307, USA.
23  */
24
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28
29 #include <errno.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <string.h>
33 #include <stdio.h>
34 #ifdef HAVE_UNISTD_H
35 #include <unistd.h>
36 #endif
37
38 #include <glib.h>
39 #include <glib/gprintf.h>
40
41 #include "json-scanner.h"
42
43 #ifdef G_OS_WIN32
44 #include <io.h> /* For _read() */
45 #endif
46
47 struct _JsonScannerConfig
48 {
49   /* Character sets
50    */
51   gchar *cset_skip_characters; /* default: " \t\n" */
52   gchar *cset_identifier_first;
53   gchar *cset_identifier_nth;
54   gchar *cpair_comment_single; /* default: "#\n" */
55   
56   /* Should symbol lookup work case sensitive? */
57   guint case_sensitive : 1;
58   
59   /* Boolean values to be adjusted "on the fly"
60    * to configure scanning behaviour.
61    */
62   guint skip_comment_multi : 1;  /* C like comment */
63   guint skip_comment_single : 1; /* single line comment */
64   guint scan_comment_multi : 1;  /* scan multi line comments? */
65   guint scan_identifier : 1;
66   guint scan_identifier_1char : 1;
67   guint scan_identifier_NULL : 1;
68   guint scan_symbols : 1;
69   guint scan_binary : 1;
70   guint scan_octal : 1;
71   guint scan_float : 1;
72   guint scan_hex : 1;            /* `0x0ff0' */
73   guint scan_hex_dollar : 1;     /* `$0ff0' */
74   guint scan_string_sq : 1;      /* string: 'anything' */
75   guint scan_string_dq : 1;      /* string: "\\-escapes!\n" */
76   guint numbers_2_int : 1;       /* bin, octal, hex => int */
77   guint int_2_float : 1;         /* int => G_TOKEN_FLOAT? */
78   guint identifier_2_string : 1;
79   guint char_2_token : 1;        /* return G_TOKEN_CHAR? */
80   guint symbol_2_token : 1;
81   guint scope_0_fallback : 1;    /* try scope 0 on lookups? */
82   guint store_int64 : 1;         /* use value.v_int64 rather than v_int */
83   guint padding_dummy;
84 };
85
86 static JsonScannerConfig json_scanner_config_template =
87 {
88   ( " \t\r\n" )         /* cset_skip_characters */,
89   (
90    "_"
91    G_CSET_a_2_z
92    G_CSET_A_2_Z
93   )                     /* cset_identifier_first */,
94   (
95    G_CSET_DIGITS
96    "-_"
97    G_CSET_a_2_z
98    G_CSET_A_2_Z
99   )                     /* cset_identifier_nth */,
100   ( "//\n" )            /* cpair_comment_single */,
101   TRUE                  /* case_sensitive */,
102   TRUE                  /* skip_comment_multi */,
103   TRUE                  /* skip_comment_single */,
104   FALSE                 /* scan_comment_multi */,
105   TRUE                  /* scan_identifier */,
106   TRUE                  /* scan_identifier_1char */,
107   FALSE                 /* scan_identifier_NULL */,
108   TRUE                  /* scan_symbols */,
109   TRUE                  /* scan_binary */,
110   TRUE                  /* scan_octal */,
111   TRUE                  /* scan_float */,
112   TRUE                  /* scan_hex */,
113   TRUE                  /* scan_hex_dollar */,
114   TRUE                  /* scan_string_sq */,
115   TRUE                  /* scan_string_dq */,
116   TRUE                  /* numbers_2_int */,
117   FALSE                 /* int_2_float */,
118   FALSE                 /* identifier_2_string */,
119   TRUE                  /* char_2_token */,
120   TRUE                  /* symbol_2_token */,
121   FALSE                 /* scope_0_fallback */,
122   TRUE                  /* store_int64 */
123 };
124
125 /* --- defines --- */
/* Lower-case one byte.  Bytes in 'A'..'Z', 192..214 and 216..222 get
 * the corresponding case offset OR-ed in; every other byte is passed
 * through unchanged.  The argument is expanded several times, so it
 * must not have side effects.
 */
#define to_lower(c)                                                       \
  ((guchar) (                                                             \
    (((guchar)(c)) >= 'A' && ((guchar)(c)) <= 'Z')                        \
      ? (((guchar)(c)) | ('a' - 'A'))                                     \
      : (((guchar)(c)) >= 192 && ((guchar)(c)) <= 214)                    \
          ? (((guchar)(c)) | (224 - 192))                                 \
          : (((guchar)(c)) >= 216 && ((guchar)(c)) <= 222)                \
              ? (((guchar)(c)) | (248 - 216))                             \
              : ((guchar)(c))                                             \
  ))
134
135 #define READ_BUFFER_SIZE        (4000)
136
137 static const gchar json_symbol_names[] =
138   "true\0"
139   "false\0"
140   "null\0"
141   "var\0";
142
143 static const struct
144 {
145   guint name_offset;
146   guint token;
147 } json_symbols[] = {
148   {  0, JSON_TOKEN_TRUE },
149   {  5, JSON_TOKEN_FALSE },
150   { 11, JSON_TOKEN_NULL },
151   { 16, JSON_TOKEN_VAR }
152 };
153
154 static const guint n_json_symbols = G_N_ELEMENTS (json_symbols);
155
156 /* --- typedefs --- */
157 typedef struct  _JsonScannerKey JsonScannerKey;
158
159 struct  _JsonScannerKey
160 {
161   guint scope_id;
162   gchar *symbol;
163   gpointer value;
164 };
165
166 /* --- prototypes --- */
167 static gboolean json_scanner_key_equal (gconstpointer v1,
168                                         gconstpointer v2);
169 static guint    json_scanner_key_hash  (gconstpointer v);
170
171 static inline
172 JsonScannerKey *json_scanner_lookup_internal (JsonScanner *scanner,
173                                               guint        scope_id,
174                                               const gchar *symbol);
175 static void     json_scanner_get_token_ll    (JsonScanner *scanner,
176                                               GTokenType  *token_p,
177                                               GTokenValue *value_p,
178                                               guint       *line_p,
179                                               guint       *position_p);
180 static void     json_scanner_get_token_i     (JsonScanner *scanner,
181                                               GTokenType  *token_p,
182                                               GTokenValue *value_p,
183                                               guint       *line_p,
184                                               guint       *position_p);
185
186 static guchar   json_scanner_peek_next_char  (JsonScanner *scanner);
187 static guchar   json_scanner_get_char        (JsonScanner *scanner,
188                                               guint       *line_p,
189                                               guint       *position_p);
190 static gunichar json_scanner_get_unichar     (JsonScanner *scanner,
191                                               guint       *line_p,
192                                               guint       *position_p);
193 static void     json_scanner_msg_handler     (JsonScanner *scanner,
194                                               gchar       *message,
195                                               gboolean     is_error);
196
197 /* --- functions --- */
198 static inline gint
199 json_scanner_char_2_num (guchar c,
200                          guchar base)
201 {
202   if (c >= '0' && c <= '9')
203     c -= '0';
204   else if (c >= 'A' && c <= 'Z')
205     c -= 'A' - 10;
206   else if (c >= 'a' && c <= 'z')
207     c -= 'a' - 10;
208   else
209     return -1;
210   
211   if (c < base)
212     return c;
213   
214   return -1;
215 }
216
217 JsonScanner *
218 json_scanner_new (void)
219 {
220   JsonScanner *scanner;
221   JsonScannerConfig *config_templ;
222   
223   config_templ = &json_scanner_config_template;
224   
225   scanner = g_new0 (JsonScanner, 1);
226   
227   scanner->user_data = NULL;
228   scanner->max_parse_errors = 1;
229   scanner->parse_errors = 0;
230   scanner->input_name = NULL;
231   g_datalist_init (&scanner->qdata);
232   
233   scanner->config = g_new0 (JsonScannerConfig, 1);
234   
235   scanner->config->case_sensitive        = config_templ->case_sensitive;
236   scanner->config->cset_skip_characters  = config_templ->cset_skip_characters;
237   if (!scanner->config->cset_skip_characters)
238     scanner->config->cset_skip_characters = "";
239   scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
240   scanner->config->cset_identifier_nth   = config_templ->cset_identifier_nth;
241   scanner->config->cpair_comment_single  = config_templ->cpair_comment_single;
242   scanner->config->skip_comment_multi    = config_templ->skip_comment_multi;
243   scanner->config->skip_comment_single   = config_templ->skip_comment_single;
244   scanner->config->scan_comment_multi    = config_templ->scan_comment_multi;
245   scanner->config->scan_identifier       = config_templ->scan_identifier;
246   scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
247   scanner->config->scan_identifier_NULL  = config_templ->scan_identifier_NULL;
248   scanner->config->scan_symbols          = config_templ->scan_symbols;
249   scanner->config->scan_binary           = config_templ->scan_binary;
250   scanner->config->scan_octal            = config_templ->scan_octal;
251   scanner->config->scan_float            = config_templ->scan_float;
252   scanner->config->scan_hex              = config_templ->scan_hex;
253   scanner->config->scan_hex_dollar       = config_templ->scan_hex_dollar;
254   scanner->config->scan_string_sq        = config_templ->scan_string_sq;
255   scanner->config->scan_string_dq        = config_templ->scan_string_dq;
256   scanner->config->numbers_2_int         = config_templ->numbers_2_int;
257   scanner->config->int_2_float           = config_templ->int_2_float;
258   scanner->config->identifier_2_string   = config_templ->identifier_2_string;
259   scanner->config->char_2_token          = config_templ->char_2_token;
260   scanner->config->symbol_2_token        = config_templ->symbol_2_token;
261   scanner->config->scope_0_fallback      = config_templ->scope_0_fallback;
262   scanner->config->store_int64           = config_templ->store_int64;
263   
264   scanner->token = G_TOKEN_NONE;
265   scanner->value.v_int64 = 0;
266   scanner->line = 1;
267   scanner->position = 0;
268   
269   scanner->next_token = G_TOKEN_NONE;
270   scanner->next_value.v_int64 = 0;
271   scanner->next_line = 1;
272   scanner->next_position = 0;
273   
274   scanner->symbol_table = g_hash_table_new (json_scanner_key_hash,
275                                             json_scanner_key_equal);
276   scanner->input_fd = -1;
277   scanner->text = NULL;
278   scanner->text_end = NULL;
279   scanner->buffer = NULL;
280   scanner->scope_id = 0;
281   
282   scanner->msg_handler = json_scanner_msg_handler;
283   
284   return scanner;
285 }
286
287 static inline void
288 json_scanner_free_value (GTokenType  *token_p,
289                          GTokenValue *value_p)
290 {
291   switch (*token_p)
292     {
293     case G_TOKEN_STRING:
294     case G_TOKEN_IDENTIFIER:
295     case G_TOKEN_IDENTIFIER_NULL:
296     case G_TOKEN_COMMENT_SINGLE:
297     case G_TOKEN_COMMENT_MULTI:
298       g_free (value_p->v_string);
299       break;
300       
301     default:
302       break;
303     }
304   
305   *token_p = G_TOKEN_NONE;
306 }
307
308 static void
309 json_scanner_destroy_symbol_table_entry (gpointer _key,
310                                          gpointer _value,
311                                          gpointer _data)
312 {
313   JsonScannerKey *key = _key;
314   
315   g_free (key->symbol);
316   g_slice_free (JsonScannerKey, key);
317 }
318
319 void
320 json_scanner_destroy (JsonScanner *scanner)
321 {
322   g_return_if_fail (scanner != NULL);
323   
324   g_datalist_clear (&scanner->qdata);
325   g_hash_table_foreach (scanner->symbol_table, 
326                         json_scanner_destroy_symbol_table_entry,
327                         NULL);
328   g_hash_table_destroy (scanner->symbol_table);
329   json_scanner_free_value (&scanner->token, &scanner->value);
330   json_scanner_free_value (&scanner->next_token, &scanner->next_value);
331   g_free (scanner->config);
332   g_free (scanner->buffer);
333   g_free (scanner);
334 }
335
336 static void
337 json_scanner_msg_handler (JsonScanner *scanner,
338                           gchar       *message,
339                           gboolean     is_error)
340 {
341   g_return_if_fail (scanner != NULL);
342   
343   g_fprintf (stderr, "%s:%d: ",
344              scanner->input_name ? scanner->input_name : "<memory>",
345              scanner->line);
346   if (is_error)
347     g_fprintf (stderr, "error: ");
348
349   g_fprintf (stderr, "%s\n", message);
350 }
351
352 void
353 json_scanner_error (JsonScanner *scanner,
354                     const gchar *format,
355                     ...)
356 {
357   g_return_if_fail (scanner != NULL);
358   g_return_if_fail (format != NULL);
359   
360   scanner->parse_errors++;
361   
362   if (scanner->msg_handler)
363     {
364       va_list args;
365       gchar *string;
366       
367       va_start (args, format);
368       string = g_strdup_vprintf (format, args);
369       va_end (args);
370       
371       scanner->msg_handler (scanner, string, TRUE);
372       
373       g_free (string);
374     }
375 }
376
377 void
378 json_scanner_warn (JsonScanner *scanner,
379                    const gchar *format,
380                    ...)
381 {
382   g_return_if_fail (scanner != NULL);
383   g_return_if_fail (format != NULL);
384   
385   if (scanner->msg_handler)
386     {
387       va_list args;
388       gchar *string;
389       
390       va_start (args, format);
391       string = g_strdup_vprintf (format, args);
392       va_end (args);
393       
394       scanner->msg_handler (scanner, string, FALSE);
395       
396       g_free (string);
397     }
398 }
399
400 static gboolean
401 json_scanner_key_equal (gconstpointer v1,
402                         gconstpointer v2)
403 {
404   const JsonScannerKey *key1 = v1;
405   const JsonScannerKey *key2 = v2;
406   
407   return (key1->scope_id == key2->scope_id) &&
408          (strcmp (key1->symbol, key2->symbol) == 0);
409 }
410
411 static guint
412 json_scanner_key_hash (gconstpointer v)
413 {
414   const JsonScannerKey *key = v;
415   gchar *c;
416   guint h;
417   
418   h = key->scope_id;
419   for (c = key->symbol; *c; c++)
420     h = (h << 5) - h + *c;
421   
422   return h;
423 }
424
425 static inline JsonScannerKey *
426 json_scanner_lookup_internal (JsonScanner *scanner,
427                               guint        scope_id,
428                               const gchar *symbol)
429 {
430   JsonScannerKey *key_p;
431   JsonScannerKey key;
432   
433   key.scope_id = scope_id;
434   
435   if (!scanner->config->case_sensitive)
436     {
437       gchar *d;
438       const gchar *c;
439       
440       key.symbol = g_new (gchar, strlen (symbol) + 1);
441       for (d = key.symbol, c = symbol; *c; c++, d++)
442         *d = to_lower (*c);
443       *d = 0;
444       key_p = g_hash_table_lookup (scanner->symbol_table, &key);
445       g_free (key.symbol);
446     }
447   else
448     {
449       key.symbol = (gchar*) symbol;
450       key_p = g_hash_table_lookup (scanner->symbol_table, &key);
451     }
452   
453   return key_p;
454 }
455
456 void
457 json_scanner_scope_add_symbol (JsonScanner *scanner,
458                                guint        scope_id,
459                                const gchar *symbol,
460                                gpointer     value)
461 {
462   JsonScannerKey *key;
463
464   g_return_if_fail (scanner != NULL);
465   g_return_if_fail (symbol != NULL);
466
467   key = json_scanner_lookup_internal (scanner, scope_id, symbol);
468   if (!key)
469     {
470       key = g_slice_new (JsonScannerKey);
471       key->scope_id = scope_id;
472       key->symbol = g_strdup (symbol);
473       key->value = value;
474       if (!scanner->config->case_sensitive)
475         {
476           gchar *c;
477
478           c = key->symbol;
479           while (*c != 0)
480             {
481               *c = to_lower (*c);
482               c++;
483             }
484         }
485
486       g_hash_table_insert (scanner->symbol_table, key, key);
487     }
488   else
489     key->value = value;
490 }
491
492 void
493 json_scanner_scope_remove_symbol (JsonScanner *scanner,
494                                   guint        scope_id,
495                                   const gchar *symbol)
496 {
497   JsonScannerKey *key;
498
499   g_return_if_fail (scanner != NULL);
500   g_return_if_fail (symbol != NULL);
501
502   key = json_scanner_lookup_internal (scanner, scope_id, symbol);
503   if (key)
504     {
505       g_hash_table_remove (scanner->symbol_table, key);
506       g_free (key->symbol);
507       g_slice_free (JsonScannerKey, key);
508     }
509 }
510
511 gpointer
512 json_scanner_lookup_symbol (JsonScanner *scanner,
513                             const gchar *symbol)
514 {
515   JsonScannerKey *key;
516   guint scope_id;
517
518   g_return_val_if_fail (scanner != NULL, NULL);
519
520   if (!symbol)
521     return NULL;
522
523   scope_id = scanner->scope_id;
524   key = json_scanner_lookup_internal (scanner, scope_id, symbol);
525   if (!key && scope_id && scanner->config->scope_0_fallback)
526     key = json_scanner_lookup_internal (scanner, 0, symbol);
527
528   if (key)
529     return key->value;
530   else
531     return NULL;
532 }
533
534 gpointer
535 json_scanner_scope_lookup_symbol (JsonScanner *scanner,
536                                   guint        scope_id,
537                                   const gchar *symbol)
538 {
539   JsonScannerKey *key;
540
541   g_return_val_if_fail (scanner != NULL, NULL);
542
543   if (!symbol)
544     return NULL;
545
546   key = json_scanner_lookup_internal (scanner, scope_id, symbol);
547
548   if (key)
549     return key->value;
550   else
551     return NULL;
552 }
553
554 guint
555 json_scanner_set_scope (JsonScanner *scanner,
556                         guint        scope_id)
557 {
558   guint old_scope_id;
559
560   g_return_val_if_fail (scanner != NULL, 0);
561
562   old_scope_id = scanner->scope_id;
563   scanner->scope_id = scope_id;
564
565   return old_scope_id;
566 }
567
568 typedef struct {
569   GHFunc func;
570   gpointer data;
571   guint scope_id;
572 } ForeachClosure;
573
574 static void
575 json_scanner_foreach_internal (gpointer _key,
576                                gpointer _value,
577                                gpointer _user_data)
578 {
579   JsonScannerKey *key = _value;
580   ForeachClosure *closure = _user_data;
581
582   if (key->scope_id == closure->scope_id)
583     closure->func (key->symbol, key->value, closure->data);
584 }
585
586 void
587 json_scanner_scope_foreach_symbol (JsonScanner *scanner,
588                                    guint        scope_id,
589                                    GHFunc       func,
590                                    gpointer     user_data)
591 {
592   ForeachClosure closure;
593
594   g_return_if_fail (scanner != NULL);
595   g_return_if_fail (func != NULL);
596
597   closure.func = func;
598   closure.data = user_data;
599   closure.scope_id = scope_id;
600
601   g_hash_table_foreach (scanner->symbol_table,
602                         json_scanner_foreach_internal,
603                         &closure);
604 }
605
606 GTokenType
607 json_scanner_peek_next_token (JsonScanner *scanner)
608 {
609   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
610
611   if (scanner->next_token == G_TOKEN_NONE)
612     {
613       scanner->next_line = scanner->line;
614       scanner->next_position = scanner->position;
615       json_scanner_get_token_i (scanner,
616                                 &scanner->next_token,
617                                 &scanner->next_value,
618                                 &scanner->next_line,
619                                 &scanner->next_position);
620     }
621
622   return scanner->next_token;
623 }
624
625 GTokenType
626 json_scanner_get_next_token (JsonScanner *scanner)
627 {
628   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
629
630   if (scanner->next_token != G_TOKEN_NONE)
631     {
632       json_scanner_free_value (&scanner->token, &scanner->value);
633
634       scanner->token = scanner->next_token;
635       scanner->value = scanner->next_value;
636       scanner->line = scanner->next_line;
637       scanner->position = scanner->next_position;
638       scanner->next_token = G_TOKEN_NONE;
639     }
640   else
641     json_scanner_get_token_i (scanner,
642                               &scanner->token,
643                               &scanner->value,
644                               &scanner->line,
645                               &scanner->position);
646
647   return scanner->token;
648 }
649
650 GTokenType
651 json_scanner_cur_token (JsonScanner *scanner)
652 {
653   g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
654
655   return scanner->token;
656 }
657
658 GTokenValue
659 json_scanner_cur_value (JsonScanner *scanner)
660 {
661   GTokenValue v;
662
663   v.v_int64 = 0;
664
665   g_return_val_if_fail (scanner != NULL, v);
666
667   /* MSC isn't capable of handling return scanner->value; ? */
668
669   v = scanner->value;
670
671   return v;
672 }
673
674 guint
675 json_scanner_cur_line (JsonScanner *scanner)
676 {
677   g_return_val_if_fail (scanner != NULL, 0);
678
679   return scanner->line;
680 }
681
682 guint
683 json_scanner_cur_position (JsonScanner *scanner)
684 {
685   g_return_val_if_fail (scanner != NULL, 0);
686
687   return scanner->position;
688 }
689
690 gboolean
691 json_scanner_eof (JsonScanner *scanner)
692 {
693   g_return_val_if_fail (scanner != NULL, TRUE);
694
695   return scanner->token == G_TOKEN_EOF || scanner->token == G_TOKEN_ERROR;
696 }
697
698 void
699 json_scanner_input_file (JsonScanner *scanner,
700                          gint         input_fd)
701 {
702   g_return_if_fail (scanner != NULL);
703   g_return_if_fail (input_fd >= 0);
704
705   if (scanner->input_fd >= 0)
706     json_scanner_sync_file_offset (scanner);
707
708   scanner->token = G_TOKEN_NONE;
709   scanner->value.v_int64 = 0;
710   scanner->line = 1;
711   scanner->position = 0;
712   scanner->next_token = G_TOKEN_NONE;
713
714   scanner->input_fd = input_fd;
715   scanner->text = NULL;
716   scanner->text_end = NULL;
717
718   if (!scanner->buffer)
719     scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1);
720 }
721
722 void
723 json_scanner_input_text (JsonScanner *scanner,
724                          const gchar *text,
725                          guint        text_len)
726 {
727   g_return_if_fail (scanner != NULL);
728   if (text_len)
729     g_return_if_fail (text != NULL);
730   else
731     text = NULL;
732
733   if (scanner->input_fd >= 0)
734     json_scanner_sync_file_offset (scanner);
735
736   scanner->token = G_TOKEN_NONE;
737   scanner->value.v_int64 = 0;
738   scanner->line = 1;
739   scanner->position = 0;
740   scanner->next_token = G_TOKEN_NONE;
741
742   scanner->input_fd = -1;
743   scanner->text = text;
744   scanner->text_end = text + text_len;
745
746   if (scanner->buffer)
747     {
748       g_free (scanner->buffer);
749       scanner->buffer = NULL;
750     }
751 }
752
753 static guchar
754 json_scanner_peek_next_char (JsonScanner *scanner)
755 {
756   if (scanner->text < scanner->text_end)
757     return *scanner->text;
758   else if (scanner->input_fd >= 0)
759     {
760       gint count;
761       gchar *buffer;
762
763       buffer = scanner->buffer;
764       do
765         {
766           count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
767         }
768       while (count == -1 && (errno == EINTR || errno == EAGAIN));
769
770       if (count < 1)
771         {
772           scanner->input_fd = -1;
773
774           return 0;
775         }
776       else
777         {
778           scanner->text = buffer;
779           scanner->text_end = buffer + count;
780
781           return *buffer;
782         }
783     }
784   else
785     return 0;
786 }
787
788 void
789 json_scanner_sync_file_offset (JsonScanner *scanner)
790 {
791   g_return_if_fail (scanner != NULL);
792
793   /* for file input, rewind the filedescriptor to the current
794    * buffer position and blow the file read ahead buffer. useful
795    * for third party uses of our file descriptor, which hooks 
796    * onto the current scanning position.
797    */
798
799   if (scanner->input_fd >= 0 && scanner->text_end > scanner->text)
800     {
801       gint buffered;
802
803       buffered = scanner->text_end - scanner->text;
804       if (lseek (scanner->input_fd, - buffered, SEEK_CUR) >= 0)
805         {
806           /* we succeeded, blow our buffer's contents now */
807           scanner->text = NULL;
808           scanner->text_end = NULL;
809         }
810       else
811         errno = 0;
812     }
813 }
814
815 static guchar
816 json_scanner_get_char (JsonScanner *scanner,
817                        guint       *line_p,
818                        guint       *position_p)
819 {
820   guchar fchar;
821
822   if (scanner->text < scanner->text_end)
823     fchar = *(scanner->text++);
824   else if (scanner->input_fd >= 0)
825     {
826       gint count;
827       gchar *buffer;
828
829       buffer = scanner->buffer;
830       do
831         {
832           count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
833         }
834       while (count == -1 && (errno == EINTR || errno == EAGAIN));
835
836       if (count < 1)
837         {
838           scanner->input_fd = -1;
839           fchar = 0;
840         }
841       else
842         {
843           scanner->text = buffer + 1;
844           scanner->text_end = buffer + count;
845           fchar = *buffer;
846           if (!fchar)
847             {
848               json_scanner_sync_file_offset (scanner);
849               scanner->text_end = scanner->text;
850               scanner->input_fd = -1;
851             }
852         }
853     }
854   else
855     fchar = 0;
856   
857   if (fchar == '\n')
858     {
859       (*position_p) = 0;
860       (*line_p)++;
861     }
862   else if (fchar)
863     {
864       (*position_p)++;
865     }
866   
867   return fchar;
868 }
869
/* TRUE for '0'..'9', 'a'..'f' and 'A'..'F'.  Expands its argument
 * several times.
 */
#define is_hex_digit(c)         (((c) >= '0' && (c) <= '9') || \
                                 ((c) >= 'a' && (c) <= 'f') || \
                                 ((c) >= 'A' && (c) <= 'F'))

/* Numeric value of a character for which is_hex_digit() holds; the
 * result is meaningless for anything else.  For letters, the low
 * three bits ('a'/'A' -> 1 ... 'f'/'F' -> 6) plus 9 give 10..15.
 */
#define to_hex_digit(c)         (((c) <= '9') ? (c) - '0' : ((c) & 7) + 9)
874
875 static gunichar
876 json_scanner_get_unichar (JsonScanner *scanner,
877                           guint       *line_p,
878                           guint       *position_p)
879 {
880   gunichar uchar;
881   gchar ch;
882   gint i;
883
884   uchar = 0;
885   for (i = 0; i < 4; i++)
886     {
887       ch = json_scanner_get_char (scanner, line_p, position_p);
888
889       if (is_hex_digit (ch))
890         uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4));
891       else
892         break;
893     }
894
895   g_assert (g_unichar_validate (uchar) || g_unichar_type (uchar) == G_UNICODE_SURROGATE);
896
897   return uchar;
898 }
899
900 void
901 json_scanner_unexp_token (JsonScanner *scanner,
902                           GTokenType   expected_token,
903                           const gchar *identifier_spec,
904                           const gchar *symbol_spec,
905                           const gchar *symbol_name,
906                           const gchar *message,
907                           gint         is_error)
908 {
909   gchar *token_string;
910   guint token_string_len;
911   gchar *expected_string;
912   guint expected_string_len;
913   gchar *message_prefix;
914   gboolean print_unexp;
915   void (*msg_handler) (JsonScanner*, const gchar*, ...);
916   
917   g_return_if_fail (scanner != NULL);
918   
919   if (is_error)
920     msg_handler = json_scanner_error;
921   else
922     msg_handler = json_scanner_warn;
923   
924   if (!identifier_spec)
925     identifier_spec = "identifier";
926   if (!symbol_spec)
927     symbol_spec = "symbol";
928   
929   token_string_len = 56;
930   token_string = g_new (gchar, token_string_len + 1);
931   expected_string_len = 64;
932   expected_string = g_new (gchar, expected_string_len + 1);
933   print_unexp = TRUE;
934   
935   switch (scanner->token)
936     {
937     case G_TOKEN_EOF:
938       g_snprintf (token_string, token_string_len, "end of file");
939       break;
940       
941     default:
942       if (scanner->token >= 1 && scanner->token <= 255)
943         {
944           if ((scanner->token >= ' ' && scanner->token <= '~') ||
945               strchr (scanner->config->cset_identifier_first, scanner->token) ||
946               strchr (scanner->config->cset_identifier_nth, scanner->token))
947             g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
948           else
949             g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
950           break;
951         }
952       else if (!scanner->config->symbol_2_token)
953         {
954           g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
955           break;
956         }
957       /* fall through */
958     case G_TOKEN_SYMBOL:
959       if (expected_token == G_TOKEN_SYMBOL ||
960           (scanner->config->symbol_2_token &&
961            expected_token > G_TOKEN_LAST))
962         print_unexp = FALSE;
963       if (symbol_name)
964         g_snprintf (token_string, token_string_len,
965                     "%s%s `%s'",
966                     print_unexp ? "" : "invalid ",
967                     symbol_spec,
968                     symbol_name);
969       else
970         g_snprintf (token_string, token_string_len,
971                     "%s%s",
972                     print_unexp ? "" : "invalid ",
973                     symbol_spec);
974       break;
975  
976     case G_TOKEN_ERROR:
977       print_unexp = FALSE;
978       expected_token = G_TOKEN_NONE;
979       switch (scanner->value.v_error)
980         {
981         case G_ERR_UNEXP_EOF:
982           g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
983           break;
984           
985         case G_ERR_UNEXP_EOF_IN_STRING:
986           g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
987           break;
988           
989         case G_ERR_UNEXP_EOF_IN_COMMENT:
990           g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
991           break;
992           
993         case G_ERR_NON_DIGIT_IN_CONST:
994           g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
995           break;
996           
997         case G_ERR_FLOAT_RADIX:
998           g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
999           break;
1000           
1001         case G_ERR_FLOAT_MALFORMED:
1002           g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
1003           break;
1004           
1005         case G_ERR_DIGIT_RADIX:
1006           g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
1007           break;
1008           
1009         case G_ERR_UNKNOWN:
1010         default:
1011           g_snprintf (token_string, token_string_len, "scanner: unknown error");
1012           break;
1013         }
1014       break;
1015       
1016     case G_TOKEN_CHAR:
1017       g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
1018       break;
1019       
1020     case G_TOKEN_IDENTIFIER:
1021     case G_TOKEN_IDENTIFIER_NULL:
1022       if (expected_token == G_TOKEN_IDENTIFIER ||
1023           expected_token == G_TOKEN_IDENTIFIER_NULL)
1024         print_unexp = FALSE;
1025       g_snprintf (token_string, token_string_len,
1026                   "%s%s `%s'",
1027                   print_unexp ? "" : "invalid ",
1028                   identifier_spec,
1029                   scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
1030       break;
1031       
1032     case G_TOKEN_BINARY:
1033     case G_TOKEN_OCTAL:
1034     case G_TOKEN_INT:
1035     case G_TOKEN_HEX:
1036       if (scanner->config->store_int64)
1037         g_snprintf (token_string, token_string_len, "number `%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
1038       else
1039         g_snprintf (token_string, token_string_len, "number `%lu'", scanner->value.v_int);
1040       break;
1041       
1042     case G_TOKEN_FLOAT:
1043       g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
1044       break;
1045       
1046     case G_TOKEN_STRING:
1047       if (expected_token == G_TOKEN_STRING)
1048         print_unexp = FALSE;
1049       g_snprintf (token_string, token_string_len,
1050                   "%s%sstring constant \"%s\"",
1051                   print_unexp ? "" : "invalid ",
1052                   scanner->value.v_string[0] == 0 ? "empty " : "",
1053                   scanner->value.v_string);
1054       token_string[token_string_len - 2] = '"';
1055       token_string[token_string_len - 1] = 0;
1056       break;
1057       
1058     case G_TOKEN_COMMENT_SINGLE:
1059     case G_TOKEN_COMMENT_MULTI:
1060       g_snprintf (token_string, token_string_len, "comment");
1061       break;
1062       
1063     case G_TOKEN_NONE:
1064       /* somehow the user's parsing code is screwed, there isn't much
1065        * we can do about it.
1066        * Note, a common case to trigger this is
1067        * json_scanner_peek_next_token(); json_scanner_unexp_token();
1068        * without an intermediate json_scanner_get_next_token().
1069        */
1070       g_assert_not_reached ();
1071       break;
1072     }
1073   
1074   
1075   switch (expected_token)
1076     {
1077       gboolean need_valid;
1078       gchar *tstring;
1079     case G_TOKEN_EOF:
1080       g_snprintf (expected_string, expected_string_len, "end of file");
1081       break;
1082     default:
1083       if (expected_token >= 1 && expected_token <= 255)
1084         {
1085           if ((expected_token >= ' ' && expected_token <= '~') ||
1086               strchr (scanner->config->cset_identifier_first, expected_token) ||
1087               strchr (scanner->config->cset_identifier_nth, expected_token))
1088             g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
1089           else
1090             g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
1091           break;
1092         }
1093       else if (!scanner->config->symbol_2_token)
1094         {
1095           g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
1096           break;
1097         }
1098       /* fall through */
1099     case G_TOKEN_SYMBOL:
1100       need_valid = (scanner->token == G_TOKEN_SYMBOL ||
1101                     (scanner->config->symbol_2_token &&
1102                      scanner->token > G_TOKEN_LAST));
1103       g_snprintf (expected_string, expected_string_len,
1104                   "%s%s",
1105                   need_valid ? "valid " : "",
1106                   symbol_spec);
1107       /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
1108       break;
1109     case G_TOKEN_CHAR:
1110       g_snprintf (expected_string, expected_string_len, "%scharacter",
1111                   scanner->token == G_TOKEN_CHAR ? "valid " : "");
1112       break;
1113     case G_TOKEN_BINARY:
1114       tstring = "binary";
1115       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1116                   scanner->token == expected_token ? "valid " : "", tstring);
1117       break;
1118     case G_TOKEN_OCTAL:
1119       tstring = "octal";
1120       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1121                   scanner->token == expected_token ? "valid " : "", tstring);
1122       break;
1123     case G_TOKEN_INT:
1124       tstring = "integer";
1125       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1126                   scanner->token == expected_token ? "valid " : "", tstring);
1127       break;
1128     case G_TOKEN_HEX:
1129       tstring = "hexadecimal";
1130       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1131                   scanner->token == expected_token ? "valid " : "", tstring);
1132       break;
1133     case G_TOKEN_FLOAT:
1134       tstring = "float";
1135       g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1136                   scanner->token == expected_token ? "valid " : "", tstring);
1137       break;
1138     case G_TOKEN_STRING:
1139       g_snprintf (expected_string,
1140                   expected_string_len,
1141                   "%sstring constant",
1142                   scanner->token == G_TOKEN_STRING ? "valid " : "");
1143       break;
1144     case G_TOKEN_IDENTIFIER:
1145     case G_TOKEN_IDENTIFIER_NULL:
1146       need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
1147                     scanner->token == G_TOKEN_IDENTIFIER);
1148       g_snprintf (expected_string,
1149                   expected_string_len,
1150                   "%s%s",
1151                   need_valid ? "valid " : "",
1152                   identifier_spec);
1153       break;
1154     case G_TOKEN_COMMENT_SINGLE:
1155       tstring = "single-line";
1156       g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1157                   scanner->token == expected_token ? "valid " : "", tstring);
1158       break;
1159     case G_TOKEN_COMMENT_MULTI:
1160       tstring = "multi-line";
1161       g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1162                   scanner->token == expected_token ? "valid " : "", tstring);
1163       break;
1164     case G_TOKEN_NONE:
1165     case G_TOKEN_ERROR:
1166       /* this is handled upon printout */
1167       break;
1168     }
1169   
1170   if (message && message[0] != 0)
1171     message_prefix = " - ";
1172   else
1173     {
1174       message_prefix = "";
1175       message = "";
1176     }
1177   if (expected_token == G_TOKEN_ERROR)
1178     {
1179       msg_handler (scanner,
1180                    "failure around %s%s%s",
1181                    token_string,
1182                    message_prefix,
1183                    message);
1184     }
1185   else if (expected_token == G_TOKEN_NONE)
1186     {
1187       if (print_unexp)
1188         msg_handler (scanner,
1189                      "unexpected %s%s%s",
1190                      token_string,
1191                      message_prefix,
1192                      message);
1193       else
1194         msg_handler (scanner,
1195                      "%s%s%s",
1196                      token_string,
1197                      message_prefix,
1198                      message);
1199     }
1200   else
1201     {
1202       if (print_unexp)
1203         msg_handler (scanner,
1204                      "unexpected %s, expected %s%s%s",
1205                      token_string,
1206                      expected_string,
1207                      message_prefix,
1208                      message);
1209       else
1210         msg_handler (scanner,
1211                      "%s, expected %s%s%s",
1212                      token_string,
1213                      expected_string,
1214                      message_prefix,
1215                      message);
1216     }
1217   
1218   g_free (token_string);
1219   g_free (expected_string);
1220 }
1221
1222 static void
1223 json_scanner_get_token_i (JsonScanner   *scanner,
1224                        GTokenType       *token_p,
1225                        GTokenValue      *value_p,
1226                        guint            *line_p,
1227                        guint            *position_p)
1228 {
1229   do
1230     {
1231       json_scanner_free_value (token_p, value_p);
1232       json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
1233     }
1234   while (((*token_p > 0 && *token_p < 256) &&
1235           strchr (scanner->config->cset_skip_characters, *token_p)) ||
1236          (*token_p == G_TOKEN_CHAR &&
1237           strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
1238          (*token_p == G_TOKEN_COMMENT_MULTI &&
1239           scanner->config->skip_comment_multi) ||
1240          (*token_p == G_TOKEN_COMMENT_SINGLE &&
1241           scanner->config->skip_comment_single));
1242   
1243   switch (*token_p)
1244     {
1245     case G_TOKEN_IDENTIFIER:
1246       if (scanner->config->identifier_2_string)
1247         *token_p = G_TOKEN_STRING;
1248       break;
1249       
1250     case G_TOKEN_SYMBOL:
1251       if (scanner->config->symbol_2_token)
1252         *token_p = (GTokenType) value_p->v_symbol;
1253       break;
1254       
1255     case G_TOKEN_BINARY:
1256     case G_TOKEN_OCTAL:
1257     case G_TOKEN_HEX:
1258       if (scanner->config->numbers_2_int)
1259         *token_p = G_TOKEN_INT;
1260       break;
1261       
1262     default:
1263       break;
1264     }
1265   
1266   if (*token_p == G_TOKEN_INT &&
1267       scanner->config->int_2_float)
1268     {
1269       *token_p = G_TOKEN_FLOAT;
1270       if (scanner->config->store_int64)
1271         {
1272 #ifdef _MSC_VER
1273           /* work around error C2520, see gvaluetransform.c */
1274           value_p->v_float = (__int64)value_p->v_int64;
1275 #else
1276           value_p->v_float = value_p->v_int64;
1277 #endif
1278         }
1279       else
1280         value_p->v_float = value_p->v_int;
1281     }
1282   
1283   errno = 0;
1284 }
1285
1286 static void
1287 json_scanner_get_token_ll (JsonScanner *scanner,
1288                            GTokenType  *token_p,
1289                            GTokenValue *value_p,
1290                            guint       *line_p,
1291                            guint       *position_p)
1292 {
1293   JsonScannerConfig *config;
1294   GTokenType       token;
1295   gboolean         in_comment_multi;
1296   gboolean         in_comment_single;
1297   gboolean         in_string_sq;
1298   gboolean         in_string_dq;
1299   GString         *gstring;
1300   GTokenValue      value;
1301   guchar           ch;
1302   
1303   config = scanner->config;
1304   (*value_p).v_int64 = 0;
1305   
1306   if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
1307       scanner->token == G_TOKEN_EOF)
1308     {
1309       *token_p = G_TOKEN_EOF;
1310       return;
1311     }
1312   
1313   in_comment_multi = FALSE;
1314   in_comment_single = FALSE;
1315   in_string_sq = FALSE;
1316   in_string_dq = FALSE;
1317   gstring = NULL;
1318   
1319   do /* while (ch != 0) */
1320     {
1321       gboolean dotted_float = FALSE;
1322       
1323       ch = json_scanner_get_char (scanner, line_p, position_p);
1324       
1325       value.v_int64 = 0;
1326       token = G_TOKEN_NONE;
1327       
1328       /* this is *evil*, but needed ;(
1329        * we first check for identifier first character, because  it
1330        * might interfere with other key chars like slashes or numbers
1331        */
1332       if (config->scan_identifier &&
1333           ch && strchr (config->cset_identifier_first, ch))
1334         goto identifier_precedence;
1335       
1336       switch (ch)
1337         {
1338         case 0:
1339           token = G_TOKEN_EOF;
1340           (*position_p)++;
1341           /* ch = 0; */
1342           break;
1343           
1344         case '/':
1345           if (!config->scan_comment_multi ||
1346               json_scanner_peek_next_char (scanner) != '*')
1347             goto default_case;
1348           json_scanner_get_char (scanner, line_p, position_p);
1349           token = G_TOKEN_COMMENT_MULTI;
1350           in_comment_multi = TRUE;
1351           gstring = g_string_new (NULL);
1352           while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1353             {
1354               if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
1355                 {
1356                   json_scanner_get_char (scanner, line_p, position_p);
1357                   in_comment_multi = FALSE;
1358                   break;
1359                 }
1360               else
1361                 gstring = g_string_append_c (gstring, ch);
1362             }
1363           ch = 0;
1364           break;
1365           
1366         case '\'':
1367           if (!config->scan_string_sq)
1368             goto default_case;
1369           token = G_TOKEN_STRING;
1370           in_string_sq = TRUE;
1371           gstring = g_string_new (NULL);
1372           while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1373             {
1374               if (ch == '\'')
1375                 {
1376                   in_string_sq = FALSE;
1377                   break;
1378                 }
1379               else
1380                 gstring = g_string_append_c (gstring, ch);
1381             }
1382           ch = 0;
1383           break;
1384           
1385         case '"':
1386           if (!config->scan_string_dq)
1387             goto default_case;
1388           token = G_TOKEN_STRING;
1389           in_string_dq = TRUE;
1390           gstring = g_string_new (NULL);
1391           while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1392             {
1393               if (ch == '"')
1394                 {
1395                   in_string_dq = FALSE;
1396                   break;
1397                 }
1398               else
1399                 {
1400                   if (ch == '\\')
1401                     {
1402                       ch = json_scanner_get_char (scanner, line_p, position_p);
1403                       switch (ch)
1404                         {
1405                           guint i;
1406                           guint fchar;
1407                           
1408                         case 0:
1409                           break;
1410                           
1411                         case '\\':
1412                           gstring = g_string_append_c (gstring, '\\');
1413                           break;
1414                           
1415                         case 'n':
1416                           gstring = g_string_append_c (gstring, '\n');
1417                           break;
1418                           
1419                         case 't':
1420                           gstring = g_string_append_c (gstring, '\t');
1421                           break;
1422                           
1423                         case 'r':
1424                           gstring = g_string_append_c (gstring, '\r');
1425                           break;
1426                           
1427                         case 'b':
1428                           gstring = g_string_append_c (gstring, '\b');
1429                           break;
1430                           
1431                         case 'f':
1432                           gstring = g_string_append_c (gstring, '\f');
1433                           break;
1434
1435                         case 'u':
1436                           fchar = json_scanner_peek_next_char (scanner);
1437                           if (is_hex_digit (fchar))
1438                             {
1439                               gunichar ucs;
1440
1441                               ucs = json_scanner_get_unichar (scanner, line_p, position_p);
1442
1443                               if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
1444                                 {
1445                                   /* read next surrogate */
1446                                   if ('\\' == json_scanner_get_char (scanner, line_p, position_p)
1447                                       && 'u' == json_scanner_get_char (scanner, line_p, position_p))
1448                                     {
1449                                       gunichar ucs_lo = json_scanner_get_unichar (scanner, line_p, position_p);
1450                                       g_assert (g_unichar_type (ucs_lo) == G_UNICODE_SURROGATE);
1451                                       ucs = (((ucs & 0x3ff) << 10) | (ucs_lo & 0x3ff)) + 0x10000;
1452                                     }
1453                                 }
1454
1455                               g_assert (g_unichar_validate (ucs));
1456                               gstring = g_string_append_unichar (gstring, ucs);
1457                             }
1458                           break;
1459                           
1460                         case '0':
1461                         case '1':
1462                         case '2':
1463                         case '3':
1464                         case '4':
1465                         case '5':
1466                         case '6':
1467                         case '7':
1468                           i = ch - '0';
1469                           fchar = json_scanner_peek_next_char (scanner);
1470                           if (fchar >= '0' && fchar <= '7')
1471                             {
1472                               ch = json_scanner_get_char (scanner, line_p, position_p);
1473                               i = i * 8 + ch - '0';
1474                               fchar = json_scanner_peek_next_char (scanner);
1475                               if (fchar >= '0' && fchar <= '7')
1476                                 {
1477                                   ch = json_scanner_get_char (scanner, line_p, position_p);
1478                                   i = i * 8 + ch - '0';
1479                                 }
1480                             }
1481                           gstring = g_string_append_c (gstring, i);
1482                           break;
1483                           
1484                         default:
1485                           gstring = g_string_append_c (gstring, ch);
1486                           break;
1487                         }
1488                     }
1489                   else
1490                     gstring = g_string_append_c (gstring, ch);
1491                 }
1492             }
1493           ch = 0;
1494           break;
1495           
1496         case '.':
1497           if (!config->scan_float)
1498             goto default_case;
1499           token = G_TOKEN_FLOAT;
1500           dotted_float = TRUE;
1501           ch = json_scanner_get_char (scanner, line_p, position_p);
1502           goto number_parsing;
1503           
1504         case '$':
1505           if (!config->scan_hex_dollar)
1506             goto default_case;
1507           token = G_TOKEN_HEX;
1508           ch = json_scanner_get_char (scanner, line_p, position_p);
1509           goto number_parsing;
1510           
1511         case '0':
1512           if (config->scan_octal)
1513             token = G_TOKEN_OCTAL;
1514           else
1515             token = G_TOKEN_INT;
1516           ch = json_scanner_peek_next_char (scanner);
1517           if (config->scan_hex && (ch == 'x' || ch == 'X'))
1518             {
1519               token = G_TOKEN_HEX;
1520               json_scanner_get_char (scanner, line_p, position_p);
1521               ch = json_scanner_get_char (scanner, line_p, position_p);
1522               if (ch == 0)
1523                 {
1524                   token = G_TOKEN_ERROR;
1525                   value.v_error = G_ERR_UNEXP_EOF;
1526                   (*position_p)++;
1527                   break;
1528                 }
1529               if (json_scanner_char_2_num (ch, 16) < 0)
1530                 {
1531                   token = G_TOKEN_ERROR;
1532                   value.v_error = G_ERR_DIGIT_RADIX;
1533                   ch = 0;
1534                   break;
1535                 }
1536             }
1537           else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1538             {
1539               token = G_TOKEN_BINARY;
1540               json_scanner_get_char (scanner, line_p, position_p);
1541               ch = json_scanner_get_char (scanner, line_p, position_p);
1542               if (ch == 0)
1543                 {
1544                   token = G_TOKEN_ERROR;
1545                   value.v_error = G_ERR_UNEXP_EOF;
1546                   (*position_p)++;
1547                   break;
1548                 }
1549               if (json_scanner_char_2_num (ch, 10) < 0)
1550                 {
1551                   token = G_TOKEN_ERROR;
1552                   value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1553                   ch = 0;
1554                   break;
1555                 }
1556             }
1557           else
1558             ch = '0';
1559           /* fall through */
1560         case '1':
1561         case '2':
1562         case '3':
1563         case '4':
1564         case '5':
1565         case '6':
1566         case '7':
1567         case '8':
1568         case '9':
1569         number_parsing:
1570         {
1571           gboolean in_number = TRUE;
1572           gchar *endptr;
1573           
1574           if (token == G_TOKEN_NONE)
1575             token = G_TOKEN_INT;
1576           
1577           gstring = g_string_new (dotted_float ? "0." : "");
1578           gstring = g_string_append_c (gstring, ch);
1579           
1580           do /* while (in_number) */
1581             {
1582               gboolean is_E;
1583               
1584               is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1585               
1586               ch = json_scanner_peek_next_char (scanner);
1587               
1588               if (json_scanner_char_2_num (ch, 36) >= 0 ||
1589                   (config->scan_float && ch == '.') ||
1590                   (is_E && (ch == '+' || ch == '-')))
1591                 {
1592                   ch = json_scanner_get_char (scanner, line_p, position_p);
1593                   
1594                   switch (ch)
1595                     {
1596                     case '.':
1597                       if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
1598                         {
1599                           value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
1600                           token = G_TOKEN_ERROR;
1601                           in_number = FALSE;
1602                         }
1603                       else
1604                         {
1605                           token = G_TOKEN_FLOAT;
1606                           gstring = g_string_append_c (gstring, ch);
1607                         }
1608                       break;
1609                       
1610                     case '0':
1611                     case '1':
1612                     case '2':
1613                     case '3':
1614                     case '4':
1615                     case '5':
1616                     case '6':
1617                     case '7':
1618                     case '8':
1619                     case '9':
1620                       gstring = g_string_append_c (gstring, ch);
1621                       break;
1622                       
1623                     case '-':
1624                     case '+':
1625                       if (token != G_TOKEN_FLOAT)
1626                         {
1627                           token = G_TOKEN_ERROR;
1628                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1629                           in_number = FALSE;
1630                         }
1631                       else
1632                         gstring = g_string_append_c (gstring, ch);
1633                       break;
1634                       
1635                     case 'e':
1636                     case 'E':
1637                       if ((token != G_TOKEN_HEX && !config->scan_float) ||
1638                           (token != G_TOKEN_HEX &&
1639                            token != G_TOKEN_OCTAL &&
1640                            token != G_TOKEN_FLOAT &&
1641                            token != G_TOKEN_INT))
1642                         {
1643                           token = G_TOKEN_ERROR;
1644                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1645                           in_number = FALSE;
1646                         }
1647                       else
1648                         {
1649                           if (token != G_TOKEN_HEX)
1650                             token = G_TOKEN_FLOAT;
1651                           gstring = g_string_append_c (gstring, ch);
1652                         }
1653                       break;
1654                       
1655                     default:
1656                       if (token != G_TOKEN_HEX)
1657                         {
1658                           token = G_TOKEN_ERROR;
1659                           value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1660                           in_number = FALSE;
1661                         }
1662                       else
1663                         gstring = g_string_append_c (gstring, ch);
1664                       break;
1665                     }
1666                 }
1667               else
1668                 in_number = FALSE;
1669             }
1670           while (in_number);
1671           
1672           endptr = NULL;
1673           if (token == G_TOKEN_FLOAT)
1674             value.v_float = g_strtod (gstring->str, &endptr);
1675           else
1676             {
1677               guint64 ui64 = 0;
1678               switch (token)
1679                 {
1680                 case G_TOKEN_BINARY:
1681                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
1682                   break;
1683                 case G_TOKEN_OCTAL:
1684                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
1685                   break;
1686                 case G_TOKEN_INT:
1687                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
1688                   break;
1689                 case G_TOKEN_HEX:
1690                   ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
1691                   break;
1692                 default: ;
1693                 }
1694               if (scanner->config->store_int64)
1695                 value.v_int64 = ui64;
1696               else
1697                 value.v_int = ui64;
1698             }
1699           if (endptr && *endptr)
1700             {
1701               token = G_TOKEN_ERROR;
1702               if (*endptr == 'e' || *endptr == 'E')
1703                 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1704               else
1705                 value.v_error = G_ERR_DIGIT_RADIX;
1706             }
1707           g_string_free (gstring, TRUE);
1708           gstring = NULL;
1709           ch = 0;
1710         } /* number_parsing:... */
1711         break;
1712         
1713         default:
1714         default_case:
1715         {
1716           if (config->cpair_comment_single &&
1717               ch == config->cpair_comment_single[0])
1718             {
1719               token = G_TOKEN_COMMENT_SINGLE;
1720               in_comment_single = TRUE;
1721               gstring = g_string_new (NULL);
1722               ch = json_scanner_get_char (scanner, line_p, position_p);
1723               while (ch != 0)
1724                 {
1725                   if (ch == config->cpair_comment_single[1])
1726                     {
1727                       in_comment_single = FALSE;
1728                       ch = 0;
1729                       break;
1730                     }
1731                   
1732                   gstring = g_string_append_c (gstring, ch);
1733                   ch = json_scanner_get_char (scanner, line_p, position_p);
1734                 }
1735               /* ignore a missing newline at EOF for single line comments */
1736               if (in_comment_single &&
1737                   config->cpair_comment_single[1] == '\n')
1738                 in_comment_single = FALSE;
1739             }
1740           else if (config->scan_identifier && ch &&
1741                    strchr (config->cset_identifier_first, ch))
1742             {
1743             identifier_precedence:
1744               
1745               if (config->cset_identifier_nth && ch &&
1746                   strchr (config->cset_identifier_nth,
1747                           json_scanner_peek_next_char (scanner)))
1748                 {
1749                   token = G_TOKEN_IDENTIFIER;
1750                   gstring = g_string_new (NULL);
1751                   gstring = g_string_append_c (gstring, ch);
1752                   do
1753                     {
1754                       ch = json_scanner_get_char (scanner, line_p, position_p);
1755                       gstring = g_string_append_c (gstring, ch);
1756                       ch = json_scanner_peek_next_char (scanner);
1757                     }
1758                   while (ch && strchr (config->cset_identifier_nth, ch));
1759                   ch = 0;
1760                 }
1761               else if (config->scan_identifier_1char)
1762                 {
1763                   token = G_TOKEN_IDENTIFIER;
1764                   value.v_identifier = g_new0 (gchar, 2);
1765                   value.v_identifier[0] = ch;
1766                   ch = 0;
1767                 }
1768             }
1769           if (ch)
1770             {
1771               if (config->char_2_token)
1772                 token = ch;
1773               else
1774                 {
1775                   token = G_TOKEN_CHAR;
1776                   value.v_char = ch;
1777                 }
1778               ch = 0;
1779             }
1780         } /* default_case:... */
1781         break;
1782         }
1783       g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
1784     }
1785   while (ch != 0);
1786   
1787   if (in_comment_multi || in_comment_single ||
1788       in_string_sq || in_string_dq)
1789     {
1790       token = G_TOKEN_ERROR;
1791       if (gstring)
1792         {
1793           g_string_free (gstring, TRUE);
1794           gstring = NULL;
1795         }
1796       (*position_p)++;
1797       if (in_comment_multi || in_comment_single)
1798         value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
1799       else /* (in_string_sq || in_string_dq) */
1800         value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
1801     }
1802   
1803   if (gstring)
1804     {
1805       value.v_string = g_string_free (gstring, FALSE);
1806       gstring = NULL;
1807     }
1808   
1809   if (token == G_TOKEN_IDENTIFIER)
1810     {
1811       if (config->scan_symbols)
1812         {
1813           JsonScannerKey *key;
1814           guint scope_id;
1815           
1816           scope_id = scanner->scope_id;
1817           key = json_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
1818           if (!key && scope_id && scanner->config->scope_0_fallback)
1819             key = json_scanner_lookup_internal (scanner, 0, value.v_identifier);
1820           
1821           if (key)
1822             {
1823               g_free (value.v_identifier);
1824               token = G_TOKEN_SYMBOL;
1825               value.v_symbol = key->value;
1826             }
1827         }
1828       
1829       if (token == G_TOKEN_IDENTIFIER &&
1830           config->scan_identifier_NULL &&
1831           strlen (value.v_identifier) == 4)
1832         {
1833           gchar *null_upper = "NULL";
1834           gchar *null_lower = "null";
1835           
1836           if (scanner->config->case_sensitive)
1837             {
1838               if (value.v_identifier[0] == null_upper[0] &&
1839                   value.v_identifier[1] == null_upper[1] &&
1840                   value.v_identifier[2] == null_upper[2] &&
1841                   value.v_identifier[3] == null_upper[3])
1842                 token = G_TOKEN_IDENTIFIER_NULL;
1843             }
1844           else
1845             {
1846               if ((value.v_identifier[0] == null_upper[0] ||
1847                    value.v_identifier[0] == null_lower[0]) &&
1848                   (value.v_identifier[1] == null_upper[1] ||
1849                    value.v_identifier[1] == null_lower[1]) &&
1850                   (value.v_identifier[2] == null_upper[2] ||
1851                    value.v_identifier[2] == null_lower[2]) &&
1852                   (value.v_identifier[3] == null_upper[3] ||
1853                    value.v_identifier[3] == null_lower[3]))
1854                 token = G_TOKEN_IDENTIFIER_NULL;
1855             }
1856         }
1857     }
1858   
1859   *token_p = token;
1860   *value_p = value;
1861 }