1 /* json-scanner.c: Tokenizer for JSON
2 * Copyright (C) 2008 OpenedHand
4 * Based on JsonScanner: Flexible lexical scanner for general purpose.
5 * Copyright (C) 1997, 1998 Tim Janik
7 * Modified by Emmanuele Bassi <ebassi@openedhand.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
35 #include <glib/gprintf.h>
37 #include "json-scanner.h"
40 #include <io.h> /* For _read() */
/* Scanner configuration record.
 *
 * Holds the character sets consulted while scanning (characters to skip,
 * characters that may start or continue an identifier), the two-character
 * single-line comment pair, and a set of one-bit switches that enable
 * individual token kinds and conversions.  json_scanner_new() fills a
 * fresh instance field by field from json_scanner_config_template.
 *
 * NOTE(review): the inline "default" remarks on cset_skip_characters and
 * cpair_comment_single do not match json_scanner_config_template, which
 * sets " \t\r\n" and "//\n" respectively.
 */
43 struct _JsonScannerConfig
47 gchar *cset_skip_characters; /* default: " \t\n" */
/* Characters accepted as the first character of an identifier. */
48 gchar *cset_identifier_first;
/* Characters accepted after the first character of an identifier. */
49 gchar *cset_identifier_nth;
50 gchar *cpair_comment_single; /* default: "#\n" */
52 /* Should symbol lookup work case sensitive? */
53 guint case_sensitive : 1;
55 /* Boolean values to be adjusted "on the fly"
56 * to configure scanning behaviour.
58 guint skip_comment_multi : 1; /* C like comment */
59 guint skip_comment_single : 1; /* single line comment */
60 guint scan_comment_multi : 1; /* scan multi line comments? */
61 guint scan_identifier : 1;
/* Allow an identifier that consists of a single character. */
62 guint scan_identifier_1char : 1;
/* Report a four-letter identifier spelling NULL as G_TOKEN_IDENTIFIER_NULL. */
63 guint scan_identifier_NULL : 1;
/* Look scanned identifiers up in the symbol table. */
64 guint scan_symbols : 1;
65 guint scan_binary : 1;
68 guint scan_hex : 1; /* `0x0ff0' */
69 guint scan_hex_dollar : 1; /* `$0ff0' */
70 guint scan_string_sq : 1; /* string: 'anything' */
71 guint scan_string_dq : 1; /* string: "\\-escapes!\n" */
72 guint numbers_2_int : 1; /* bin, octal, hex => int */
73 guint int_2_float : 1; /* int => G_TOKEN_FLOAT? */
/* Report G_TOKEN_IDENTIFIER as G_TOKEN_STRING. */
74 guint identifier_2_string : 1;
75 guint char_2_token : 1; /* return G_TOKEN_CHAR? */
/* Use the symbol's stored value as the token type. */
76 guint symbol_2_token : 1;
77 guint scope_0_fallback : 1; /* try scope 0 on lookups? */
78 guint store_int64 : 1; /* use value.v_int64 rather than v_int */
/* Built-in configuration that json_scanner_new() copies into every new
 * scanner.  The initialisers are positional; the trailing comment on each
 * line names the _JsonScannerConfig field it sets.
 *
 * NOTE(review): json_scanner_get_token_ll() treats cpair_comment_single[0]
 * as the character that opens a single-line comment and [1] as the one
 * that ends it; with "//\n" both of those are '/'.  Confirm that this is
 * the intended pair.
 */
82 static JsonScannerConfig json_scanner_config_template =
84 ( " \t\r\n" ) /* cset_skip_characters */,
89 ) /* cset_identifier_first */,
95 ) /* cset_identifier_nth */,
96 ( "//\n" ) /* cpair_comment_single */,
97 TRUE /* case_sensitive */,
98 TRUE /* skip_comment_multi */,
99 TRUE /* skip_comment_single */,
100 FALSE /* scan_comment_multi */,
101 TRUE /* scan_identifier */,
102 TRUE /* scan_identifier_1char */,
103 FALSE /* scan_identifier_NULL */,
104 TRUE /* scan_symbols */,
105 TRUE /* scan_binary */,
106 TRUE /* scan_octal */,
107 TRUE /* scan_float */,
109 TRUE /* scan_hex_dollar */,
110 TRUE /* scan_string_sq */,
111 TRUE /* scan_string_dq */,
112 TRUE /* numbers_2_int */,
113 FALSE /* int_2_float */,
114 FALSE /* identifier_2_string */,
115 TRUE /* char_2_token */,
116 TRUE /* symbol_2_token */,
117 FALSE /* scope_0_fallback */,
118 TRUE /* store_int64 */
121 /* --- defines --- */
/* to_lower(c): byte-wise lower-casing helper.
 * Each visible term yields the upper-to-lower offset when (c), viewed as a
 * guchar, lies in its range ('A'-'Z', 192-214, 216-222) and 0 otherwise;
 * the terms are OR-ed together.  The macro argument is evaluated several
 * times, so pass an expression without side effects.
 * NOTE(review): the opening and closing lines of the expansion are not
 * part of this excerpt; the two numeric ranges look like Latin-1 capital
 * letters - confirm.
 */
122 #define to_lower(c) ( \
124 ( (((guchar)(c))>='A' && ((guchar)(c))<='Z') * ('a'-'A') ) | \
125 ( (((guchar)(c))>=192 && ((guchar)(c))<=214) * (224-192) ) | \
126 ( (((guchar)(c))>=216 && ((guchar)(c))<=222) * (248-216) ) | \
/* READ_BUFFER_SIZE: numeric constant (4000).
 * NOTE(review): no use of it appears in this excerpt - check whether it
 * is still needed.
 */
131 #define READ_BUFFER_SIZE (4000)
133 /* --- typedefs --- */
/* Key type stored in the scanner's symbol hash table.  Code in this file
 * accesses three members: scope_id and symbol (compared by
 * json_scanner_key_equal(), symbol also hashed by json_scanner_key_hash())
 * and value (handed back as the token value of a matched symbol).
 * The member declarations themselves are not part of this excerpt.
 */
134 typedef struct _JsonScannerKey JsonScannerKey;
136 struct _JsonScannerKey
143 /* --- prototypes --- */
/* Forward declarations of helpers defined further down in this file:
 * the symbol-table equality and hash callbacks, the symbol lookup, the
 * low-level (_ll) and filtering (_i) token readers, and the readers that
 * fetch a single character or a Unicode escape from the input.
 * Several parameter lines of these declarations are not part of this
 * excerpt.
 */
144 static gboolean json_scanner_key_equal (gconstpointer v1,
146 static guint json_scanner_key_hash (gconstpointer v);
149 JsonScannerKey *json_scanner_lookup_internal (JsonScanner *scanner,
151 const gchar *symbol);
152 static void json_scanner_get_token_ll (JsonScanner *scanner,
154 GTokenValue *value_p,
157 static void json_scanner_get_token_i (JsonScanner *scanner,
159 GTokenValue *value_p,
163 static guchar json_scanner_peek_next_char (JsonScanner *scanner);
164 static guchar json_scanner_get_char (JsonScanner *scanner,
167 static gunichar json_scanner_get_unichar (JsonScanner *scanner,
171 /* --- functions --- */
/* json_scanner_char_2_num:
 * Sorts @c into one of three ASCII classes - decimal digit, upper-case
 * letter, lower-case letter.  The number scanner calls this with a radix
 * of 10, 16 or 36 and tests the result against zero (a negative result
 * makes the caller reject the character).
 * NOTE(review): the statements executed in each branch and the handling
 * of the radix argument are not part of this excerpt.
 */
173 json_scanner_char_2_num (guchar c,
176 if (c >= '0' && c <= '9')
178 else if (c >= 'A' && c <= 'Z')
180 else if (c >= 'a' && c <= 'z')
/* json_scanner_new:
 * Creates a scanner in its initial state.
 *
 * A zero-filled JsonScanner and a zero-filled JsonScannerConfig are
 * allocated, every configuration field is copied from
 * json_scanner_config_template, the current and look-ahead token slots are
 * reset to G_TOKEN_NONE, and an empty symbol table is created.  No input
 * is attached yet (text, text_end and buffer are NULL); see
 * json_scanner_input_text().
 * NOTE(review): the return statement is not part of this excerpt;
 * presumably the new scanner is returned.
 */
192 json_scanner_new (void)
194 JsonScanner *scanner;
195 JsonScannerConfig *config_templ;
197 config_templ = &json_scanner_config_template;
199 scanner = g_new0 (JsonScanner, 1);
201 scanner->user_data = NULL;
202 scanner->max_parse_errors = 1;
203 scanner->parse_errors = 0;
204 scanner->input_name = NULL;
205 g_datalist_init (&scanner->qdata);
/* The scanner owns a private copy of the configuration flags; the
 * character-set strings are shared with the template (pointers only).
 */
207 scanner->config = g_new0 (JsonScannerConfig, 1);
209 scanner->config->case_sensitive = config_templ->case_sensitive;
210 scanner->config->cset_skip_characters = config_templ->cset_skip_characters;
/* Never leave the skip set NULL: it is handed to strchr() while scanning. */
211 if (!scanner->config->cset_skip_characters)
212 scanner->config->cset_skip_characters = "";
213 scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
214 scanner->config->cset_identifier_nth = config_templ->cset_identifier_nth;
215 scanner->config->cpair_comment_single = config_templ->cpair_comment_single;
216 scanner->config->skip_comment_multi = config_templ->skip_comment_multi;
217 scanner->config->skip_comment_single = config_templ->skip_comment_single;
218 scanner->config->scan_comment_multi = config_templ->scan_comment_multi;
219 scanner->config->scan_identifier = config_templ->scan_identifier;
220 scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
221 scanner->config->scan_identifier_NULL = config_templ->scan_identifier_NULL;
222 scanner->config->scan_symbols = config_templ->scan_symbols;
223 scanner->config->scan_binary = config_templ->scan_binary;
224 scanner->config->scan_octal = config_templ->scan_octal;
225 scanner->config->scan_float = config_templ->scan_float;
226 scanner->config->scan_hex = config_templ->scan_hex;
227 scanner->config->scan_hex_dollar = config_templ->scan_hex_dollar;
228 scanner->config->scan_string_sq = config_templ->scan_string_sq;
229 scanner->config->scan_string_dq = config_templ->scan_string_dq;
230 scanner->config->numbers_2_int = config_templ->numbers_2_int;
231 scanner->config->int_2_float = config_templ->int_2_float;
232 scanner->config->identifier_2_string = config_templ->identifier_2_string;
233 scanner->config->char_2_token = config_templ->char_2_token;
234 scanner->config->symbol_2_token = config_templ->symbol_2_token;
235 scanner->config->scope_0_fallback = config_templ->scope_0_fallback;
236 scanner->config->store_int64 = config_templ->store_int64;
/* Current token slot: nothing scanned yet. */
238 scanner->token = G_TOKEN_NONE;
239 scanner->value.v_int64 = 0;
241 scanner->position = 0;
/* Look-ahead slot used by json_scanner_peek_next_token(). */
243 scanner->next_token = G_TOKEN_NONE;
244 scanner->next_value.v_int64 = 0;
245 scanner->next_line = 1;
246 scanner->next_position = 0;
/* Symbol table keyed by JsonScannerKey, using the callbacks in this file. */
248 scanner->symbol_table = g_hash_table_new (json_scanner_key_hash,
249 json_scanner_key_equal);
250 scanner->text = NULL;
251 scanner->text_end = NULL;
252 scanner->buffer = NULL;
253 scanner->scope_id = 0;
/* json_scanner_free_value:
 * Releases whatever a token slot owns and marks the slot empty by storing
 * G_TOKEN_NONE in *@token_p.  For the identifier and comment token types
 * listed below the string held in @value_p is freed with g_free().
 * NOTE(review): the switch header and any further case labels are not
 * part of this excerpt, so the full list of freed token types cannot be
 * confirmed from here.
 */
259 json_scanner_free_value (GTokenType *token_p,
260 GTokenValue *value_p)
265 case G_TOKEN_IDENTIFIER:
266 case G_TOKEN_IDENTIFIER_NULL:
267 case G_TOKEN_COMMENT_SINGLE:
268 case G_TOKEN_COMMENT_MULTI:
269 g_free (value_p->v_string);
276 *token_p = G_TOKEN_NONE;
/* json_scanner_destroy_symbol_table_entry:
 * Callback that json_scanner_destroy() passes to g_hash_table_foreach().
 * Frees the symbol string owned by the key and then the JsonScannerKey
 * itself, which was allocated with the GLib slice allocator.
 */
280 json_scanner_destroy_symbol_table_entry (gpointer _key,
284 JsonScannerKey *key = _key;
286 g_free (key->symbol);
287 g_slice_free (JsonScannerKey, key);
/* json_scanner_destroy:
 * Releases the resources held by @scanner: the qdata list, every symbol
 * table entry followed by the table itself, any value still owned by the
 * current and look-ahead token slots, the configuration copy and the
 * buffer.  Does nothing (after a GLib warning) when @scanner is NULL.
 *
 * scanner->text is not freed here; json_scanner_input_text() stores the
 * caller's pointer without copying it.
 * NOTE(review): the release of the scanner structure itself is not part
 * of this excerpt.
 */
291 json_scanner_destroy (JsonScanner *scanner)
293 g_return_if_fail (scanner != NULL);
295 g_datalist_clear (&scanner->qdata);
/* Entries must be freed by hand before the table is destroyed: the table
 * was created with g_hash_table_new(), i.e. without destroy notifiers.
 */
296 g_hash_table_foreach (scanner->symbol_table,
297 json_scanner_destroy_symbol_table_entry,
299 g_hash_table_destroy (scanner->symbol_table);
300 json_scanner_free_value (&scanner->token, &scanner->value);
301 json_scanner_free_value (&scanner->next_token, &scanner->next_value);
302 g_free (scanner->config);
303 g_free (scanner->buffer);
/* json_scanner_error:
 * Records one parse error on @scanner and, when a message handler is
 * installed, formats the printf-style @format with its variadic arguments
 * into a newly allocated string and hands it to scanner->msg_handler.
 * The error counter is incremented even when no handler is set.
 * Returns early (with a GLib warning) if @scanner or @format is NULL.
 * NOTE(review): the clean-up of the formatted string and of the argument
 * list is not part of this excerpt.
 */
308 json_scanner_error (JsonScanner *scanner,
312 g_return_if_fail (scanner != NULL);
313 g_return_if_fail (format != NULL);
315 scanner->parse_errors++;
317 if (scanner->msg_handler)
322 va_start (args, format);
323 string = g_strdup_vprintf (format, args);
326 scanner->msg_handler (scanner, string);
/* json_scanner_key_equal:
 * Equality callback for the symbol table.  Two JsonScannerKey objects are
 * equal when their scope_id members match and their symbol strings are
 * identical according to strcmp() (byte-wise comparison).
 */
333 json_scanner_key_equal (gconstpointer v1,
336 const JsonScannerKey *key1 = v1;
337 const JsonScannerKey *key2 = v2;
339 return (key1->scope_id == key2->scope_id) &&
340 (strcmp (key1->symbol, key2->symbol) == 0);
/* json_scanner_key_hash:
 * Hash callback for the symbol table.  Walks the NUL-terminated symbol
 * string of the key and folds each character into the running hash as
 * h = h * 31 + c, written with a shift as (h << 5) - h + c.
 * NOTE(review): the starting value of h, any contribution of scope_id and
 * the return statement are not part of this excerpt.
 */
344 json_scanner_key_hash (gconstpointer v)
346 const JsonScannerKey *key = v;
351 for (c = key->symbol; *c; c++)
352 h = (h << 5) - h + *c;
/* json_scanner_lookup_internal:
 * Finds the symbol-table entry for (@scope_id, @symbol) by filling in a
 * temporary key and passing its address to g_hash_table_lookup().
 *
 * When the configuration is not case sensitive, a scratch buffer of
 * strlen (symbol) + 1 bytes is allocated and filled character by
 * character before the lookup; otherwise @symbol itself is used as the
 * key string (the cast only removes const so that it fits the key
 * member).
 * NOTE(review): the per-character transformation, the release of the
 * scratch buffer and the return statement are not part of this excerpt.
 */
357 static inline JsonScannerKey *
358 json_scanner_lookup_internal (JsonScanner *scanner,
362 JsonScannerKey *key_p;
365 key.scope_id = scope_id;
367 if (!scanner->config->case_sensitive)
372 key.symbol = g_new (gchar, strlen (symbol) + 1);
373 for (d = key.symbol, c = symbol; *c; c++, d++)
376 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
381 key.symbol = (gchar*) symbol;
382 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
/* json_scanner_scope_add_symbol:
 * Registers @symbol in the scope @scope_id of the scanner's symbol table.
 *
 * An existing entry is looked up first.  The visible code then allocates
 * a JsonScannerKey from the slice allocator, stores @scope_id and a
 * private copy of @symbol in it, and inserts it into the hash table with
 * the key object serving as both key and value.
 * Returns early (with a GLib warning) if @scanner or @symbol is NULL.
 * NOTE(review): the test on the lookup result, the body of the
 * case-insensitive branch and the assignment of the symbol's value are
 * not part of this excerpt.
 */
389 json_scanner_scope_add_symbol (JsonScanner *scanner,
396 g_return_if_fail (scanner != NULL);
397 g_return_if_fail (symbol != NULL);
399 key = json_scanner_lookup_internal (scanner, scope_id, symbol);
402 key = g_slice_new (JsonScannerKey);
403 key->scope_id = scope_id;
404 key->symbol = g_strdup (symbol);
406 if (!scanner->config->case_sensitive)
418 g_hash_table_insert (scanner->symbol_table, key, key);
/* json_scanner_peek_next_token:
 * Returns the type of the upcoming token without making it the current
 * one.  If the look-ahead slot is empty (G_TOKEN_NONE), the look-ahead
 * line and position are seeded from the current ones and one token is
 * scanned into the next_* fields; later calls return the cached result.
 * Returns G_TOKEN_EOF when @scanner is NULL.
 */
425 json_scanner_peek_next_token (JsonScanner *scanner)
427 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
429 if (scanner->next_token == G_TOKEN_NONE)
431 scanner->next_line = scanner->line;
432 scanner->next_position = scanner->position;
433 json_scanner_get_token_i (scanner,
434 &scanner->next_token,
435 &scanner->next_value,
437 &scanner->next_position);
440 return scanner->next_token;
/* json_scanner_get_next_token:
 * Advances the scanner by one token and returns the new current token
 * type.  When a look-ahead token has already been scanned by
 * json_scanner_peek_next_token(), the value of the old current token is
 * released and the look-ahead token, value, line and position are moved
 * into the current slot, leaving the look-ahead slot empty.
 * Returns G_TOKEN_EOF when @scanner is NULL.
 * NOTE(review): the arguments of the final json_scanner_get_token_i()
 * call and the branch it belongs to are not part of this excerpt;
 * presumably it scans directly into the current slot when nothing was
 * peeked.
 */
444 json_scanner_get_next_token (JsonScanner *scanner)
446 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
448 if (scanner->next_token != G_TOKEN_NONE)
450 json_scanner_free_value (&scanner->token, &scanner->value);
/* Ownership of the value moves with the struct copy below. */
452 scanner->token = scanner->next_token;
453 scanner->value = scanner->next_value;
454 scanner->line = scanner->next_line;
455 scanner->position = scanner->next_position;
456 scanner->next_token = G_TOKEN_NONE;
459 json_scanner_get_token_i (scanner,
465 return scanner->token;
/* json_scanner_input_text:
 * Attaches an in-memory input to @scanner and resets the token state.
 *
 * The scanner keeps the @text pointer itself and computes the end of the
 * input as text + text_len; the data is not copied, so the caller's
 * buffer has to stay valid while tokens are being read.  Any previous
 * scanner->buffer is freed and cleared.
 * NOTE(review): the line in front of the text != NULL check is not part
 * of this excerpt, so that check may be conditional.
 */
469 json_scanner_input_text (JsonScanner *scanner,
473 g_return_if_fail (scanner != NULL);
475 g_return_if_fail (text != NULL);
479 scanner->token = G_TOKEN_NONE;
480 scanner->value.v_int64 = 0;
482 scanner->position = 0;
483 scanner->next_token = G_TOKEN_NONE;
485 scanner->text = text;
486 scanner->text_end = text + text_len;
490 g_free (scanner->buffer);
491 scanner->buffer = NULL;
/* json_scanner_peek_next_char:
 * Returns the next input byte without consuming it while scanner->text
 * is still before scanner->text_end.
 * NOTE(review): the value returned once the input is exhausted is not
 * part of this excerpt.
 */
496 json_scanner_peek_next_char (JsonScanner *scanner)
498 if (scanner->text < scanner->text_end)
499 return *scanner->text;
/* json_scanner_get_char:
 * Consumes one input byte: while scanner->text is before
 * scanner->text_end the byte is read and the text pointer advanced.
 * Callers in this file loop until the result is 0.
 * NOTE(review): the end-of-input branch and the bookkeeping done through
 * the line and position arguments are not part of this excerpt.
 */
505 json_scanner_get_char (JsonScanner *scanner,
511 if (scanner->text < scanner->text_end)
512 fchar = *(scanner->text++);
/* is_hex_digit(c): non-zero when (c) is an ASCII hexadecimal digit
 * (0-9, a-f or A-F).  The argument is evaluated more than once, so pass
 * an expression without side effects.
 */
529 #define is_hex_digit(c) (((c) >= '0' && (c) <= '9') || \
530 ((c) >= 'a' && (c) <= 'f') || \
531 ((c) >= 'A' && (c) <= 'F'))
/* to_hex_digit(c): numeric value (0-15) of a hexadecimal digit.
 * Decimal digits are offset from '0'; for letters the low three bits of
 * the character code give 1 to 6 in either case, to which 9 is added.
 * Only meaningful when is_hex_digit(c) holds; the argument is evaluated
 * twice.
 */
532 #define to_hex_digit(c) (((c) <= '9') ? (c) - '0' : ((c) & 7) + 9)
/* json_scanner_get_unichar:
 * Reads the four characters that follow a \u escape and assembles them
 * into a code point, most significant digit first: the digit read in
 * iteration i is shifted left by (3 - i) * 4 bits and added to the
 * result.  The result is asserted to be either a valid Unicode character
 * or a surrogate, which the string scanner resolves afterwards.
 * NOTE(review): the handling of a non-hexadecimal character, the initial
 * value of the accumulator and the return statement are not part of this
 * excerpt.
 */
535 json_scanner_get_unichar (JsonScanner *scanner,
544 for (i = 0; i < 4; i++)
546 ch = json_scanner_get_char (scanner, line_p, position_p);
548 if (is_hex_digit (ch))
549 uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4));
554 g_assert (g_unichar_validate (uchar) || g_unichar_type (uchar) == G_UNICODE_SURROGATE);
/* Implementation notes for decode_utf16_surrogate_pair():
 * units[0] must be a high surrogate (0xd800-0xdbff) and units[1] a low
 * surrogate (0xdc00-0xdfff); both conditions are enforced with
 * g_assert().  The low ten bits of the high surrogate, shifted left by
 * ten, and the low ten bits of the low surrogate are added into the
 * result.
 * NOTE(review): the initial value of the result and the return statement
 * are not part of this excerpt.  The caller in
 * json_scanner_get_token_ll() takes units[1] from the input text, so
 * confirm that it is range-checked before this function is reached;
 * otherwise malformed input trips the assertion.
 */
560 * decode_utf16_surrogate_pair:
561 * @units: (array length=2): a pair of UTF-16 code points
563 * Decodes a surrogate pair of UTF-16 code points into the equivalent
564 * Unicode code point.
566 * Returns: the Unicode code point equivalent to the surrogate pair
568 static inline gunichar
569 decode_utf16_surrogate_pair (const gunichar units[2])
573 g_assert (0xd800 <= units[0] && units[0] <= 0xdbff);
574 g_assert (0xdc00 <= units[1] && units[1] <= 0xdfff);
577 ucs += (units[0] & 0x3ff) << 10;
578 ucs += (units[1] & 0x3ff);
/* json_scanner_unexp_token:
 * Reports that the current token (scanner->token) is not the one the
 * parser wanted (@expected_token).
 *
 * Two scratch strings are built with g_snprintf(): token_string (limit
 * 56 bytes) describes the token that was actually read, expected_string
 * (limit 64 bytes) describes what was expected.  They are combined with
 * the optional @message into one of several message layouts and passed to
 * json_scanner_error(); both scratch strings are freed before returning.
 *
 * @identifier_spec falls back to the word identifier when it is NULL.
 * One branch of the first switch resets @expected_token to G_TOKEN_NONE
 * and turns scanner->value.v_error into a fixed description.
 * Returns early (with a GLib warning) if @scanner is NULL.
 * NOTE(review): many case labels, several format strings and arguments,
 * and the condition guarding the symbol_spec fallback are not part of
 * this excerpt.
 */
584 json_scanner_unexp_token (JsonScanner *scanner,
585 GTokenType expected_token,
586 const gchar *identifier_spec,
587 const gchar *symbol_spec,
588 const gchar *symbol_name,
589 const gchar *message)
592 guint token_string_len;
593 gchar *expected_string;
594 guint expected_string_len;
595 gchar *message_prefix;
596 gboolean print_unexp;
598 g_return_if_fail (scanner != NULL);
600 if (!identifier_spec)
601 identifier_spec = "identifier";
603 symbol_spec = "symbol";
/* Fixed-size scratch buffers; every g_snprintf() below is bounded by the
 * matching _len value, so long descriptions are truncated.
 */
605 token_string_len = 56;
606 token_string = g_new (gchar, token_string_len + 1);
607 expected_string_len = 64;
608 expected_string = g_new (gchar, expected_string_len + 1);
/* First switch: describe the token that was actually read. */
611 switch (scanner->token)
614 g_snprintf (token_string, token_string_len, "end of file");
618 if (scanner->token >= 1 && scanner->token <= 255)
620 if ((scanner->token >= ' ' && scanner->token <= '~') ||
621 strchr (scanner->config->cset_identifier_first, scanner->token) ||
622 strchr (scanner->config->cset_identifier_nth, scanner->token))
623 g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
625 g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
628 else if (!scanner->config->symbol_2_token)
630 g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
635 if (expected_token == G_TOKEN_SYMBOL ||
636 (scanner->config->symbol_2_token &&
637 expected_token > G_TOKEN_LAST))
640 g_snprintf (token_string, token_string_len,
642 print_unexp ? "" : "invalid ",
646 g_snprintf (token_string, token_string_len,
648 print_unexp ? "" : "invalid ",
/* Scanner-level error: the expectation is dropped and the stored error
 * code is translated into a fixed description.
 */
654 expected_token = G_TOKEN_NONE;
655 switch (scanner->value.v_error)
657 case G_ERR_UNEXP_EOF:
658 g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
661 case G_ERR_UNEXP_EOF_IN_STRING:
662 g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
665 case G_ERR_UNEXP_EOF_IN_COMMENT:
666 g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
669 case G_ERR_NON_DIGIT_IN_CONST:
670 g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
673 case G_ERR_FLOAT_RADIX:
674 g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
677 case G_ERR_FLOAT_MALFORMED:
678 g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
681 case G_ERR_DIGIT_RADIX:
682 g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
687 g_snprintf (token_string, token_string_len, "scanner: unknown error");
693 g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
696 case G_TOKEN_IDENTIFIER:
697 case G_TOKEN_IDENTIFIER_NULL:
698 if (expected_token == G_TOKEN_IDENTIFIER ||
699 expected_token == G_TOKEN_IDENTIFIER_NULL)
701 g_snprintf (token_string, token_string_len,
703 print_unexp ? "" : "invalid ",
705 scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
712 if (scanner->config->store_int64)
713 g_snprintf (token_string, token_string_len, "number `%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
715 g_snprintf (token_string, token_string_len, "number `%lu'", scanner->value.v_int);
719 g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
723 if (expected_token == G_TOKEN_STRING)
725 g_snprintf (token_string, token_string_len,
726 "%s%sstring constant \"%s\"",
727 print_unexp ? "" : "invalid ",
728 scanner->value.v_string[0] == 0 ? "empty " : "",
729 scanner->value.v_string);
/* Force a closing quote and a terminator into the last two slots so that
 * a truncated string constant still ends with a quote.
 */
730 token_string[token_string_len - 2] = '"';
731 token_string[token_string_len - 1] = 0;
734 case G_TOKEN_COMMENT_SINGLE:
735 case G_TOKEN_COMMENT_MULTI:
736 g_snprintf (token_string, token_string_len, "comment");
740 /* somehow the user's parsing code is screwed, there isn't much
741 * we can do about it.
742 * Note, a common case to trigger this is
743 * json_scanner_peek_next_token(); json_scanner_unexp_token();
744 * without an intermediate json_scanner_get_next_token().
746 g_assert_not_reached ();
751 switch (expected_token)
756 g_snprintf (expected_string, expected_string_len, "end of file");
759 if (expected_token >= 1 && expected_token <= 255)
761 if ((expected_token >= ' ' && expected_token <= '~') ||
762 strchr (scanner->config->cset_identifier_first, expected_token) ||
763 strchr (scanner->config->cset_identifier_nth, expected_token))
764 g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
766 g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
769 else if (!scanner->config->symbol_2_token)
771 g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
776 need_valid = (scanner->token == G_TOKEN_SYMBOL ||
777 (scanner->config->symbol_2_token &&
778 scanner->token > G_TOKEN_LAST));
779 g_snprintf (expected_string, expected_string_len,
781 need_valid ? "valid " : "",
783 /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
786 g_snprintf (expected_string, expected_string_len, "%scharacter",
787 scanner->token == G_TOKEN_CHAR ? "valid " : "");
791 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
792 scanner->token == expected_token ? "valid " : "", tstring);
796 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
797 scanner->token == expected_token ? "valid " : "", tstring);
801 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
802 scanner->token == expected_token ? "valid " : "", tstring);
805 tstring = "hexadecimal";
806 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
807 scanner->token == expected_token ? "valid " : "", tstring);
811 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
812 scanner->token == expected_token ? "valid " : "", tstring);
815 g_snprintf (expected_string,
818 scanner->token == G_TOKEN_STRING ? "valid " : "");
820 case G_TOKEN_IDENTIFIER:
821 case G_TOKEN_IDENTIFIER_NULL:
822 need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
823 scanner->token == G_TOKEN_IDENTIFIER);
824 g_snprintf (expected_string,
827 need_valid ? "valid " : "",
830 case G_TOKEN_COMMENT_SINGLE:
831 tstring = "single-line";
832 g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
833 scanner->token == expected_token ? "valid " : "", tstring);
835 case G_TOKEN_COMMENT_MULTI:
836 tstring = "multi-line";
837 g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
838 scanner->token == expected_token ? "valid " : "", tstring);
842 /* this is handled upon printout */
/* A non-empty caller message is appended after a dash separator. */
846 if (message && message[0] != 0)
847 message_prefix = " - ";
/* Pick the message layout from the (possibly reset) expectation. */
853 if (expected_token == G_TOKEN_ERROR)
855 json_scanner_error (scanner,
856 "failure around %s%s%s",
861 else if (expected_token == G_TOKEN_NONE)
864 json_scanner_error (scanner,
870 json_scanner_error (scanner,
879 json_scanner_error (scanner,
880 "unexpected %s, expected %s%s%s",
886 json_scanner_error (scanner,
887 "%s, expected %s%s%s",
894 g_free (token_string);
895 g_free (expected_string);
/* json_scanner_get_token_i:
 * Filtering layer on top of json_scanner_get_token_ll().
 *
 * The previous content of the token slot is released and a raw token is
 * read.  The loop condition repeats this while the result is a character
 * listed in cset_skip_characters (either as a raw character token or as
 * G_TOKEN_CHAR) or a comment whose skip flag is set.  The surviving token
 * is then adjusted according to the configuration: identifiers may become
 * strings, symbols may become their own token type, numeric kinds may be
 * folded into G_TOKEN_INT, and G_TOKEN_INT may be converted to
 * G_TOKEN_FLOAT with the value copied from v_int64 or v_int depending on
 * store_int64.
 * NOTE(review): the loop header, the switch header and several case
 * labels are not part of this excerpt.
 */
899 json_scanner_get_token_i (JsonScanner *scanner,
901 GTokenValue *value_p,
907 json_scanner_free_value (token_p, value_p);
908 json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
/* Keep reading while the token is skippable whitespace or a comment. */
910 while (((*token_p > 0 && *token_p < 256) &&
911 strchr (scanner->config->cset_skip_characters, *token_p)) ||
912 (*token_p == G_TOKEN_CHAR &&
913 strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
914 (*token_p == G_TOKEN_COMMENT_MULTI &&
915 scanner->config->skip_comment_multi) ||
916 (*token_p == G_TOKEN_COMMENT_SINGLE &&
917 scanner->config->skip_comment_single));
921 case G_TOKEN_IDENTIFIER:
922 if (scanner->config->identifier_2_string)
923 *token_p = G_TOKEN_STRING;
/* The value stored for the symbol doubles as its token type. */
927 if (scanner->config->symbol_2_token)
928 *token_p = (GTokenType) value_p->v_symbol;
934 if (scanner->config->numbers_2_int)
935 *token_p = G_TOKEN_INT;
/* Optional promotion of integers to floating point. */
942 if (*token_p == G_TOKEN_INT &&
943 scanner->config->int_2_float)
945 *token_p = G_TOKEN_FLOAT;
946 if (scanner->config->store_int64)
949 /* work around error C2520, see gvaluetransform.c */
950 value_p->v_float = (__int64)value_p->v_int64;
952 value_p->v_float = value_p->v_int64;
956 value_p->v_float = value_p->v_int;
963 json_scanner_get_token_ll (JsonScanner *scanner,
965 GTokenValue *value_p,
969 JsonScannerConfig *config;
971 gboolean in_comment_multi;
972 gboolean in_comment_single;
973 gboolean in_string_sq;
974 gboolean in_string_dq;
979 config = scanner->config;
980 (*value_p).v_int64 = 0;
982 if (scanner->text >= scanner->text_end ||
983 scanner->token == G_TOKEN_EOF)
985 *token_p = G_TOKEN_EOF;
989 in_comment_multi = FALSE;
990 in_comment_single = FALSE;
991 in_string_sq = FALSE;
992 in_string_dq = FALSE;
995 do /* while (ch != 0) */
997 gboolean dotted_float = FALSE;
999 ch = json_scanner_get_char (scanner, line_p, position_p);
1002 token = G_TOKEN_NONE;
1004 /* this is *evil*, but needed ;(
1005 * we first check for identifier first character, because it
1006 * might interfere with other key chars like slashes or numbers
1008 if (config->scan_identifier &&
1009 ch && strchr (config->cset_identifier_first, ch))
1010 goto identifier_precedence;
1015 token = G_TOKEN_EOF;
1021 if (!config->scan_comment_multi ||
1022 json_scanner_peek_next_char (scanner) != '*')
1024 json_scanner_get_char (scanner, line_p, position_p);
1025 token = G_TOKEN_COMMENT_MULTI;
1026 in_comment_multi = TRUE;
1027 gstring = g_string_new (NULL);
1028 while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1030 if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
1032 json_scanner_get_char (scanner, line_p, position_p);
1033 in_comment_multi = FALSE;
1037 gstring = g_string_append_c (gstring, ch);
1043 if (!config->scan_string_sq)
1045 token = G_TOKEN_STRING;
1046 in_string_sq = TRUE;
1047 gstring = g_string_new (NULL);
1048 while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1052 in_string_sq = FALSE;
1056 gstring = g_string_append_c (gstring, ch);
1062 if (!config->scan_string_dq)
1064 token = G_TOKEN_STRING;
1065 in_string_dq = TRUE;
1066 gstring = g_string_new (NULL);
1067 while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1071 in_string_dq = FALSE;
1078 ch = json_scanner_get_char (scanner, line_p, position_p);
1088 gstring = g_string_append_c (gstring, '\\');
1092 gstring = g_string_append_c (gstring, '\n');
1096 gstring = g_string_append_c (gstring, '\t');
1100 gstring = g_string_append_c (gstring, '\r');
1104 gstring = g_string_append_c (gstring, '\b');
1108 gstring = g_string_append_c (gstring, '\f');
1112 fchar = json_scanner_peek_next_char (scanner);
1113 if (is_hex_digit (fchar))
1117 ucs = json_scanner_get_unichar (scanner, line_p, position_p);
1119 /* resolve UTF-16 surrogates for Unicode characters not in the BMP,
1120 * as per ECMA 404, § 9, "String"
1122 if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
1124 /* read next surrogate */
1125 if ('\\' == json_scanner_get_char (scanner, line_p, position_p) &&
1126 'u' == json_scanner_get_char (scanner, line_p, position_p))
1131 units[1] = json_scanner_get_unichar (scanner, line_p, position_p);
1133 ucs = decode_utf16_surrogate_pair (units);
1134 g_assert (g_unichar_validate (ucs));
1138 gstring = g_string_append_unichar (gstring, ucs);
1151 fchar = json_scanner_peek_next_char (scanner);
1152 if (fchar >= '0' && fchar <= '7')
1154 ch = json_scanner_get_char (scanner, line_p, position_p);
1155 i = i * 8 + ch - '0';
1156 fchar = json_scanner_peek_next_char (scanner);
1157 if (fchar >= '0' && fchar <= '7')
1159 ch = json_scanner_get_char (scanner, line_p, position_p);
1160 i = i * 8 + ch - '0';
1163 gstring = g_string_append_c (gstring, i);
1167 gstring = g_string_append_c (gstring, ch);
1172 gstring = g_string_append_c (gstring, ch);
1179 if (!config->scan_float)
1181 token = G_TOKEN_FLOAT;
1182 dotted_float = TRUE;
1183 ch = json_scanner_get_char (scanner, line_p, position_p);
1184 goto number_parsing;
1187 if (!config->scan_hex_dollar)
1189 token = G_TOKEN_HEX;
1190 ch = json_scanner_get_char (scanner, line_p, position_p);
1191 goto number_parsing;
1194 if (config->scan_octal)
1195 token = G_TOKEN_OCTAL;
1197 token = G_TOKEN_INT;
1198 ch = json_scanner_peek_next_char (scanner);
1199 if (config->scan_hex && (ch == 'x' || ch == 'X'))
1201 token = G_TOKEN_HEX;
1202 json_scanner_get_char (scanner, line_p, position_p);
1203 ch = json_scanner_get_char (scanner, line_p, position_p);
1206 token = G_TOKEN_ERROR;
1207 value.v_error = G_ERR_UNEXP_EOF;
1211 if (json_scanner_char_2_num (ch, 16) < 0)
1213 token = G_TOKEN_ERROR;
1214 value.v_error = G_ERR_DIGIT_RADIX;
1219 else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1221 token = G_TOKEN_BINARY;
1222 json_scanner_get_char (scanner, line_p, position_p);
1223 ch = json_scanner_get_char (scanner, line_p, position_p);
1226 token = G_TOKEN_ERROR;
1227 value.v_error = G_ERR_UNEXP_EOF;
1231 if (json_scanner_char_2_num (ch, 10) < 0)
1233 token = G_TOKEN_ERROR;
1234 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1253 gboolean in_number = TRUE;
1256 if (token == G_TOKEN_NONE)
1257 token = G_TOKEN_INT;
1259 gstring = g_string_new (dotted_float ? "0." : "");
1260 gstring = g_string_append_c (gstring, ch);
1262 do /* while (in_number) */
1266 is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1268 ch = json_scanner_peek_next_char (scanner);
1270 if (json_scanner_char_2_num (ch, 36) >= 0 ||
1271 (config->scan_float && ch == '.') ||
1272 (is_E && (ch == '+' || ch == '-')))
1274 ch = json_scanner_get_char (scanner, line_p, position_p);
1279 if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
1281 value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
1282 token = G_TOKEN_ERROR;
1287 token = G_TOKEN_FLOAT;
1288 gstring = g_string_append_c (gstring, ch);
1302 gstring = g_string_append_c (gstring, ch);
1307 if (token != G_TOKEN_FLOAT)
1309 token = G_TOKEN_ERROR;
1310 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1314 gstring = g_string_append_c (gstring, ch);
1319 if ((token != G_TOKEN_HEX && !config->scan_float) ||
1320 (token != G_TOKEN_HEX &&
1321 token != G_TOKEN_OCTAL &&
1322 token != G_TOKEN_FLOAT &&
1323 token != G_TOKEN_INT))
1325 token = G_TOKEN_ERROR;
1326 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1331 if (token != G_TOKEN_HEX)
1332 token = G_TOKEN_FLOAT;
1333 gstring = g_string_append_c (gstring, ch);
1338 if (token != G_TOKEN_HEX)
1340 token = G_TOKEN_ERROR;
1341 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1345 gstring = g_string_append_c (gstring, ch);
1355 if (token == G_TOKEN_FLOAT)
1356 value.v_float = g_strtod (gstring->str, &endptr);
1362 case G_TOKEN_BINARY:
1363 ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
1366 ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
1369 ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
1372 ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
1376 if (scanner->config->store_int64)
1377 value.v_int64 = ui64;
1381 if (endptr && *endptr)
1383 token = G_TOKEN_ERROR;
1384 if (*endptr == 'e' || *endptr == 'E')
1385 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1387 value.v_error = G_ERR_DIGIT_RADIX;
1389 g_string_free (gstring, TRUE);
1392 } /* number_parsing:... */
1398 if (config->cpair_comment_single &&
1399 ch == config->cpair_comment_single[0])
1401 token = G_TOKEN_COMMENT_SINGLE;
1402 in_comment_single = TRUE;
1403 gstring = g_string_new (NULL);
1404 ch = json_scanner_get_char (scanner, line_p, position_p);
1407 if (ch == config->cpair_comment_single[1])
1409 in_comment_single = FALSE;
1414 gstring = g_string_append_c (gstring, ch);
1415 ch = json_scanner_get_char (scanner, line_p, position_p);
1417 /* ignore a missing newline at EOF for single line comments */
1418 if (in_comment_single &&
1419 config->cpair_comment_single[1] == '\n')
1420 in_comment_single = FALSE;
1422 else if (config->scan_identifier && ch &&
1423 strchr (config->cset_identifier_first, ch))
1425 identifier_precedence:
1427 if (config->cset_identifier_nth && ch &&
1428 strchr (config->cset_identifier_nth,
1429 json_scanner_peek_next_char (scanner)))
1431 token = G_TOKEN_IDENTIFIER;
1432 gstring = g_string_new (NULL);
1433 gstring = g_string_append_c (gstring, ch);
1436 ch = json_scanner_get_char (scanner, line_p, position_p);
1437 gstring = g_string_append_c (gstring, ch);
1438 ch = json_scanner_peek_next_char (scanner);
1440 while (ch && strchr (config->cset_identifier_nth, ch));
1443 else if (config->scan_identifier_1char)
1445 token = G_TOKEN_IDENTIFIER;
1446 value.v_identifier = g_new0 (gchar, 2);
1447 value.v_identifier[0] = ch;
1453 if (config->char_2_token)
1457 token = G_TOKEN_CHAR;
1462 } /* default_case:... */
1465 g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
1469 if (in_comment_multi || in_comment_single ||
1470 in_string_sq || in_string_dq)
1472 token = G_TOKEN_ERROR;
1475 g_string_free (gstring, TRUE);
1479 if (in_comment_multi || in_comment_single)
1480 value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
1481 else /* (in_string_sq || in_string_dq) */
1482 value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
1487 value.v_string = g_string_free (gstring, FALSE);
1491 if (token == G_TOKEN_IDENTIFIER)
1493 if (config->scan_symbols)
1495 JsonScannerKey *key;
1498 scope_id = scanner->scope_id;
1499 key = json_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
1500 if (!key && scope_id && scanner->config->scope_0_fallback)
1501 key = json_scanner_lookup_internal (scanner, 0, value.v_identifier);
1505 g_free (value.v_identifier);
1506 token = G_TOKEN_SYMBOL;
1507 value.v_symbol = key->value;
1511 if (token == G_TOKEN_IDENTIFIER &&
1512 config->scan_identifier_NULL &&
1513 strlen (value.v_identifier) == 4)
1515 gchar *null_upper = "NULL";
1516 gchar *null_lower = "null";
1518 if (scanner->config->case_sensitive)
1520 if (value.v_identifier[0] == null_upper[0] &&
1521 value.v_identifier[1] == null_upper[1] &&
1522 value.v_identifier[2] == null_upper[2] &&
1523 value.v_identifier[3] == null_upper[3])
1524 token = G_TOKEN_IDENTIFIER_NULL;
1528 if ((value.v_identifier[0] == null_upper[0] ||
1529 value.v_identifier[0] == null_lower[0]) &&
1530 (value.v_identifier[1] == null_upper[1] ||
1531 value.v_identifier[1] == null_lower[1]) &&
1532 (value.v_identifier[2] == null_upper[2] ||
1533 value.v_identifier[2] == null_lower[2]) &&
1534 (value.v_identifier[3] == null_upper[3] ||
1535 value.v_identifier[3] == null_lower[3]))
1536 token = G_TOKEN_IDENTIFIER_NULL;