1 /* json-scanner.c: Tokenizer for JSON
2 * Copyright (C) 2008 OpenedHand
4 * Based on GScanner: Flexible lexical scanner for general purpose.
5 * Copyright (C) 1997, 1998 Tim Janik
7 * Modified by Emmanuele Bassi <ebassi@openedhand.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the
21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 * Boston, MA 02111-1307, USA.
39 #include <glib/gprintf.h>
41 #include "json-scanner.h"
44 #include <io.h> /* For _read() */
/* Scanner configuration: a handful of character sets plus one-bit flags
 * that select which token kinds are scanned and how they are reported.
 * The trailing comment on a field states its default or meaning where the
 * original author provided one.
 */
47 struct _JsonScannerConfig
51 gchar *cset_skip_characters; /* default: " \t\n" */
52 gchar *cset_identifier_first;
53 gchar *cset_identifier_nth;
54 gchar *cpair_comment_single; /* default: "#\n" */
56 /* Should symbol lookup work case sensitive? */
57 guint case_sensitive : 1;
59 /* Boolean values to be adjusted "on the fly"
60 * to configure scanning behaviour.
62 guint skip_comment_multi : 1; /* C like comment */
63 guint skip_comment_single : 1; /* single line comment */
64 guint scan_comment_multi : 1; /* scan multi line comments? */
65 guint scan_identifier : 1;
66 guint scan_identifier_1char : 1;
67 guint scan_identifier_NULL : 1;
68 guint scan_symbols : 1;
69 guint scan_binary : 1;
72 guint scan_hex : 1; /* `0x0ff0' */
73 guint scan_hex_dollar : 1; /* `$0ff0' */
74 guint scan_string_sq : 1; /* string: 'anything' */
75 guint scan_string_dq : 1; /* string: "\\-escapes!\n" */
76 guint numbers_2_int : 1; /* bin, octal, hex => int */
77 guint int_2_float : 1; /* int => G_TOKEN_FLOAT? */
78 guint identifier_2_string : 1;
79 guint char_2_token : 1; /* return G_TOKEN_CHAR? */
80 guint symbol_2_token : 1;
81 guint scope_0_fallback : 1; /* try scope 0 on lookups? */
82 guint store_int64 : 1; /* use value.v_int64 rather than v_int */
/* Default configuration.  json_scanner_new() copies these values field by
 * field into each new scanner's private config.  The initialiser is
 * positional; every entry is tagged with the name of the field it sets.
 * Note that the skip set here also contains '\r', and single-line comments
 * run from "//" to the end of the line.
 */
86 static JsonScannerConfig json_scanner_config_template =
88 ( " \t\r\n" ) /* cset_skip_characters */,
93 ) /* cset_identifier_first */,
99 ) /* cset_identifier_nth */,
100 ( "//\n" ) /* cpair_comment_single */,
101 TRUE /* case_sensitive */,
102 TRUE /* skip_comment_multi */,
103 TRUE /* skip_comment_single */,
104 FALSE /* scan_comment_multi */,
105 TRUE /* scan_identifier */,
106 TRUE /* scan_identifier_1char */,
107 FALSE /* scan_identifier_NULL */,
108 TRUE /* scan_symbols */,
109 TRUE /* scan_binary */,
110 TRUE /* scan_octal */,
111 TRUE /* scan_float */,
113 TRUE /* scan_hex_dollar */,
114 TRUE /* scan_string_sq */,
115 TRUE /* scan_string_dq */,
116 TRUE /* numbers_2_int */,
117 FALSE /* int_2_float */,
118 FALSE /* identifier_2_string */,
119 TRUE /* char_2_token */,
120 TRUE /* symbol_2_token */,
121 FALSE /* scope_0_fallback */,
122 TRUE /* store_int64 */
125 /* --- defines --- */
/* to_lower(c): case-folding helper.  Each visible term multiplies a range
 * test on (guchar) c by the distance to the lower-case counterpart, and the
 * terms are OR-ed together.  The ranges covered are 'A'..'Z', 192..214 and
 * 216..222.  NOTE(review): the two numeric ranges look like the Latin-1
 * upper-case letters -- confirm.  The argument is evaluated several times,
 * so it must not have side effects.
 *
 * READ_BUFFER_SIZE: number of bytes requested per read() call when the
 * scanner is fed from a file descriptor.
 */
126 #define to_lower(c) ( \
128 ( (((guchar)(c))>='A' && ((guchar)(c))<='Z') * ('a'-'A') ) | \
129 ( (((guchar)(c))>=192 && ((guchar)(c))<=214) * (224-192) ) | \
130 ( (((guchar)(c))>=216 && ((guchar)(c))<=222) * (248-216) ) | \
135 #define READ_BUFFER_SIZE (4000)
/* Built-in symbols.  Each table entry pairs a number with a JSON_TOKEN_*
 * value; n_json_symbols is the number of entries in json_symbols.
 * NOTE(review): 0, 5, 11 and 16 look like byte offsets of the symbol names
 * inside json_symbol_names -- confirm against the string data.
 */
137 static const gchar json_symbol_names[] =
148 { 0, JSON_TOKEN_TRUE },
149 { 5, JSON_TOKEN_FALSE },
150 { 11, JSON_TOKEN_NULL },
151 { 16, JSON_TOKEN_VAR }
154 static const guint n_json_symbols = G_N_ELEMENTS (json_symbols);
156 /* --- typedefs --- */
/* Entry of the scanner's symbol hash table.  The rest of this file reads
 * and writes its scope_id, symbol and value members.
 */
157 typedef struct _JsonScannerKey JsonScannerKey;
159 struct _JsonScannerKey
166 /* --- prototypes --- */
/* Forward declarations of the file-local helpers defined further down:
 * symbol-table hashing and lookup, the two token scanners, the character
 * readers, and the default message handler.
 */
167 static gboolean json_scanner_key_equal (gconstpointer v1,
169 static guint json_scanner_key_hash (gconstpointer v);
172 JsonScannerKey *json_scanner_lookup_internal (JsonScanner *scanner,
174 const gchar *symbol);
175 static void json_scanner_get_token_ll (JsonScanner *scanner,
177 GTokenValue *value_p,
180 static void json_scanner_get_token_i (JsonScanner *scanner,
182 GTokenValue *value_p,
186 static guchar json_scanner_peek_next_char (JsonScanner *scanner);
187 static guchar json_scanner_get_char (JsonScanner *scanner,
190 static gunichar json_scanner_get_unichar (JsonScanner *scanner,
193 static void json_scanner_msg_handler (JsonScanner *scanner,
197 /* --- functions --- */
/* Classifies c as a digit character.  The visible branches recognise
 * '0'..'9', 'A'..'Z' and 'a'..'z'.  Callers pass a radix as the second
 * argument and compare the result against 0: a negative result means c is
 * not a digit in that radix.
 */
199 json_scanner_char_2_num (guchar c,
202 if (c >= '0' && c <= '9')
204 else if (c >= 'A' && c <= 'Z')
206 else if (c >= 'a' && c <= 'z')
/* Creates a scanner in its initial state: zero-filled JsonScanner and
 * JsonScannerConfig allocations, configuration copied from
 * json_scanner_config_template, no pending tokens, an empty symbol table,
 * no input source (input_fd is -1 and the text pointers are NULL), and the
 * default message handler installed.
 */
218 json_scanner_new (void)
220 JsonScanner *scanner;
221 JsonScannerConfig *config_templ;
223 config_templ = &json_scanner_config_template;
225 scanner = g_new0 (JsonScanner, 1);
227 scanner->user_data = NULL;
228 scanner->max_parse_errors = 1;
229 scanner->parse_errors = 0;
230 scanner->input_name = NULL;
231 g_datalist_init (&scanner->qdata);
/* The config is a private copy: only the pointers to the template's
 * character-set strings are shared, not the strings themselves.
 */
233 scanner->config = g_new0 (JsonScannerConfig, 1);
235 scanner->config->case_sensitive = config_templ->case_sensitive;
236 scanner->config->cset_skip_characters = config_templ->cset_skip_characters;
/* Guarantee a non-NULL skip set so strchr() can be called on it. */
237 if (!scanner->config->cset_skip_characters)
238 scanner->config->cset_skip_characters = "";
239 scanner->config->cset_identifier_first = config_templ->cset_identifier_first;
240 scanner->config->cset_identifier_nth = config_templ->cset_identifier_nth;
241 scanner->config->cpair_comment_single = config_templ->cpair_comment_single;
242 scanner->config->skip_comment_multi = config_templ->skip_comment_multi;
243 scanner->config->skip_comment_single = config_templ->skip_comment_single;
244 scanner->config->scan_comment_multi = config_templ->scan_comment_multi;
245 scanner->config->scan_identifier = config_templ->scan_identifier;
246 scanner->config->scan_identifier_1char = config_templ->scan_identifier_1char;
247 scanner->config->scan_identifier_NULL = config_templ->scan_identifier_NULL;
248 scanner->config->scan_symbols = config_templ->scan_symbols;
249 scanner->config->scan_binary = config_templ->scan_binary;
250 scanner->config->scan_octal = config_templ->scan_octal;
251 scanner->config->scan_float = config_templ->scan_float;
252 scanner->config->scan_hex = config_templ->scan_hex;
253 scanner->config->scan_hex_dollar = config_templ->scan_hex_dollar;
254 scanner->config->scan_string_sq = config_templ->scan_string_sq;
255 scanner->config->scan_string_dq = config_templ->scan_string_dq;
256 scanner->config->numbers_2_int = config_templ->numbers_2_int;
257 scanner->config->int_2_float = config_templ->int_2_float;
258 scanner->config->identifier_2_string = config_templ->identifier_2_string;
259 scanner->config->char_2_token = config_templ->char_2_token;
260 scanner->config->symbol_2_token = config_templ->symbol_2_token;
261 scanner->config->scope_0_fallback = config_templ->scope_0_fallback;
262 scanner->config->store_int64 = config_templ->store_int64;
/* Current token and look-ahead token both start out empty. */
264 scanner->token = G_TOKEN_NONE;
265 scanner->value.v_int64 = 0;
267 scanner->position = 0;
269 scanner->next_token = G_TOKEN_NONE;
270 scanner->next_value.v_int64 = 0;
271 scanner->next_line = 1;
272 scanner->next_position = 0;
/* Symbols are hashed and compared through the JsonScannerKey helpers. */
274 scanner->symbol_table = g_hash_table_new (json_scanner_key_hash,
275 json_scanner_key_equal);
276 scanner->input_fd = -1;
277 scanner->text = NULL;
278 scanner->text_end = NULL;
279 scanner->buffer = NULL;
280 scanner->scope_id = 0;
282 scanner->msg_handler = json_scanner_msg_handler;
/* Releases the heap data owned by a token value and marks the slot empty.
 * For the token kinds listed in the visible case labels (identifiers and
 * comments) the v_string payload is freed; *token_p is then reset to
 * G_TOKEN_NONE.
 */
288 json_scanner_free_value (GTokenType *token_p,
289 GTokenValue *value_p)
294 case G_TOKEN_IDENTIFIER:
295 case G_TOKEN_IDENTIFIER_NULL:
296 case G_TOKEN_COMMENT_SINGLE:
297 case G_TOKEN_COMMENT_MULTI:
298 g_free (value_p->v_string);
305 *token_p = G_TOKEN_NONE;
/* g_hash_table_foreach() callback used by json_scanner_destroy(): frees
 * the symbol string of one table entry and then the entry itself.
 */
309 json_scanner_destroy_symbol_table_entry (gpointer _key,
313 JsonScannerKey *key = _key;
315 g_free (key->symbol);
316 g_slice_free (JsonScannerKey, key);
/* Tears down a scanner: clears the attached datalist, frees every symbol
 * table entry and then the table, releases the values of the current and
 * look-ahead tokens, and frees the config and the read buffer.
 */
320 json_scanner_destroy (JsonScanner *scanner)
322 g_return_if_fail (scanner != NULL);
324 g_datalist_clear (&scanner->qdata);
/* Entries are freed by hand first; the table was created without
 * destroy notifiers (plain g_hash_table_new in json_scanner_new).
 */
325 g_hash_table_foreach (scanner->symbol_table,
326 json_scanner_destroy_symbol_table_entry,
328 g_hash_table_destroy (scanner->symbol_table);
329 json_scanner_free_value (&scanner->token, &scanner->value);
330 json_scanner_free_value (&scanner->next_token, &scanner->next_value);
331 g_free (scanner->config);
332 g_free (scanner->buffer);
/* Default message handler: writes a diagnostic to stderr.  The output
 * starts with an "<input name>:<number>: " prefix, where "<memory>" stands
 * in when the scanner has no input_name, and ends with the message and a
 * newline.  NOTE(review): the "error: " tag is presumably printed only for
 * errors; the condition guarding it is not visible here.
 */
337 json_scanner_msg_handler (JsonScanner *scanner,
341 g_return_if_fail (scanner != NULL);
343 g_fprintf (stderr, "%s:%d: ",
344 scanner->input_name ? scanner->input_name : "<memory>",
347 g_fprintf (stderr, "error: ");
349 g_fprintf (stderr, "%s\n", message);
/* Reports an error: increments scanner->parse_errors and, if a message
 * handler is installed, formats the printf-style arguments into a string
 * and passes it to the handler with the error flag set to TRUE.
 */
353 json_scanner_error (JsonScanner *scanner,
357 g_return_if_fail (scanner != NULL);
358 g_return_if_fail (format != NULL);
/* The counter is bumped even when no handler is installed. */
360 scanner->parse_errors++;
362 if (scanner->msg_handler)
367 va_start (args, format);
368 string = g_strdup_vprintf (format, args);
371 scanner->msg_handler (scanner, string, TRUE);
/* Reports a warning: if a message handler is installed, formats the
 * printf-style arguments into a string and passes it to the handler with
 * the error flag set to FALSE.  Unlike json_scanner_error(), the
 * parse_errors counter is left untouched.
 */
378 json_scanner_warn (JsonScanner *scanner,
382 g_return_if_fail (scanner != NULL);
383 g_return_if_fail (format != NULL);
385 if (scanner->msg_handler)
390 va_start (args, format);
391 string = g_strdup_vprintf (format, args);
394 scanner->msg_handler (scanner, string, FALSE);
/* Equality callback for the symbol hash table: two keys match when they
 * carry the same scope_id and their symbol strings compare equal with
 * strcmp().
 */
401 json_scanner_key_equal (gconstpointer v1,
404 const JsonScannerKey *key1 = v1;
405 const JsonScannerKey *key2 = v2;
407 return (key1->scope_id == key2->scope_id) &&
408 (strcmp (key1->symbol, key2->symbol) == 0);
/* Hash callback for the symbol hash table.  Every character of the key's
 * symbol is folded into h as h * 31 + c, written as (h << 5) - h + c.
 */
412 json_scanner_key_hash (gconstpointer v)
414 const JsonScannerKey *key = v;
419 for (c = key->symbol; *c; c++)
420 h = (h << 5) - h + *c;
/* Looks up (scope_id, symbol) in the scanner's symbol table using a
 * temporary key.  In case-insensitive mode the lookup is done on a freshly
 * allocated copy of the symbol that is filled character by character
 * (NOTE(review): presumably lower-cased via to_lower; the loop body is not
 * visible here).  In case-sensitive mode the caller's string is used
 * directly, with its const qualifier cast away for the duration of the
 * lookup only.
 */
425 static inline JsonScannerKey *
426 json_scanner_lookup_internal (JsonScanner *scanner,
430 JsonScannerKey *key_p;
433 key.scope_id = scope_id;
435 if (!scanner->config->case_sensitive)
440 key.symbol = g_new (gchar, strlen (symbol) + 1);
441 for (d = key.symbol, c = symbol; *c; c++, d++)
444 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
449 key.symbol = (gchar*) symbol;
450 key_p = g_hash_table_lookup (scanner->symbol_table, &key);
/* Registers symbol in the given scope.  An existing entry is looked up
 * first; a new JsonScannerKey is slice-allocated with its own copy of the
 * symbol string and inserted into the table, where it serves as both key
 * and value.  Case-insensitive scanners get extra handling of the stored
 * string (NOTE(review): not visible here, presumably lower-casing).
 */
457 json_scanner_scope_add_symbol (JsonScanner *scanner,
464 g_return_if_fail (scanner != NULL);
465 g_return_if_fail (symbol != NULL);
467 key = json_scanner_lookup_internal (scanner, scope_id, symbol);
470 key = g_slice_new (JsonScannerKey);
471 key->scope_id = scope_id;
472 key->symbol = g_strdup (symbol);
474 if (!scanner->config->case_sensitive)
486 g_hash_table_insert (scanner->symbol_table, key, key);
/* Removes symbol from the given scope: the entry is looked up, taken out
 * of the hash table, and then its symbol string and the entry itself are
 * freed.
 */
493 json_scanner_scope_remove_symbol (JsonScanner *scanner,
499 g_return_if_fail (scanner != NULL);
500 g_return_if_fail (symbol != NULL);
502 key = json_scanner_lookup_internal (scanner, scope_id, symbol);
/* Remove from the table before freeing: the table still references key. */
505 g_hash_table_remove (scanner->symbol_table, key);
506 g_free (key->symbol);
507 g_slice_free (JsonScannerKey, key);
/* Looks symbol up in the scanner's current scope.  When nothing is found,
 * the current scope is not 0 and scope_0_fallback is enabled, the lookup
 * is retried in scope 0.  Returns NULL when scanner is NULL.
 */
512 json_scanner_lookup_symbol (JsonScanner *scanner,
518 g_return_val_if_fail (scanner != NULL, NULL);
523 scope_id = scanner->scope_id;
524 key = json_scanner_lookup_internal (scanner, scope_id, symbol);
525 if (!key && scope_id && scanner->config->scope_0_fallback)
526 key = json_scanner_lookup_internal (scanner, 0, symbol);
/* Looks symbol up in an explicitly given scope; no scope-0 fallback is
 * applied in the visible code.  Returns NULL when scanner is NULL.
 */
535 json_scanner_scope_lookup_symbol (JsonScanner *scanner,
541 g_return_val_if_fail (scanner != NULL, NULL);
546 key = json_scanner_lookup_internal (scanner, scope_id, symbol);
/* Switches the scanner to a new symbol scope.  The previous scope id is
 * saved in old_scope_id before being overwritten (NOTE(review): presumably
 * returned to the caller; the return statement is not visible here).
 * Yields 0 when scanner is NULL.
 */
555 json_scanner_set_scope (JsonScanner *scanner,
560 g_return_val_if_fail (scanner != NULL, 0);
562 old_scope_id = scanner->scope_id;
563 scanner->scope_id = scope_id;
/* g_hash_table_foreach() adapter: forwards a table entry to the user
 * callback stored in the closure, but only if the entry belongs to the
 * scope the closure asks for.
 */
575 json_scanner_foreach_internal (gpointer _key,
579 JsonScannerKey *key = _value;
580 ForeachClosure *closure = _user_data;
582 if (key->scope_id == closure->scope_id)
583 closure->func (key->symbol, key->value, closure->data);
/* Calls func for every symbol registered in scope_id.  A stack-allocated
 * closure carries the callback, its user data and the scope filter through
 * g_hash_table_foreach() to json_scanner_foreach_internal().
 */
587 json_scanner_scope_foreach_symbol (JsonScanner *scanner,
592 ForeachClosure closure;
594 g_return_if_fail (scanner != NULL);
595 g_return_if_fail (func != NULL);
598 closure.data = user_data;
599 closure.scope_id = scope_id;
601 g_hash_table_foreach (scanner->symbol_table,
602 json_scanner_foreach_internal,
/* Returns the type of the upcoming token without making it current.  If
 * no look-ahead token is cached, one is scanned into next_token and
 * next_value, starting from the current line and position.  Returns
 * G_TOKEN_EOF when scanner is NULL.
 */
607 json_scanner_peek_next_token (JsonScanner *scanner)
609 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
611 if (scanner->next_token == G_TOKEN_NONE)
613 scanner->next_line = scanner->line;
614 scanner->next_position = scanner->position;
615 json_scanner_get_token_i (scanner,
616 &scanner->next_token,
617 &scanner->next_value,
619 &scanner->next_position);
622 return scanner->next_token;
/* Advances to the next token and returns its type.  A cached look-ahead
 * token is promoted to current, after the old current value has been
 * freed; the look-ahead slot is then marked empty.  Otherwise a new token
 * is scanned with json_scanner_get_token_i().  Returns G_TOKEN_EOF when
 * scanner is NULL.
 */
626 json_scanner_get_next_token (JsonScanner *scanner)
628 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
630 if (scanner->next_token != G_TOKEN_NONE)
632 json_scanner_free_value (&scanner->token, &scanner->value);
/* Ownership of any string payload moves from the look-ahead slot to the
 * current slot by plain struct copy.
 */
634 scanner->token = scanner->next_token;
635 scanner->value = scanner->next_value;
636 scanner->line = scanner->next_line;
637 scanner->position = scanner->next_position;
638 scanner->next_token = G_TOKEN_NONE;
641 json_scanner_get_token_i (scanner,
647 return scanner->token;
/* Returns the type of the current token, or G_TOKEN_EOF when scanner is
 * NULL.
 */
651 json_scanner_cur_token (JsonScanner *scanner)
653 g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF);
655 return scanner->token;
/* Accessor for the value of the current token.  When scanner is NULL the
 * guard returns the local v instead.
 */
659 json_scanner_cur_value (JsonScanner *scanner)
665 g_return_val_if_fail (scanner != NULL, v);
667 /* MSC isn't capable of handling return scanner->value; ? */
/* Returns the line of the current token, or 0 when scanner is NULL. */
675 json_scanner_cur_line (JsonScanner *scanner)
677 g_return_val_if_fail (scanner != NULL, 0);
679 return scanner->line;
/* Returns the position of the current token, or 0 when scanner is NULL. */
683 json_scanner_cur_position (JsonScanner *scanner)
685 g_return_val_if_fail (scanner != NULL, 0);
687 return scanner->position;
/* Tells whether scanning has stopped: TRUE when the current token is
 * G_TOKEN_EOF or G_TOKEN_ERROR, and also TRUE when scanner is NULL.
 */
691 json_scanner_eof (JsonScanner *scanner)
693 g_return_val_if_fail (scanner != NULL, TRUE);
695 return scanner->token == G_TOKEN_EOF || scanner->token == G_TOKEN_ERROR;
/* Points the scanner at a file descriptor.  If another descriptor was in
 * use, its file offset is synchronised first.  Token state is reset, the
 * text pointers are cleared so the first read refills them, and the read
 * buffer is allocated on first use and reused afterwards.
 */
699 json_scanner_input_file (JsonScanner *scanner,
702 g_return_if_fail (scanner != NULL);
703 g_return_if_fail (input_fd >= 0);
705 if (scanner->input_fd >= 0)
706 json_scanner_sync_file_offset (scanner);
708 scanner->token = G_TOKEN_NONE;
709 scanner->value.v_int64 = 0;
711 scanner->position = 0;
712 scanner->next_token = G_TOKEN_NONE;
714 scanner->input_fd = input_fd;
715 scanner->text = NULL;
716 scanner->text_end = NULL;
/* One byte more than the read size is allocated. */
718 if (!scanner->buffer)
719 scanner->buffer = g_new (gchar, READ_BUFFER_SIZE + 1);
/* Points the scanner at an in-memory buffer of text_len bytes starting at
 * text.  If a file descriptor was in use, its offset is synchronised
 * first.  Token state is reset, file input is switched off (input_fd is
 * set to -1), and the file read buffer is released.  The text itself is
 * not copied: only the start and end pointers are stored.
 */
723 json_scanner_input_text (JsonScanner *scanner,
727 g_return_if_fail (scanner != NULL);
729 g_return_if_fail (text != NULL);
733 if (scanner->input_fd >= 0)
734 json_scanner_sync_file_offset (scanner);
736 scanner->token = G_TOKEN_NONE;
737 scanner->value.v_int64 = 0;
739 scanner->position = 0;
740 scanner->next_token = G_TOKEN_NONE;
742 scanner->input_fd = -1;
743 scanner->text = text;
744 scanner->text_end = text + text_len;
748 g_free (scanner->buffer);
749 scanner->buffer = NULL;
/* Returns the next input byte without consuming it.  If buffered text
 * remains, the byte at scanner->text is returned.  Otherwise, for file
 * input, the buffer is refilled with read(), retrying while the call
 * fails with EINTR or EAGAIN, and text/text_end are pointed at the new
 * data.  NOTE(review): input_fd is reset to -1 on a path whose condition
 * is not visible here -- presumably when read() yields no data.
 */
754 json_scanner_peek_next_char (JsonScanner *scanner)
756 if (scanner->text < scanner->text_end)
757 return *scanner->text;
758 else if (scanner->input_fd >= 0)
763 buffer = scanner->buffer;
766 count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
768 while (count == -1 && (errno == EINTR || errno == EAGAIN));
772 scanner->input_fd = -1;
778 scanner->text = buffer;
779 scanner->text_end = buffer + count;
/* Gives back read-ahead data to the file descriptor: when bytes are still
 * buffered, the descriptor is moved backwards by that many bytes with
 * lseek(), and on success the buffer pointers are cleared.  If the seek
 * fails the buffered data is kept.
 */
789 json_scanner_sync_file_offset (JsonScanner *scanner)
791 g_return_if_fail (scanner != NULL);
793 /* for file input, rewind the filedescriptor to the current
794 * buffer position and blow the file read ahead buffer. useful
795 * for third party uses of our file descriptor, which hooks
796 * onto the current scanning position.
799 if (scanner->input_fd >= 0 && scanner->text_end > scanner->text)
803 buffered = scanner->text_end - scanner->text;
804 if (lseek (scanner->input_fd, - buffered, SEEK_CUR) >= 0)
806 /* we succeeded, blow our buffer's contents now */
807 scanner->text = NULL;
808 scanner->text_end = NULL;
/* Consumes and yields the next input byte.  Buffered text is served
 * first; for file input the buffer is refilled with read(), retrying on
 * EINTR and EAGAIN, and scanner->text is set one past the start of the new
 * data because its first byte is the one being consumed.
 * NOTE(review): the trailing block that syncs the file offset, empties the
 * buffer and disables file input runs under a condition that is not
 * visible here -- presumably when a NUL byte was read.
 */
816 json_scanner_get_char (JsonScanner *scanner,
822 if (scanner->text < scanner->text_end)
823 fchar = *(scanner->text++);
824 else if (scanner->input_fd >= 0)
829 buffer = scanner->buffer;
832 count = read (scanner->input_fd, buffer, READ_BUFFER_SIZE);
834 while (count == -1 && (errno == EINTR || errno == EAGAIN));
838 scanner->input_fd = -1;
843 scanner->text = buffer + 1;
844 scanner->text_end = buffer + count;
848 json_scanner_sync_file_offset (scanner);
849 scanner->text_end = scanner->text;
850 scanner->input_fd = -1;
/* is_hex_digit(c): true for '0'..'9', 'a'..'f' and 'A'..'F'.
 * to_hex_digit(c): numeric value of a character already known to be a hex
 * digit.  Decimal digits map through c - '0'.  For letters, (c & 7) is
 * 1..6 for both 'A'..'F' and 'a'..'f' in ASCII, so adding 9 gives 10..15.
 * Both macros evaluate their argument more than once.
 */
870 #define is_hex_digit(c) (((c) >= '0' && (c) <= '9') || \
871 ((c) >= 'a' && (c) <= 'f') || \
872 ((c) >= 'A' && (c) <= 'F'))
873 #define to_hex_digit(c) (((c) <= '9') ? (c) - '0' : ((c) & 7) + 9)
/* Reads up to four characters and assembles them into a code point: the
 * i-th hex digit contributes four bits, most significant digit first.
 * The result is asserted to be either a valid Unicode character or a
 * surrogate, so a value that passes neither test aborts the program
 * rather than producing a scanner error.
 */
876 json_scanner_get_unichar (JsonScanner *scanner,
885 for (i = 0; i < 4; i++)
887 ch = json_scanner_get_char (scanner, line_p, position_p);
889 if (is_hex_digit (ch))
890 uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4));
895 g_assert (g_unichar_validate (uchar) || g_unichar_type (uchar) == G_UNICODE_SURROGATE);
/* Emits a diagnostic that contrasts the scanner's current token with
 * expected_token.  Two scratch strings are built: token_string (56 bytes
 * plus terminator) describes what was found, expected_string (64 bytes
 * plus terminator) describes what was expected.  The final text goes out
 * through json_scanner_error() or json_scanner_warn() via the msg_handler
 * function pointer (NOTE(review): the choice presumably depends on an
 * is-error argument whose declaration is not visible here).
 * identifier_spec and symbol_spec fall back to "identifier" and "symbol".
 * Both scratch strings are freed before returning.
 */
901 json_scanner_unexp_token (JsonScanner *scanner,
902 GTokenType expected_token,
903 const gchar *identifier_spec,
904 const gchar *symbol_spec,
905 const gchar *symbol_name,
906 const gchar *message,
910 guint token_string_len;
911 gchar *expected_string;
912 guint expected_string_len;
913 gchar *message_prefix;
914 gboolean print_unexp;
915 void (*msg_handler) (JsonScanner*, const gchar*, ...);
917 g_return_if_fail (scanner != NULL);
920 msg_handler = json_scanner_error;
922 msg_handler = json_scanner_warn;
924 if (!identifier_spec)
925 identifier_spec = "identifier";
927 symbol_spec = "symbol";
929 token_string_len = 56;
930 token_string = g_new (gchar, token_string_len + 1);
931 expected_string_len = 64;
932 expected_string = g_new (gchar, expected_string_len + 1);
/* First half: describe the token that was actually found. */
935 switch (scanner->token)
938 g_snprintf (token_string, token_string_len, "end of file");
/* Token values 1..255 are plain characters: printable ones and identifier
 * characters are shown literally, the rest as an octal escape.
 */
942 if (scanner->token >= 1 && scanner->token <= 255)
944 if ((scanner->token >= ' ' && scanner->token <= '~') ||
945 strchr (scanner->config->cset_identifier_first, scanner->token) ||
946 strchr (scanner->config->cset_identifier_nth, scanner->token))
947 g_snprintf (token_string, token_string_len, "character `%c'", scanner->token);
949 g_snprintf (token_string, token_string_len, "character `\\%o'", scanner->token);
952 else if (!scanner->config->symbol_2_token)
954 g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token);
959 if (expected_token == G_TOKEN_SYMBOL ||
960 (scanner->config->symbol_2_token &&
961 expected_token > G_TOKEN_LAST))
964 g_snprintf (token_string, token_string_len,
966 print_unexp ? "" : "invalid ",
970 g_snprintf (token_string, token_string_len,
972 print_unexp ? "" : "invalid ",
/* Scanner errors carry their own explanation in value.v_error; the
 * expectation is dropped for them.
 */
978 expected_token = G_TOKEN_NONE;
979 switch (scanner->value.v_error)
981 case G_ERR_UNEXP_EOF:
982 g_snprintf (token_string, token_string_len, "scanner: unexpected end of file");
985 case G_ERR_UNEXP_EOF_IN_STRING:
986 g_snprintf (token_string, token_string_len, "scanner: unterminated string constant");
989 case G_ERR_UNEXP_EOF_IN_COMMENT:
990 g_snprintf (token_string, token_string_len, "scanner: unterminated comment");
993 case G_ERR_NON_DIGIT_IN_CONST:
994 g_snprintf (token_string, token_string_len, "scanner: non digit in constant");
997 case G_ERR_FLOAT_RADIX:
998 g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant");
1001 case G_ERR_FLOAT_MALFORMED:
1002 g_snprintf (token_string, token_string_len, "scanner: malformed floating constant");
1005 case G_ERR_DIGIT_RADIX:
1006 g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix");
1011 g_snprintf (token_string, token_string_len, "scanner: unknown error");
1017 g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char);
1020 case G_TOKEN_IDENTIFIER:
1021 case G_TOKEN_IDENTIFIER_NULL:
/* The right kind of token was found, so it is reported as "invalid"
 * instead of "unexpected".
 */
1022 if (expected_token == G_TOKEN_IDENTIFIER ||
1023 expected_token == G_TOKEN_IDENTIFIER_NULL)
1024 print_unexp = FALSE;
1025 g_snprintf (token_string, token_string_len,
1027 print_unexp ? "" : "invalid ",
1029 scanner->token == G_TOKEN_IDENTIFIER ? scanner->value.v_string : "null");
1032 case G_TOKEN_BINARY:
1036 if (scanner->config->store_int64)
1037 g_snprintf (token_string, token_string_len, "number `%" G_GUINT64_FORMAT "'", scanner->value.v_int64);
1039 g_snprintf (token_string, token_string_len, "number `%lu'", scanner->value.v_int);
1043 g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float);
1046 case G_TOKEN_STRING:
1047 if (expected_token == G_TOKEN_STRING)
1048 print_unexp = FALSE;
1049 g_snprintf (token_string, token_string_len,
1050 "%s%sstring constant \"%s\"",
1051 print_unexp ? "" : "invalid ",
1052 scanner->value.v_string[0] == 0 ? "empty " : "",
1053 scanner->value.v_string);
/* Overwrite the last two usable bytes so that a string cut short by
 * g_snprintf still ends with a closing quote.
 */
1054 token_string[token_string_len - 2] = '"';
1055 token_string[token_string_len - 1] = 0;
1058 case G_TOKEN_COMMENT_SINGLE:
1059 case G_TOKEN_COMMENT_MULTI:
1060 g_snprintf (token_string, token_string_len, "comment");
1064 /* somehow the user's parsing code is screwed, there isn't much
1065 * we can do about it.
1066 * Note, a common case to trigger this is
1067 * json_scanner_peek_next_token(); json_scanner_unexp_token();
1068 * without an intermediate json_scanner_get_next_token().
1070 g_assert_not_reached ();
1075 switch (expected_token)
1077 gboolean need_valid;
1080 g_snprintf (expected_string, expected_string_len, "end of file");
1083 if (expected_token >= 1 && expected_token <= 255)
1085 if ((expected_token >= ' ' && expected_token <= '~') ||
1086 strchr (scanner->config->cset_identifier_first, expected_token) ||
1087 strchr (scanner->config->cset_identifier_nth, expected_token))
1088 g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token);
1090 g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token);
1093 else if (!scanner->config->symbol_2_token)
1095 g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token);
1099 case G_TOKEN_SYMBOL:
1100 need_valid = (scanner->token == G_TOKEN_SYMBOL ||
1101 (scanner->config->symbol_2_token &&
1102 scanner->token > G_TOKEN_LAST));
1103 g_snprintf (expected_string, expected_string_len,
1105 need_valid ? "valid " : "",
1107 /* FIXME: should we attempt to lookup the symbol_name for symbol_2_token? */
1110 g_snprintf (expected_string, expected_string_len, "%scharacter",
1111 scanner->token == G_TOKEN_CHAR ? "valid " : "");
/* Numeric kinds share one format; "valid " is added when the token found
 * already has the expected kind.
 */
1113 case G_TOKEN_BINARY:
1115 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1116 scanner->token == expected_token ? "valid " : "", tstring);
1120 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1121 scanner->token == expected_token ? "valid " : "", tstring);
1124 tstring = "integer";
1125 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1126 scanner->token == expected_token ? "valid " : "", tstring);
1129 tstring = "hexadecimal";
1130 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1131 scanner->token == expected_token ? "valid " : "", tstring);
1135 g_snprintf (expected_string, expected_string_len, "%snumber (%s)",
1136 scanner->token == expected_token ? "valid " : "", tstring);
1138 case G_TOKEN_STRING:
1139 g_snprintf (expected_string,
1140 expected_string_len,
1141 "%sstring constant",
1142 scanner->token == G_TOKEN_STRING ? "valid " : "");
1144 case G_TOKEN_IDENTIFIER:
1145 case G_TOKEN_IDENTIFIER_NULL:
1146 need_valid = (scanner->token == G_TOKEN_IDENTIFIER_NULL ||
1147 scanner->token == G_TOKEN_IDENTIFIER);
1148 g_snprintf (expected_string,
1149 expected_string_len,
1151 need_valid ? "valid " : "",
1154 case G_TOKEN_COMMENT_SINGLE:
1155 tstring = "single-line";
1156 g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1157 scanner->token == expected_token ? "valid " : "", tstring);
1159 case G_TOKEN_COMMENT_MULTI:
1160 tstring = "multi-line";
1161 g_snprintf (expected_string, expected_string_len, "%scomment (%s)",
1162 scanner->token == expected_token ? "valid " : "", tstring);
1166 /* this is handled upon printout */
/* A separator is used only when the caller supplied a non-empty message. */
1170 if (message && message[0] != 0)
1171 message_prefix = " - ";
1174 message_prefix = "";
/* Pick the phrasing: "failure around" for G_TOKEN_ERROR, a bare
 * "unexpected ..." when nothing specific was expected, otherwise the
 * found token followed by the expectation.
 */
1177 if (expected_token == G_TOKEN_ERROR)
1179 msg_handler (scanner,
1180 "failure around %s%s%s",
1185 else if (expected_token == G_TOKEN_NONE)
1188 msg_handler (scanner,
1189 "unexpected %s%s%s",
1194 msg_handler (scanner,
1203 msg_handler (scanner,
1204 "unexpected %s, expected %s%s%s",
1210 msg_handler (scanner,
1211 "%s, expected %s%s%s",
1218 g_free (token_string);
1219 g_free (expected_string);
/* Scans one token into *token_p and *value_p, on top of the low-level
 * scanner json_scanner_get_token_ll().  The previous value held in the
 * slot is freed before each scan.  The while condition repeats the scan
 * for tokens that are to be skipped: characters from cset_skip_characters
 * and, if so configured, multi-line and single-line comments
 * (NOTE(review): the matching `do` is not visible here).  Afterwards the
 * configured conversions are applied:
 *   - identifier_2_string: identifiers are reported as G_TOKEN_STRING;
 *   - symbol_2_token: a symbol's value becomes the token type;
 *   - numbers_2_int: the numeric kinds in the visible case group are
 *     reported as G_TOKEN_INT;
 *   - int_2_float: integers are reported as G_TOKEN_FLOAT, with the value
 *     taken from v_int64 or v_int depending on store_int64.
 */
1223 json_scanner_get_token_i (JsonScanner *scanner,
1224 GTokenType *token_p,
1225 GTokenValue *value_p,
1231 json_scanner_free_value (token_p, value_p);
1232 json_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p);
1234 while (((*token_p > 0 && *token_p < 256) &&
1235 strchr (scanner->config->cset_skip_characters, *token_p)) ||
1236 (*token_p == G_TOKEN_CHAR &&
1237 strchr (scanner->config->cset_skip_characters, value_p->v_char)) ||
1238 (*token_p == G_TOKEN_COMMENT_MULTI &&
1239 scanner->config->skip_comment_multi) ||
1240 (*token_p == G_TOKEN_COMMENT_SINGLE &&
1241 scanner->config->skip_comment_single));
1245 case G_TOKEN_IDENTIFIER:
1246 if (scanner->config->identifier_2_string)
1247 *token_p = G_TOKEN_STRING;
1250 case G_TOKEN_SYMBOL:
1251 if (scanner->config->symbol_2_token)
1252 *token_p = (GTokenType) value_p->v_symbol;
1255 case G_TOKEN_BINARY:
1258 if (scanner->config->numbers_2_int)
1259 *token_p = G_TOKEN_INT;
/* Integer-to-float promotion, applied after any numbers_2_int rewrite. */
1266 if (*token_p == G_TOKEN_INT &&
1267 scanner->config->int_2_float)
1269 *token_p = G_TOKEN_FLOAT;
1270 if (scanner->config->store_int64)
1273 /* work around error C2520, see gvaluetransform.c */
1274 value_p->v_float = (__int64)value_p->v_int64;
1276 value_p->v_float = value_p->v_int64;
1280 value_p->v_float = value_p->v_int;
1287 json_scanner_get_token_ll (JsonScanner *scanner,
1288 GTokenType *token_p,
1289 GTokenValue *value_p,
1293 JsonScannerConfig *config;
1295 gboolean in_comment_multi;
1296 gboolean in_comment_single;
1297 gboolean in_string_sq;
1298 gboolean in_string_dq;
1303 config = scanner->config;
1304 (*value_p).v_int64 = 0;
1306 if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
1307 scanner->token == G_TOKEN_EOF)
1309 *token_p = G_TOKEN_EOF;
1313 in_comment_multi = FALSE;
1314 in_comment_single = FALSE;
1315 in_string_sq = FALSE;
1316 in_string_dq = FALSE;
1319 do /* while (ch != 0) */
1321 gboolean dotted_float = FALSE;
1323 ch = json_scanner_get_char (scanner, line_p, position_p);
1326 token = G_TOKEN_NONE;
1328 /* this is *evil*, but needed ;(
1329 * we first check for identifier first character, because it
1330 * might interfere with other key chars like slashes or numbers
1332 if (config->scan_identifier &&
1333 ch && strchr (config->cset_identifier_first, ch))
1334 goto identifier_precedence;
1339 token = G_TOKEN_EOF;
1345 if (!config->scan_comment_multi ||
1346 json_scanner_peek_next_char (scanner) != '*')
1348 json_scanner_get_char (scanner, line_p, position_p);
1349 token = G_TOKEN_COMMENT_MULTI;
1350 in_comment_multi = TRUE;
1351 gstring = g_string_new (NULL);
1352 while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1354 if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
1356 json_scanner_get_char (scanner, line_p, position_p);
1357 in_comment_multi = FALSE;
1361 gstring = g_string_append_c (gstring, ch);
1367 if (!config->scan_string_sq)
1369 token = G_TOKEN_STRING;
1370 in_string_sq = TRUE;
1371 gstring = g_string_new (NULL);
1372 while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1376 in_string_sq = FALSE;
1380 gstring = g_string_append_c (gstring, ch);
1386 if (!config->scan_string_dq)
1388 token = G_TOKEN_STRING;
1389 in_string_dq = TRUE;
1390 gstring = g_string_new (NULL);
1391 while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
1395 in_string_dq = FALSE;
1402 ch = json_scanner_get_char (scanner, line_p, position_p);
1412 gstring = g_string_append_c (gstring, '\\');
1416 gstring = g_string_append_c (gstring, '\n');
1420 gstring = g_string_append_c (gstring, '\t');
1424 gstring = g_string_append_c (gstring, '\r');
1428 gstring = g_string_append_c (gstring, '\b');
1432 gstring = g_string_append_c (gstring, '\f');
1436 fchar = json_scanner_peek_next_char (scanner);
1437 if (is_hex_digit (fchar))
1441 ucs = json_scanner_get_unichar (scanner, line_p, position_p);
1443 if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
1445 /* read next surrogate */
1446 if ('\\' == json_scanner_get_char (scanner, line_p, position_p)
1447 && 'u' == json_scanner_get_char (scanner, line_p, position_p))
1449 gunichar ucs_lo = json_scanner_get_unichar (scanner, line_p, position_p);
1450 g_assert (g_unichar_type (ucs_lo) == G_UNICODE_SURROGATE);
1451 ucs = (((ucs & 0x3ff) << 10) | (ucs_lo & 0x3ff)) + 0x10000;
1455 g_assert (g_unichar_validate (ucs));
1456 gstring = g_string_append_unichar (gstring, ucs);
1469 fchar = json_scanner_peek_next_char (scanner);
1470 if (fchar >= '0' && fchar <= '7')
1472 ch = json_scanner_get_char (scanner, line_p, position_p);
1473 i = i * 8 + ch - '0';
1474 fchar = json_scanner_peek_next_char (scanner);
1475 if (fchar >= '0' && fchar <= '7')
1477 ch = json_scanner_get_char (scanner, line_p, position_p);
1478 i = i * 8 + ch - '0';
1481 gstring = g_string_append_c (gstring, i);
1485 gstring = g_string_append_c (gstring, ch);
1490 gstring = g_string_append_c (gstring, ch);
1497 if (!config->scan_float)
1499 token = G_TOKEN_FLOAT;
1500 dotted_float = TRUE;
1501 ch = json_scanner_get_char (scanner, line_p, position_p);
1502 goto number_parsing;
1505 if (!config->scan_hex_dollar)
1507 token = G_TOKEN_HEX;
1508 ch = json_scanner_get_char (scanner, line_p, position_p);
1509 goto number_parsing;
1512 if (config->scan_octal)
1513 token = G_TOKEN_OCTAL;
1515 token = G_TOKEN_INT;
1516 ch = json_scanner_peek_next_char (scanner);
1517 if (config->scan_hex && (ch == 'x' || ch == 'X'))
1519 token = G_TOKEN_HEX;
1520 json_scanner_get_char (scanner, line_p, position_p);
1521 ch = json_scanner_get_char (scanner, line_p, position_p);
1524 token = G_TOKEN_ERROR;
1525 value.v_error = G_ERR_UNEXP_EOF;
1529 if (json_scanner_char_2_num (ch, 16) < 0)
1531 token = G_TOKEN_ERROR;
1532 value.v_error = G_ERR_DIGIT_RADIX;
1537 else if (config->scan_binary && (ch == 'b' || ch == 'B'))
1539 token = G_TOKEN_BINARY;
1540 json_scanner_get_char (scanner, line_p, position_p);
1541 ch = json_scanner_get_char (scanner, line_p, position_p);
1544 token = G_TOKEN_ERROR;
1545 value.v_error = G_ERR_UNEXP_EOF;
1549 if (json_scanner_char_2_num (ch, 10) < 0)
1551 token = G_TOKEN_ERROR;
1552 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1571 gboolean in_number = TRUE;
1574 if (token == G_TOKEN_NONE)
1575 token = G_TOKEN_INT;
1577 gstring = g_string_new (dotted_float ? "0." : "");
1578 gstring = g_string_append_c (gstring, ch);
1580 do /* while (in_number) */
1584 is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');
1586 ch = json_scanner_peek_next_char (scanner);
1588 if (json_scanner_char_2_num (ch, 36) >= 0 ||
1589 (config->scan_float && ch == '.') ||
1590 (is_E && (ch == '+' || ch == '-')))
1592 ch = json_scanner_get_char (scanner, line_p, position_p);
1597 if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
1599 value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
1600 token = G_TOKEN_ERROR;
1605 token = G_TOKEN_FLOAT;
1606 gstring = g_string_append_c (gstring, ch);
1620 gstring = g_string_append_c (gstring, ch);
1625 if (token != G_TOKEN_FLOAT)
1627 token = G_TOKEN_ERROR;
1628 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1632 gstring = g_string_append_c (gstring, ch);
1637 if ((token != G_TOKEN_HEX && !config->scan_float) ||
1638 (token != G_TOKEN_HEX &&
1639 token != G_TOKEN_OCTAL &&
1640 token != G_TOKEN_FLOAT &&
1641 token != G_TOKEN_INT))
1643 token = G_TOKEN_ERROR;
1644 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1649 if (token != G_TOKEN_HEX)
1650 token = G_TOKEN_FLOAT;
1651 gstring = g_string_append_c (gstring, ch);
1656 if (token != G_TOKEN_HEX)
1658 token = G_TOKEN_ERROR;
1659 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1663 gstring = g_string_append_c (gstring, ch);
1673 if (token == G_TOKEN_FLOAT)
1674 value.v_float = g_strtod (gstring->str, &endptr);
1680 case G_TOKEN_BINARY:
1681 ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
1684 ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
1687 ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
1690 ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
1694 if (scanner->config->store_int64)
1695 value.v_int64 = ui64;
1699 if (endptr && *endptr)
1701 token = G_TOKEN_ERROR;
1702 if (*endptr == 'e' || *endptr == 'E')
1703 value.v_error = G_ERR_NON_DIGIT_IN_CONST;
1705 value.v_error = G_ERR_DIGIT_RADIX;
1707 g_string_free (gstring, TRUE);
1710 } /* number_parsing:... */
1716 if (config->cpair_comment_single &&
1717 ch == config->cpair_comment_single[0])
1719 token = G_TOKEN_COMMENT_SINGLE;
1720 in_comment_single = TRUE;
1721 gstring = g_string_new (NULL);
1722 ch = json_scanner_get_char (scanner, line_p, position_p);
1725 if (ch == config->cpair_comment_single[1])
1727 in_comment_single = FALSE;
1732 gstring = g_string_append_c (gstring, ch);
1733 ch = json_scanner_get_char (scanner, line_p, position_p);
1735 /* ignore a missing newline at EOF for single line comments */
1736 if (in_comment_single &&
1737 config->cpair_comment_single[1] == '\n')
1738 in_comment_single = FALSE;
1740 else if (config->scan_identifier && ch &&
1741 strchr (config->cset_identifier_first, ch))
1743 identifier_precedence:
1745 if (config->cset_identifier_nth && ch &&
1746 strchr (config->cset_identifier_nth,
1747 json_scanner_peek_next_char (scanner)))
1749 token = G_TOKEN_IDENTIFIER;
1750 gstring = g_string_new (NULL);
1751 gstring = g_string_append_c (gstring, ch);
1754 ch = json_scanner_get_char (scanner, line_p, position_p);
1755 gstring = g_string_append_c (gstring, ch);
1756 ch = json_scanner_peek_next_char (scanner);
1758 while (ch && strchr (config->cset_identifier_nth, ch));
1761 else if (config->scan_identifier_1char)
1763 token = G_TOKEN_IDENTIFIER;
1764 value.v_identifier = g_new0 (gchar, 2);
1765 value.v_identifier[0] = ch;
1771 if (config->char_2_token)
1775 token = G_TOKEN_CHAR;
1780 } /* default_case:... */
1783 g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
1787 if (in_comment_multi || in_comment_single ||
1788 in_string_sq || in_string_dq)
1790 token = G_TOKEN_ERROR;
1793 g_string_free (gstring, TRUE);
1797 if (in_comment_multi || in_comment_single)
1798 value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
1799 else /* (in_string_sq || in_string_dq) */
1800 value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
1805 value.v_string = g_string_free (gstring, FALSE);
1809 if (token == G_TOKEN_IDENTIFIER)
1811 if (config->scan_symbols)
1813 JsonScannerKey *key;
1816 scope_id = scanner->scope_id;
1817 key = json_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
1818 if (!key && scope_id && scanner->config->scope_0_fallback)
1819 key = json_scanner_lookup_internal (scanner, 0, value.v_identifier);
1823 g_free (value.v_identifier);
1824 token = G_TOKEN_SYMBOL;
1825 value.v_symbol = key->value;
1829 if (token == G_TOKEN_IDENTIFIER &&
1830 config->scan_identifier_NULL &&
1831 strlen (value.v_identifier) == 4)
1833 gchar *null_upper = "NULL";
1834 gchar *null_lower = "null";
1836 if (scanner->config->case_sensitive)
1838 if (value.v_identifier[0] == null_upper[0] &&
1839 value.v_identifier[1] == null_upper[1] &&
1840 value.v_identifier[2] == null_upper[2] &&
1841 value.v_identifier[3] == null_upper[3])
1842 token = G_TOKEN_IDENTIFIER_NULL;
1846 if ((value.v_identifier[0] == null_upper[0] ||
1847 value.v_identifier[0] == null_lower[0]) &&
1848 (value.v_identifier[1] == null_upper[1] ||
1849 value.v_identifier[1] == null_lower[1]) &&
1850 (value.v_identifier[2] == null_upper[2] ||
1851 value.v_identifier[2] == null_lower[2]) &&
1852 (value.v_identifier[3] == null_upper[3] ||
1853 value.v_identifier[3] == null_lower[3]))
1854 token = G_TOKEN_IDENTIFIER_NULL;