12 #include "strbuffer.h"
/*
 * Token codes stored in lex->token. Elsewhere in this file the token is also
 * compared directly against character literals (braces, brackets, colon,
 * comma), so the named tokens are numbered from 256, which keeps them
 * distinct from any single byte value, and TOKEN_INVALID is negative.
 */
14 #define TOKEN_INVALID -1
16 #define TOKEN_STRING 256
17 #define TOKEN_INTEGER 257
18 #define TOKEN_REAL 258
19 #define TOKEN_TRUE 259
20 #define TOKEN_FALSE 260
21 #define TOKEN_NULL 261
/*
 * Input callbacks used by the stream layer. The data argument is the opaque
 * pointer handed to stream_init: stream_get invokes the get callback as
 * stream->get(stream->data) and lex_eof invokes the eof callback the same
 * way.
 */
23 /* read one byte from stream, return EOF on end of file */
24 typedef int (*get_func)(void *data);
26 /* return non-zero if end of file has been reached */
27 typedef int (*eof_func)(void *data);
40 strbuffer_t saved_text;
51 /*** error reporting ***/
/*
 * Format a printf-style message (msg plus its variadic arguments, rendered
 * with vsnprintf into a local buffer) and store the final text in
 * error->text, bounded by JSON_ERROR_TEXT_LENGTH.
 *
 * When lexer state is used, error->line is copied from lex->line and the
 * message is suffixed either with the text the lexer has saved so far or,
 * when nothing is saved, with a note that the error is near end of file.
 *
 * NOTE(review): the guards selecting between the three snprintf forms are
 * not all visible in this excerpt. json_load calls this with lex == NULL, so
 * presumably the plain form at the end handles that case -- confirm.
 */
53 static void error_set(json_error_t *error, const lex_t *lex,
57 char text[JSON_ERROR_TEXT_LENGTH];
/* render the caller-supplied message first; context is appended below */
63 vsnprintf(text, JSON_ERROR_TEXT_LENGTH, msg, ap);
68 const char *saved_text = strbuffer_value(&lex->saved_text);
69 error->line = lex->line;
/* non-empty saved text: quote it so the user can locate the problem */
70 if(saved_text && saved_text[0])
72 snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
73 "%s near '%s'", text, saved_text);
/* nothing saved: report the position as end of file instead */
77 snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
78 "%s near end of file", text);
/* message without any lexer context */
84 snprintf(error->text, JSON_ERROR_TEXT_LENGTH, "%s", text);
89 /*** lexical analyzer ***/
/*
 * Prepare a stream for reading through the given callbacks. The visible
 * statements empty the lookahead buffer (first byte set to NUL) and reset
 * the read position, so the first stream_get call has to fetch a byte from
 * the get callback.
 * NOTE(review): storing get, eof and data into the stream is presumably done
 * in lines not shown in this excerpt -- confirm.
 */
91 void stream_init(stream_t *stream, get_func get, eof_func eof, void *data)
96 stream->buffer[0] = '\0';
97 stream->buffer_pos = 0;
/*
 * Return the next byte of input. When the byte at the current buffer
 * position is zero, buffer[0] is refilled from the get callback and the
 * position is reset to 0; the byte at the position is then returned and the
 * position advanced by one.
 *
 * NOTE(review): the result is narrowed to char, while callers compare it
 * with EOF (see lex_scan_string and lex_scan). Whether an EOF returned by
 * the callback survives that round trip depends on the buffer element type
 * and on the signedness of plain char -- confirm on the target platform.
 */
100 static char stream_get(stream_t *stream)
/* a zero byte at the read position triggers a refill from the callback */
102 if(!stream->buffer[stream->buffer_pos])
104 stream->buffer[0] = stream->get(stream->data);
105 stream->buffer_pos = 0;
108 return (char)stream->buffer[stream->buffer_pos++];
/*
 * Push the most recently read byte back by moving buffer_pos one slot
 * backwards. Nothing is written to the buffer here: the asserts require that
 * at least one byte has been consumed and that c equals the byte already
 * stored at the restored position.
 */
111 static void stream_unget(stream_t *stream, char c)
113 assert(stream->buffer_pos > 0);
114 stream->buffer_pos--;
115 assert(stream->buffer[stream->buffer_pos] == (unsigned char)c);
/*
 * Read the next byte from the stream owned by the lexer without recording
 * it in saved_text. The char returned by stream_get is converted to int.
 */
119 static int lex_get(lex_t *lex)
121 return stream_get(&lex->stream);
/*
 * Ask the eof callback of the stream whether the input is exhausted and
 * return its result unchanged (non-zero means end of file, per the eof_func
 * typedef comment).
 */
124 static int lex_eof(lex_t *lex)
126 return lex->stream.eof(lex->stream.data);
/*
 * Append one byte to the saved_text buffer of the lexer. The result of
 * strbuffer_append_byte is not inspected in this statement.
 */
129 static void lex_save(lex_t *lex, char c)
131 strbuffer_append_byte(&lex->saved_text, c);
/*
 * Read the next byte from the stream of the lexer.
 * NOTE(review): only the read is visible in this excerpt; judging by the
 * name and by lex_unget_unsave, the byte is presumably also appended to
 * saved_text and then returned -- confirm against the full function.
 */
134 static int lex_get_save(lex_t *lex)
136 char c = stream_get(&lex->stream);
/*
 * Counterpart of lex_get_save: push c back onto the stream and remove the
 * last byte from saved_text, keeping the popped byte in d.
 * NOTE(review): the declaration of d and any check made on it are not
 * visible in this excerpt.
 */
141 static void lex_unget_unsave(lex_t *lex, char c)
144 stream_unget(&lex->stream, c);
145 d = strbuffer_pop(&lex->saved_text);
/*
 * Scan a string token. The token is set to TOKEN_INVALID up front and is
 * changed to TOKEN_STRING only by the last visible statement, so presumably
 * every early exit leaves the token invalid.
 *
 * Visible work, in order: characters are read through lex_get_save; end of
 * input and control characters (0x00 to 0x1F) are treated specially; escape
 * sequences are examined, including a four-step loop after a unicode escape
 * introducer; then lex->value.string is allocated with room for the saved
 * text plus a terminator and the escapes are decoded into it.
 *
 * NOTE(review): the loop and branch structure joining these statements is
 * not fully visible in this excerpt, so this description should be confirmed
 * against the complete function.
 */
149 static void lex_scan_string(lex_t *lex)
/* pessimistic default, overwritten only at the very end */
155 lex->token = TOKEN_INVALID;
158 c = lex_get_save(lex);
/* EOF is only trusted when the eof callback agrees */
161 if(c == EOF && lex_eof(lex))
164 else if(0 <= c && c <= 0x1F) {
165 /* control character */
166 lex_unget_unsave(lex, c);
171 c = lex_get_save(lex);
173 c = lex_get_save(lex);
/* loop variable declared in the for statement: needs C99 or later */
174 for(int i = 0; i < 4; i++) {
176 lex_unget_unsave(lex, c);
179 c = lex_get_save(lex);
182 else if(c == '"' || c == '\\' || c == '/' || c == 'b' ||
183 c == 'f' || c == 'n' || c == 'r' || c == 't')
184 c = lex_get_save(lex);
186 lex_unget_unsave(lex, c);
191 c = lex_get_save(lex);
194 /* the actual value is at most of the same length as the source
196 - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
197 - a single \uXXXX escape (length 6) is converted to at most 3 bytes
198 - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
199 are converted to 4 bytes
201 lex->value.string = malloc(lex->saved_text.length + 1);
202 if(!lex->value.string) {
203 /* this is not very nice, since TOKEN_INVALID is returned */
208 t = lex->value.string;
210 /* + 1 to skip the " */
211 p = strbuffer_value(&lex->saved_text) + 1;
217 /* TODO: \uXXXX not supported yet */
218 free(lex->value.string);
219 lex->value.string = NULL;
223 case '"': case '\\': case '/':
/* single-letter escapes map to the control character they name */
225 case 'b': *t = '\b'; break;
226 case 'f': *t = '\f'; break;
227 case 'n': *t = '\n'; break;
228 case 'r': *t = '\r'; break;
229 case 't': *t = '\t'; break;
/* last visible statement: the decoded string is now the token value */
241 lex->token = TOKEN_STRING;
/*
 * Scan a number token whose first character c was already read by the
 * caller (lex_scan passes a digit or a minus sign). The token starts as
 * TOKEN_INVALID.
 *
 * If the integer part is not followed by a dot or an exponent marker, the
 * saved text is converted with strtol (base 10) and the token becomes
 * TOKEN_INTEGER. Otherwise further characters are consumed (fraction,
 * optional exponent with optional sign), the saved text is converted with
 * strtod and the token becomes TOKEN_REAL.
 *
 * NOTE(review): the digit loops and their conditions are not visible in
 * this excerpt. Neither conversion is followed by a visible errno / ERANGE
 * check, so out-of-range literals may be accepted with a clamped value --
 * confirm.
 */
247 static void lex_scan_number(lex_t *lex, char c)
249 const char *saved_text;
252 lex->token = TOKEN_INVALID;
255 c = lex_get_save(lex);
258 c = lex_get_save(lex);
260 lex_unget_unsave(lex, c);
264 else /* c != '0' */ {
265 c = lex_get_save(lex);
267 c = lex_get_save(lex);
/* no fraction and no exponent: this is an integer literal */
270 if(c != '.' && c != 'E' && c != 'e') {
/* c belongs to the next token, so hand it back before converting */
271 lex_unget_unsave(lex, c);
272 lex->token = TOKEN_INTEGER;
274 saved_text = strbuffer_value(&lex->saved_text);
275 lex->value.integer = strtol(saved_text, &end, 10);
/* the assert checks that strtol consumed the entire saved text */
276 assert(end == saved_text + lex->saved_text.length);
287 c = lex_get_save(lex);
289 c = lex_get_save(lex);
/* exponent part, with an optional sign right after the marker */
292 if(c == 'E' || c == 'e') {
293 c = lex_get_save(lex);
294 if(c == '+' || c == '-')
295 c = lex_get_save(lex);
298 lex_unget_unsave(lex, c);
302 c = lex_get_save(lex);
304 c = lex_get_save(lex);
/* c belongs to the next token, so hand it back before converting */
307 lex_unget_unsave(lex, c);
308 lex->token = TOKEN_REAL;
310 saved_text = strbuffer_value(&lex->saved_text);
311 lex->value.real = strtod(saved_text, &end);
/* the assert checks that strtod consumed the entire saved text */
312 assert(end == saved_text + lex->saved_text.length);
/*
 * Produce the next token in lex->token. Visible steps: saved_text is
 * cleared; a string owned by a previous TOKEN_STRING is freed and its
 * pointer reset; spaces, tabs, newlines and carriage returns are skipped;
 * then the first significant character selects the token kind: end of input
 * (TOKEN_EOF), one of the six punctuation characters, a string, a number
 * (digit or minus sign), or an alphabetic word that must be exactly true,
 * false or null. Other input yields TOKEN_INVALID.
 *
 * NOTE(review): the condition guarding the lex_scan_string call, the
 * declaration of c and the return statement are not visible in this excerpt.
 * NOTE(review): isdigit, isupper and islower require an argument that is
 * representable as unsigned char or equal to EOF; if c is a plain char
 * holding a byte of 0x80 or above on a signed-char platform the calls are
 * undefined -- confirm the declared type of c.
 */
318 static int lex_scan(lex_t *lex)
322 strbuffer_clear(&lex->saved_text);
/* the previous string value is owned by the lexer: release it first */
324 if(lex->token == TOKEN_STRING) {
325 free(lex->value.string);
326 lex->value.string = NULL;
330 while(c == ' ' || c == '\t' || c == '\n' || c == '\r')
/* EOF is only trusted when the eof callback agrees */
338 if(c == EOF && lex_eof(lex)) {
339 lex->token = TOKEN_EOF;
345 if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')
349 lex_scan_string(lex);
351 else if(isdigit(c) || c == '-')
352 lex_scan_number(lex, c);
354 else if(isupper(c) || islower(c)) {
355 /* eat up the whole identifier for clearer error messages */
356 const char *saved_text;
358 c = lex_get_save(lex);
359 while(isupper(c) || islower(c))
360 c = lex_get_save(lex);
/* the first non-letter belongs to the next token */
361 lex_unget_unsave(lex, c);
363 saved_text = strbuffer_value(&lex->saved_text);
/* only the three literal names are valid words; comparison is exact */
365 if(strcmp(saved_text, "true") == 0)
366 lex->token = TOKEN_TRUE;
367 else if(strcmp(saved_text, "false") == 0)
368 lex->token = TOKEN_FALSE;
369 else if(strcmp(saved_text, "null") == 0)
370 lex->token = TOKEN_NULL;
372 lex->token = TOKEN_INVALID;
376 lex->token = TOKEN_INVALID;
/*
 * Initialize a lexer: set up its stream with the given callbacks and data
 * pointer, initialize the saved_text buffer, and start with token set to
 * TOKEN_INVALID.
 * NOTE(review): the statement executed when strbuffer_init reports failure
 * and the final return are not visible in this excerpt; json_loads and
 * json_loadf treat a non-zero result from this function as failure.
 */
382 static int lex_init(lex_t *lex, get_func get, eof_func eof, void *data)
384 stream_init(&lex->stream, get, eof, data);
385 if(strbuffer_init(&lex->saved_text))
388 lex->token = TOKEN_INVALID;
/*
 * Release lexer resources. A string value is freed only when the current
 * token is TOKEN_STRING, which is the state in which the lexer owns one.
 * NOTE(review): releasing saved_text is presumably done in lines not shown
 * in this excerpt -- confirm.
 */
394 static void lex_close(lex_t *lex)
396 if(lex->token == TOKEN_STRING)
397 free(lex->value.string);
/*
 * Forward declaration: parse_object and parse_array call parse_value for
 * nested values, and parse_value calls them back for nested containers.
 */
403 static json_t *parse_value(lex_t *lex, json_error_t *error);
/*
 * Parse the members of a JSON object into a value created by json_object.
 *
 * Visible checks, in order: a closing brace as the current token is tested
 * first; a member key must be a TOKEN_STRING and is copied with strdup; a
 * colon must follow; the member value comes from parse_value and is stored
 * with json_object_set (a non-zero result is handled as a special case);
 * a token other than a comma ends the member list; finally a closing brace
 * is required. Syntax errors are reported through error_set.
 *
 * NOTE(review): token advancement, the check of the strdup result, cleanup
 * of key / value / object on failure and the return statements are not
 * visible in this excerpt.
 */
405 static json_t *parse_object(lex_t *lex, json_error_t *error)
407 json_t *object = json_object();
412 if(lex->token == '}')
419 if(lex->token != TOKEN_STRING) {
420 error_set(error, lex, "string or '}' expected");
/* copy the key: the lexer string is freed when the next token is scanned */
424 key = strdup(lex->value.string);
429 if(lex->token != ':') {
431 error_set(error, lex, "':' expected");
436 value = parse_value(lex, error);
442 if(json_object_set(object, key, value)) {
452 if(lex->token != ',')
458 if(lex->token != '}') {
459 error_set(error, lex, "'}' expected");
/*
 * Parse the elements of a JSON array into a value created by json_array.
 *
 * Visible checks, in order: a closing bracket as the current token is tested
 * first; each element comes from parse_value and is added with
 * json_array_append (a non-zero result is handled as a special case); a
 * token other than a comma ends the element list; finally a closing bracket
 * is required, otherwise an error is reported through error_set.
 *
 * NOTE(review): token advancement, cleanup on failure and the return
 * statements are not visible in this excerpt.
 */
470 static json_t *parse_array(lex_t *lex, json_error_t *error)
472 json_t *array = json_array();
477 if(lex->token == ']')
481 json_t *elem = parse_value(lex, error);
485 if(json_array_append(array, elem)) {
492 if(lex->token != ',')
498 if(lex->token != ']') {
499 error_set(error, lex, "']' expected");
/*
 * Build a json value from the current token. Visible conversions: the lexer
 * string goes through json_string, the integer through json_integer, the
 * real through json_real, and nested containers through parse_object and
 * parse_array. Two error paths report an invalid token and an unexpected
 * token through error_set.
 *
 * NOTE(review): apart from the TOKEN_INTEGER label, the case labels, the
 * handling of true / false / null and the return statement are not visible
 * in this excerpt.
 */
510 static json_t *parse_value(lex_t *lex, json_error_t *error)
516 json = json_string(lex->value.string);
520 case TOKEN_INTEGER: {
521 json = json_integer(lex->value.integer);
526 json = json_real(lex->value.real);
543 json = parse_object(lex, error);
547 json = parse_array(lex, error);
551 error_set(error, lex, "invalid token");
555 error_set(error, lex, "unexpected token");
/*
 * Parse a complete JSON text. The current token must open an array or an
 * object; any other token is reported through error_set. On success the work
 * is delegated to parse_value and its result is returned.
 * NOTE(review): the statement that fetches the first token and the action
 * taken after reporting the error are not visible in this excerpt.
 */
565 json_t *parse_json(lex_t *lex, json_error_t *error)
569 if(lex->token != '[' && lex->token != '{') {
570 error_set(error, lex, "'[' or '{' expected");
574 return parse_value(lex, error);
/*
 * Load JSON from the file at path. The file is opened in text read mode; an
 * open failure is reported through error_set with no lexer context (lex is
 * NULL) and the strerror text for errno. Parsing is delegated to json_loadf.
 * NOTE(review): closing the file and the return statements are not visible
 * in this excerpt.
 */
577 json_t *json_load(const char *path, json_error_t *error)
582 fp = fopen(path, "r");
585 error_set(error, NULL, "unable to open %s: %s",
586 path, strerror(errno));
590 result = json_loadf(fp, error);
/*
 * get_func implementation for in-memory input: data points to a
 * string_data_t; the byte at the current position is read and the position
 * is advanced by one.
 * NOTE(review): how the terminating NUL is turned into an end-of-input
 * result, and the return statement, are not visible in this excerpt.
 */
602 static int string_get(void *data)
605 string_data_t *stream = (string_data_t *)data;
606 c = stream->data[stream->pos++];
/*
 * eof_func implementation for in-memory input: returns 1 when the byte at
 * the current position is the terminating NUL and 0 otherwise.
 */
613 static int string_eof(void *data)
615 string_data_t *stream = (string_data_t *)data;
616 return (stream->data[stream->pos] == '\0');
/*
 * Parse JSON from a string. A string_data_t describes the input, the lexer
 * is initialized with the string_get / string_eof callbacks, and the text is
 * parsed with parse_json. Afterwards the current token must be TOKEN_EOF;
 * anything else is reported through error_set as trailing input.
 * NOTE(review): the initializer of stream_data, the failure path of
 * lex_init, cleanup of result and the lexer, and the return statements are
 * not visible in this excerpt.
 */
619 json_t *json_loads(const char *string, json_error_t *error)
624 string_data_t stream_data = {
629 if(lex_init(&lex, string_get, string_eof, (void *)&stream_data))
632 result = parse_json(&lex, error);
/* only one JSON text is allowed: nothing but end of input may follow */
637 if(lex.token != TOKEN_EOF) {
638 error_set(error, &lex, "end of file expected");
/*
 * Parse JSON from an open stdio stream. The lexer reads through fgetc and
 * feof, with input passed as the opaque data pointer, and the text is parsed
 * with parse_json.
 * NOTE(review): fgetc and feof take a FILE pointer, while get_func and
 * eof_func take a void pointer; calling a function through a pointer of an
 * incompatible type is undefined in ISO C even though it works on common
 * ABIs. Small wrapper functions would avoid the casts -- confirm whether
 * that matters for the supported platforms.
 * NOTE(review): the failure path of lex_init, lexer cleanup and the return
 * statements are not visible in this excerpt.
 */
648 json_t *json_loadf(FILE *input, json_error_t *error)
653 if(lex_init(&lex, (get_func)fgetc, (eof_func)feof, input))
656 result = parse_json(&lex, error);