"Initial commit to Gerrit"
[profile/ivi/json-c.git] / json_tokener.c
1 /*
2  * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3  *
4  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5  * Michael Clark <michael@metaparadigm.com>
6  *
7  * This library is free software; you can redistribute it and/or modify
8  * it under the terms of the MIT license. See COPYING for details.
9  *
10  *
11  * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
12  * The copyrights to the contents of this file are licensed under the MIT License
13  * (http://www.opensource.org/licenses/mit-license.php)
14  */
15
16 #include "config.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stddef.h>
21 #include <ctype.h>
22 #include <string.h>
23
24 #include "bits.h"
25 #include "debug.h"
26 #include "printbuf.h"
27 #include "arraylist.h"
28 #include "json_object.h"
29 #include "json_tokener.h"
30
31
32 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
33   /* MSC has the version as _strnicmp */
34 # define strncasecmp _strnicmp
35 #elif !HAVE_STRNCASECMP
36 # error You do not have strncasecmp on your system.
37 #endif /* HAVE_STRNCASECMP */
38
39
/* Spellings of the JSON keywords.  The null and boolean states compare the
 * characters gathered so far against these with strncasecmp(). */
static const char* json_null_str = "null";
static const char* json_true_str = "true";
static const char* json_false_str = "false";

/* Human-readable messages, looked up with tok->err as the index (see the
 * MC_DEBUG call at the end of json_tokener_parse_ex()).  The entry order
 * therefore has to follow the error enum, which is declared outside this
 * file -- presumably in json_tokener.h. */
const char* json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
60
61
62 struct json_tokener* json_tokener_new(void)
63 {
64   struct json_tokener *tok;
65
66   tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
67   if (!tok) return NULL;
68   tok->pb = printbuf_new();
69   json_tokener_reset(tok);
70   return tok;
71 }
72
73 void json_tokener_free(struct json_tokener *tok)
74 {
75   json_tokener_reset(tok);
76   if(tok) printbuf_free(tok->pb);
77   free(tok);
78 }
79
80 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
81 {
82   tok->stack[depth].state = json_tokener_state_eatws;
83   tok->stack[depth].saved_state = json_tokener_state_start;
84   json_object_put(tok->stack[depth].current);
85   tok->stack[depth].current = NULL;
86   free(tok->stack[depth].obj_field_name);
87   tok->stack[depth].obj_field_name = NULL;
88 }
89
90 void json_tokener_reset(struct json_tokener *tok)
91 {
92   int i;
93   if (!tok)
94     return;
95
96   for(i = tok->depth; i >= 0; i--)
97     json_tokener_reset_level(tok, i);
98   tok->depth = 0;
99   tok->err = json_tokener_success;
100 }
101
102 struct json_object* json_tokener_parse(const char *str)
103 {
104   struct json_tokener* tok;
105   struct json_object* obj;
106
107   tok = json_tokener_new();
108   obj = json_tokener_parse_ex(tok, str, -1);
109   if(tok->err != json_tokener_success)
110     obj = (struct json_object*)error_ptr(-tok->err);
111   json_tokener_free(tok);
112   return obj;
113 }
114
115
#if !HAVE_STRNDUP
/*
 * Fallback strndup() for platforms that lack one.
 *
 * Copies at most n characters of str into freshly malloc'ed storage and
 * always NUL-terminates the copy.  Only the first n bytes of str are ever
 * examined, so str does not need to be NUL-terminated as long as n bytes
 * are readable.
 *
 * Returns the copy (to be released with free()), or NULL when str is NULL
 * or the allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  size_t nn = 0;
  char* s;

  if(!str)
    return NULL;

  /* Bounded length scan: stop at the first NUL or after n bytes. */
  while(nn < n && str[nn] != '\0')
    ++nn;

  s = (char*)malloc(nn + 1);
  if(s) {
    memcpy(s, str, nn);
    s[nn] = '\0';
  }

  return s;
}
#endif
136
137
/* Shorthand for the fields of the stack level the tokener is currently on.
 * Each one expands using a variable literally named `tok`, so they are only
 * usable where such a variable is in scope. */
#define state  tok->stack[tok->depth].state
#define saved_state  tok->stack[tok->depth].saved_state
#define current tok->stack[tok->depth].current
#define obj_field_name tok->stack[tok->depth].obj_field_name

/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character by character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * The POP_CHAR() and ADVANCE_CHAR() macros hold the code that is
 * common to both the main loop and the tighter loops.
 */

/* POP_CHAR(dest, tok) macro:
 *   Not really a pop(): it peeks at the current char and stores it in dest.
 *   Evaluates to 1 when a char was stored.  When tok->char_offset has
 *   reached len it evaluates to 0 and sets tok->err: json_tokener_success
 *   if the top level is only waiting to finish, json_tokener_continue
 *   otherwise.
 *   Implicit inputs:  str, len vars
 */
#define POP_CHAR(dest, tok)                                                  \
  (((tok)->char_offset != len) ?                                            \
   (((dest) = *str), 1)                                                     \
   :                                                                        \
   (((tok)->depth == 0 &&                                                   \
     state == json_tokener_state_eatws &&                                   \
     saved_state == json_tokener_state_finish) ?                            \
    (((tok)->err = json_tokener_success), 0)                                \
    :                                                                       \
    (((tok)->err = json_tokener_continue), 0)                               \
    )                                                                       \
   )

/* ADVANCE_CHAR() macro:
 *   Increments str and tok->char_offset.
 *   For convenience of existing conditionals, evaluates to the current
 *   value of c, i.e. the char that was just stepped over (0 on eof).
 *   Implicit inputs:  c var
 */
#define ADVANCE_CHAR(str, tok) \
  ( (str)++, ++((tok)->char_offset), c)

/* End optimization macro defs */
180
181
182 struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
183                                           const char *str, int len)
184 {
185   struct json_object *obj = NULL;
186   char c = '\1';
187
188   tok->char_offset = 0;
189   tok->err = json_tokener_success;
190
191   while (POP_CHAR(c, tok)) {
192
193   redo_char:
194     switch(state) {
195
196     case json_tokener_state_eatws:
197       /* Advance until we change state */
198       while (isspace(c)) {
199         if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
200           goto out;
201       }
202       if(c == '/') {
203         printbuf_reset(tok->pb);
204         printbuf_memappend_fast(tok->pb, &c, 1);
205         state = json_tokener_state_comment_start;
206       } else {
207         state = saved_state;
208         goto redo_char;
209       }
210       break;
211
212     case json_tokener_state_start:
213       switch(c) {
214       case '{':
215         state = json_tokener_state_eatws;
216         saved_state = json_tokener_state_object_field_start;
217         current = json_object_new_object();
218         break;
219       case '[':
220         state = json_tokener_state_eatws;
221         saved_state = json_tokener_state_array;
222         current = json_object_new_array();
223         break;
224       case 'N':
225       case 'n':
226         state = json_tokener_state_null;
227         printbuf_reset(tok->pb);
228         tok->st_pos = 0;
229         goto redo_char;
230       case '"':
231       case '\'':
232         state = json_tokener_state_string;
233         printbuf_reset(tok->pb);
234         tok->quote_char = c;
235         break;
236       case 'T':
237       case 't':
238       case 'F':
239       case 'f':
240         state = json_tokener_state_boolean;
241         printbuf_reset(tok->pb);
242         tok->st_pos = 0;
243         goto redo_char;
244 #if defined(__GNUC__)
245           case '0' ... '9':
246 #else
247           case '0':
248       case '1':
249       case '2':
250       case '3':
251       case '4':
252       case '5':
253       case '6':
254       case '7':
255       case '8':
256       case '9':
257 #endif
258       case '-':
259         state = json_tokener_state_number;
260         printbuf_reset(tok->pb);
261         tok->is_double = 0;
262         goto redo_char;
263       default:
264         tok->err = json_tokener_error_parse_unexpected;
265         goto out;
266       }
267       break;
268
269     case json_tokener_state_finish:
270       if(tok->depth == 0) goto out;
271       obj = json_object_get(current);
272       json_tokener_reset_level(tok, tok->depth);
273       tok->depth--;
274       goto redo_char;
275
276     case json_tokener_state_null:
277       printbuf_memappend_fast(tok->pb, &c, 1);
278       if(strncasecmp(json_null_str, tok->pb->buf,
279                      json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
280         if(tok->st_pos == strlen(json_null_str)) {
281           current = NULL;
282           saved_state = json_tokener_state_finish;
283           state = json_tokener_state_eatws;
284           goto redo_char;
285         }
286       } else {
287         tok->err = json_tokener_error_parse_null;
288         goto out;
289       }
290       tok->st_pos++;
291       break;
292
293     case json_tokener_state_comment_start:
294       if(c == '*') {
295         state = json_tokener_state_comment;
296       } else if(c == '/') {
297         state = json_tokener_state_comment_eol;
298       } else {
299         tok->err = json_tokener_error_parse_comment;
300         goto out;
301       }
302       printbuf_memappend_fast(tok->pb, &c, 1);
303       break;
304
305     case json_tokener_state_comment:
306               {
307           /* Advance until we change state */
308           const char *case_start = str;
309           while(c != '*') {
310             if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
311               printbuf_memappend_fast(tok->pb, case_start, str-case_start);
312               goto out;
313             } 
314           }
315           printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
316           state = json_tokener_state_comment_end;
317         }
318             break;
319
320     case json_tokener_state_comment_eol:
321       {
322         /* Advance until we change state */
323         const char *case_start = str;
324         while(c != '\n') {
325           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
326             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
327             goto out;
328           }
329         }
330         printbuf_memappend_fast(tok->pb, case_start, str-case_start);
331         MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
332         state = json_tokener_state_eatws;
333       }
334       break;
335
336     case json_tokener_state_comment_end:
337       printbuf_memappend_fast(tok->pb, &c, 1);
338       if(c == '/') {
339         MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
340         state = json_tokener_state_eatws;
341       } else {
342         state = json_tokener_state_comment;
343       }
344       break;
345
346     case json_tokener_state_string:
347       {
348         /* Advance until we change state */
349         const char *case_start = str;
350         while(1) {
351           if(c == tok->quote_char) {
352             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
353             current = json_object_new_string(tok->pb->buf);
354             saved_state = json_tokener_state_finish;
355             state = json_tokener_state_eatws;
356             break;
357           } else if(c == '\\') {
358             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
359             saved_state = json_tokener_state_string;
360             state = json_tokener_state_string_escape;
361             break;
362           }
363           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
364             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
365             goto out;
366           }
367         }
368       }
369       break;
370
371     case json_tokener_state_string_escape:
372       switch(c) {
373       case '"':
374       case '\\':
375       case '/':
376         printbuf_memappend_fast(tok->pb, &c, 1);
377         state = saved_state;
378         break;
379       case 'b':
380       case 'n':
381       case 'r':
382       case 't':
383         if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
384         else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
385         else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
386         else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
387         state = saved_state;
388         break;
389       case 'u':
390         tok->ucs_char = 0;
391         tok->st_pos = 0;
392         state = json_tokener_state_escape_unicode;
393         break;
394       default:
395         tok->err = json_tokener_error_parse_string;
396         goto out;
397       }
398       break;
399
400     case json_tokener_state_escape_unicode:
401             /* Note that the following code is inefficient for handling large
402        * chunks of extended chars, calling printbuf_memappend() once
403        * for each multi-byte character of input.
404        * This is a good area for future optimization.
405        */
406         {
407           /* Advance until we change state */
408           while(1) {
409             if(strchr(json_hex_chars, c)) {
410               tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
411               if(tok->st_pos == 4) {
412                 unsigned char utf_out[3];
413                 if (tok->ucs_char < 0x80) {
414                   utf_out[0] = tok->ucs_char;
415                   printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
416                 } else if (tok->ucs_char < 0x800) {
417                   utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
418                   utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
419                   printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
420                 } else {
421                   utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
422                   utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
423                   utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
424                   printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
425                 }
426                 state = saved_state;
427                 break;
428               }
429             } else {
430               tok->err = json_tokener_error_parse_string;
431               goto out;
432                   }
433           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
434             goto out;
435         }
436       }
437       break;
438
439     case json_tokener_state_boolean:
440       printbuf_memappend_fast(tok->pb, &c, 1);
441       if(strncasecmp(json_true_str, tok->pb->buf,
442                      json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
443         if(tok->st_pos == strlen(json_true_str)) {
444           current = json_object_new_boolean(1);
445           saved_state = json_tokener_state_finish;
446           state = json_tokener_state_eatws;
447           goto redo_char;
448         }
449       } else if(strncasecmp(json_false_str, tok->pb->buf,
450                             json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
451         if(tok->st_pos == strlen(json_false_str)) {
452           current = json_object_new_boolean(0);
453           saved_state = json_tokener_state_finish;
454           state = json_tokener_state_eatws;
455           goto redo_char;
456         }
457       } else {
458         tok->err = json_tokener_error_parse_boolean;
459         goto out;
460       }
461       tok->st_pos++;
462       break;
463
464     case json_tokener_state_number:
465       {
466         /* Advance until we change state */
467         const char *case_start = str;
468         int case_len=0;
469         while(c && strchr(json_number_chars, c)) {
470           ++case_len;
471           if(c == '.' || c == 'e') tok->is_double = 1;
472           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
473             printbuf_memappend_fast(tok->pb, case_start, case_len);
474             goto out;
475           }
476         }
477         if (case_len>0)
478           printbuf_memappend_fast(tok->pb, case_start, case_len);
479       }
480       {
481         int numi;
482         double numd;
483         if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
484           current = json_object_new_int(numi);
485         } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
486           current = json_object_new_double(numd);
487         } else {
488           tok->err = json_tokener_error_parse_number;
489           goto out;
490         }
491         saved_state = json_tokener_state_finish;
492         state = json_tokener_state_eatws;
493         goto redo_char;
494       }
495       break;
496
497     case json_tokener_state_array:
498       if(c == ']') {
499         saved_state = json_tokener_state_finish;
500         state = json_tokener_state_eatws;
501       } else {
502         if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
503           tok->err = json_tokener_error_depth;
504           goto out;
505         }
506         state = json_tokener_state_array_add;
507         tok->depth++;
508         json_tokener_reset_level(tok, tok->depth);
509         goto redo_char;
510       }
511       break;
512
513     case json_tokener_state_array_add:
514       json_object_array_add(current, obj);
515       saved_state = json_tokener_state_array_sep;
516       state = json_tokener_state_eatws;
517       goto redo_char;
518
519     case json_tokener_state_array_sep:
520       if(c == ']') {
521         saved_state = json_tokener_state_finish;
522         state = json_tokener_state_eatws;
523       } else if(c == ',') {
524         saved_state = json_tokener_state_array;
525         state = json_tokener_state_eatws;
526       } else {
527         tok->err = json_tokener_error_parse_array;
528         goto out;
529       }
530       break;
531
532     case json_tokener_state_object_field_start:
533       if(c == '}') {
534         saved_state = json_tokener_state_finish;
535         state = json_tokener_state_eatws;
536       } else if (c == '"' || c == '\'') {
537         tok->quote_char = c;
538         printbuf_reset(tok->pb);
539         state = json_tokener_state_object_field;
540       } else {
541         tok->err = json_tokener_error_parse_object_key_name;
542         goto out;
543       }
544       break;
545
546     case json_tokener_state_object_field:
547       {
548         /* Advance until we change state */
549         const char *case_start = str;
550         while(1) {
551           if(c == tok->quote_char) {
552             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
553             obj_field_name = strdup(tok->pb->buf);
554             saved_state = json_tokener_state_object_field_end;
555             state = json_tokener_state_eatws;
556             break;
557           } else if(c == '\\') {
558             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
559             saved_state = json_tokener_state_object_field;
560             state = json_tokener_state_string_escape;
561             break;
562           }
563           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
564             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
565             goto out;
566           }
567         }
568       }
569       break;
570
571     case json_tokener_state_object_field_end:
572       if(c == ':') {
573         saved_state = json_tokener_state_object_value;
574         state = json_tokener_state_eatws;
575       } else {
576         tok->err = json_tokener_error_parse_object_key_sep;
577         goto out;
578       }
579       break;
580
581     case json_tokener_state_object_value:
582       if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
583         tok->err = json_tokener_error_depth;
584         goto out;
585       }
586       state = json_tokener_state_object_value_add;
587       tok->depth++;
588       json_tokener_reset_level(tok, tok->depth);
589       goto redo_char;
590
591     case json_tokener_state_object_value_add:
592       json_object_object_add(current, obj_field_name, obj);
593       free(obj_field_name);
594       obj_field_name = NULL;
595       saved_state = json_tokener_state_object_sep;
596       state = json_tokener_state_eatws;
597       goto redo_char;
598
599     case json_tokener_state_object_sep:
600       if(c == '}') {
601         saved_state = json_tokener_state_finish;
602         state = json_tokener_state_eatws;
603       } else if(c == ',') {
604         saved_state = json_tokener_state_object_field_start;
605         state = json_tokener_state_eatws;
606       } else {
607         tok->err = json_tokener_error_parse_object_value_sep;
608         goto out;
609       }
610       break;
611
612     }
613     if (!ADVANCE_CHAR(str, tok))
614       goto out;
615   } /* while(POP_CHAR) */
616
617  out:
618   if (!c) { /* We hit an eof char (0) */
619     if(state != json_tokener_state_finish &&
620        saved_state != json_tokener_state_finish)
621       tok->err = json_tokener_error_parse_eof;
622   }
623
624   if(tok->err == json_tokener_success) return json_object_get(current);
625   MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
626            json_tokener_errors[tok->err], tok->char_offset);
627   return NULL;
628 }