json_tokener.c

   1 /*
   2  * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
   3  *
   4  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
   5  * Michael Clark <michael@metaparadigm.com>
   6  *
   7  * This library is free software; you can redistribute it and/or modify
   8  * it under the terms of the MIT license. See COPYING for details.
   9  *
  10  *
  11  * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
  12  * The copyrights to the contents of this file are licensed under the MIT License
  13  * (http://www.opensource.org/licenses/mit-license.php)
  14  */
  15
  16 #include "config.h"
  17
  18 #include <math.h>
  19 #include "math_compat.h"
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <stddef.h>
  23 #include <ctype.h>
  24 #include <string.h>
  25 #include <limits.h>
  26
  27 #include "debug.h"
  28 #include "printbuf.h"
  29 #include "arraylist.h"
  30 #include "json_inttypes.h"
  31 #include "json_object.h"
  32 #include "json_object_private.h"
  33 #include "json_tokener.h"
  34 #include "json_util.h"
  35 #include "strdup_compat.h"
  36
  37 #ifdef HAVE_LOCALE_H
  38 #include <locale.h>
  39 #endif /* HAVE_LOCALE_H */
  40 #ifdef HAVE_XLOCALE_H
  41 #include <xlocale.h>
  42 #endif
  43
  44 #define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
  45
  46 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
  47   /* MSC has the version as _strnicmp */
  48 # define strncasecmp _strnicmp
  49 #elif !HAVE_STRNCASECMP
  50 # error You do not have strncasecmp on your system.
  51 #endif /* HAVE_STRNCASECMP */
  52
  53 /* Use C99 NAN by default; if not available, nan("") should work too. */
  54 #ifndef NAN
  55 #define NAN nan("")
  56 #endif /* !NAN */
  57
  58 static const char json_null_str[] = "null";
  59 static const int json_null_str_len = sizeof(json_null_str) - 1;
  60 static const char json_inf_str[] = "Infinity";
  61 static const char json_inf_str_lower[] = "infinity";
  62 static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1;
  63 static const char json_nan_str[] = "NaN";
  64 static const int json_nan_str_len = sizeof(json_nan_str) - 1;
  65 static const char json_true_str[] = "true";
  66 static const int json_true_str_len = sizeof(json_true_str) - 1;
  67 static const char json_false_str[] = "false";
  68 static const int json_false_str_len = sizeof(json_false_str) - 1;
  69
  70 static const char* json_tokener_errors[] = {
  71   "success",
  72   "continue",
  73   "nesting too deep",
  74   "unexpected end of data",
  75   "unexpected character",
  76   "null expected",
  77   "boolean expected",
  78   "number expected",
  79   "array value separator ',' expected",
  80   "quoted object property name expected",
  81   "object property name separator ':' expected",
  82   "object value separator ',' expected",
  83   "invalid string sequence",
  84   "expected comment",
  85   "buffer size overflow"
  86 };
  87
  88 const char *json_tokener_error_desc(enum json_tokener_error jerr)
  89 {
  90         int jerr_int = (int) jerr;
  91         if (jerr_int < 0 ||
  92             jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
  93                 return "Unknown error, "
  94                        "invalid json_tokener_error value passed to json_tokener_error_desc()";
  95         return json_tokener_errors[jerr];
  96 }
  97
  98 enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
  99 {
 100         return tok->err;
 101 }
 102
 103 /* Stuff for decoding unicode sequences */
 104 #define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
 105 #define IS_LOW_SURROGATE(uc)  (((uc) & 0xFC00) == 0xDC00)
 106 #define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
 107 static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD };
 108
 109 struct json_tokener* json_tokener_new_ex(int depth)
 110 {
 111   struct json_tokener *tok;
 112
 113   tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
 114   if (!tok) return NULL;
 115   tok->stack = (struct json_tokener_srec *) calloc(depth,
 116                                                    sizeof(struct json_tokener_srec));
 117   if (!tok->stack) {
 118     free(tok);
 119     return NULL;
 120   }
 121   tok->pb = printbuf_new();
 122   tok->max_depth = depth;
 123   json_tokener_reset(tok);
 124   return tok;
 125 }
 126
 127 struct json_tokener* json_tokener_new(void)
 128 {
 129   return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
 130 }
 131
 132 void json_tokener_free(struct json_tokener *tok)
 133 {
 134   json_tokener_reset(tok);
 135   if (tok->pb) printbuf_free(tok->pb);
 136   free(tok->stack);
 137   free(tok);
 138 }
 139
 140 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
 141 {
 142   tok->stack[depth].state = json_tokener_state_eatws;
 143   tok->stack[depth].saved_state = json_tokener_state_start;
 144   json_object_put(tok->stack[depth].current);
 145   tok->stack[depth].current = NULL;
 146   free(tok->stack[depth].obj_field_name);
 147   tok->stack[depth].obj_field_name = NULL;
 148 }
 149
 150 void json_tokener_reset(struct json_tokener *tok)
 151 {
 152   int i;
 153   if (!tok)
 154     return;
 155
 156   for(i = tok->depth; i >= 0; i--)
 157     json_tokener_reset_level(tok, i);
 158   tok->depth = 0;
 159   tok->err = json_tokener_success;
 160 }
 161
 162 struct json_object* json_tokener_parse(const char *str)
 163 {
 164     enum json_tokener_error jerr_ignored;
 165     struct json_object* obj;
 166     obj = json_tokener_parse_verbose(str, &jerr_ignored);
 167     return obj;
 168 }
 169
 170 struct json_object* json_tokener_parse_verbose(const char *str,
 171                                                enum json_tokener_error *error)
 172 {
 173     struct json_tokener* tok;
 174     struct json_object* obj;
 175
 176     tok = json_tokener_new();
 177     if (!tok)
 178       return NULL;
 179     obj = json_tokener_parse_ex(tok, str, -1);
 180     *error = tok->err;
 181     if(tok->err != json_tokener_success) {
 182                 if (obj != NULL)
 183                         json_object_put(obj);
 184         obj = NULL;
 185     }
 186
 187     json_tokener_free(tok);
 188     return obj;
 189 }
 190
 191 #define state  tok->stack[tok->depth].state
 192 #define saved_state  tok->stack[tok->depth].saved_state
 193 #define current tok->stack[tok->depth].current
 194 #define obj_field_name tok->stack[tok->depth].obj_field_name
 195
/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character by character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */
 208
 209 /* PEEK_CHAR(dest, tok) macro:
 210  *   Peeks at the current char and stores it in dest.
 211  *   Returns 1 on success, sets tok->err and returns 0 if no more chars.
 212  *   Implicit inputs:  str, len vars
 213  */
 214 #define PEEK_CHAR(dest, tok)                    \
 215   (((tok)->char_offset == len) ?                \
 216    (((tok)->depth == 0 &&                       \
 217      state == json_tokener_state_eatws &&       \
 218      saved_state == json_tokener_state_finish   \
 219      ) ?                                        \
 220     (((tok)->err = json_tokener_success), 0)    \
 221     :                                           \
 222     (((tok)->err = json_tokener_continue), 0)   \
 223     ) :                                         \
 224    (((dest) = *str), 1)                         \
 225    )
 226
 227 /* ADVANCE_CHAR() macro:
 228  *   Incrementes str & tok->char_offset.
 229  *   For convenience of existing conditionals, returns the old value of c (0 on eof)
 230  *   Implicit inputs:  c var
 231  */
 232 #define ADVANCE_CHAR(str, tok) \
 233   ( ++(str), ((tok)->char_offset)++, c)
 234
 235
 236 /* End optimization macro defs */
 237
 238
 239 struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
 240                                           const char *str, int len)
 241 {
 242   struct json_object *obj = NULL;
 243   char c = '\1';
 244 #ifdef HAVE_USELOCALE
 245   locale_t oldlocale = uselocale(NULL);
 246   locale_t newloc;
 247 #elif defined(HAVE_SETLOCALE)
 248   char *oldlocale = NULL;
 249 #endif
 250
 251   tok->char_offset = 0;
 252   tok->err = json_tokener_success;
 253
 254   /* this interface is presently not 64-bit clean due to the int len argument
 255      and the internal printbuf interface that takes 32-bit int len arguments
 256      so the function limits the maximum string size to INT32_MAX (2GB).
 257      If the function is called with len == -1 then strlen is called to check
 258      the string length is less than INT32_MAX (2GB) */
 259   if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) {
 260     tok->err = json_tokener_error_size;
 261     return NULL;
 262   }
 263
 264 #ifdef HAVE_USELOCALE
 265   {
 266     locale_t duploc = duplocale(oldlocale);
 267     newloc = newlocale(LC_NUMERIC, "C", duploc);
 268     // XXX at least Debian 8.4 has a bug in newlocale where it doesn't
 269     //  change the decimal separator unless you set LC_TIME!
 270     if (newloc)
 271     {
 272       duploc = newloc; // original duploc has been freed by newlocale()
 273       newloc = newlocale(LC_TIME, "C", duploc);
 274     }
 275     if (newloc == NULL)
 276     {
 277       freelocale(duploc);
 278       return NULL;
 279     }
 280     uselocale(newloc);
 281   }
 282 #elif defined(HAVE_SETLOCALE)
 283   {
 284     char *tmplocale;
 285     tmplocale = setlocale(LC_NUMERIC, NULL);
 286     if (tmplocale) oldlocale = strdup(tmplocale);
 287     setlocale(LC_NUMERIC, "C");
 288   }
 289 #endif
 290
 291   while (PEEK_CHAR(c, tok)) {
 292
 293   redo_char:
 294     switch(state) {
 295
 296     case json_tokener_state_eatws:
 297       /* Advance until we change state */
 298       while (isspace((int)c)) {
 299         if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
 300           goto out;
 301       }
 302       if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) {
 303         printbuf_reset(tok->pb);
 304         printbuf_memappend_fast(tok->pb, &c, 1);
 305         state = json_tokener_state_comment_start;
 306       } else {
 307         state = saved_state;
 308         goto redo_char;
 309       }
 310       break;
 311
 312     case json_tokener_state_start:
 313       switch(c) {
 314       case '{':
 315         state = json_tokener_state_eatws;
 316         saved_state = json_tokener_state_object_field_start;
 317         current = json_object_new_object();
 318         if(current == NULL)
 319                 goto out;
 320         break;
 321       case '[':
 322         state = json_tokener_state_eatws;
 323         saved_state = json_tokener_state_array;
 324         current = json_object_new_array();
 325         if(current == NULL)
 326                 goto out;
 327         break;
 328       case 'I':
 329       case 'i':
 330         state = json_tokener_state_inf;
 331         printbuf_reset(tok->pb);
 332         tok->st_pos = 0;
 333         goto redo_char;
 334       case 'N':
 335       case 'n':
 336         state = json_tokener_state_null; // or NaN
 337         printbuf_reset(tok->pb);
 338         tok->st_pos = 0;
 339         goto redo_char;
 340       case '\'':
 341         if (tok->flags & JSON_TOKENER_STRICT) {
 342             /* in STRICT mode only double-quote are allowed */
 343             tok->err = json_tokener_error_parse_unexpected;
 344             goto out;
 345         }
 346         /* FALLTHRU */
 347       case '"':
 348         state = json_tokener_state_string;
 349         printbuf_reset(tok->pb);
 350         tok->quote_char = c;
 351         break;
 352       case 'T':
 353       case 't':
 354       case 'F':
 355       case 'f':
 356         state = json_tokener_state_boolean;
 357         printbuf_reset(tok->pb);
 358         tok->st_pos = 0;
 359         goto redo_char;
 360       case '0':
 361       case '1':
 362       case '2':
 363       case '3':
 364       case '4':
 365       case '5':
 366       case '6':
 367       case '7':
 368       case '8':
 369       case '9':
 370       case '-':
 371         state = json_tokener_state_number;
 372         printbuf_reset(tok->pb);
 373         tok->is_double = 0;
 374         goto redo_char;
 375       default:
 376         tok->err = json_tokener_error_parse_unexpected;
 377         goto out;
 378       }
 379       break;
 380
 381     case json_tokener_state_finish:
 382       if(tok->depth == 0) goto out;
 383       obj = json_object_get(current);
 384       json_tokener_reset_level(tok, tok->depth);
 385       tok->depth--;
 386       goto redo_char;
 387
 388     case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
 389       {
 390         /* If we were guaranteed to have len set, then we could (usually) handle
 391          * the entire "Infinity" check in a single strncmp (strncasecmp), but
 392          * since len might be -1 (i.e. "read until \0"), we need to check it
 393          * a character at a time.
 394          * Trying to handle it both ways would make this code considerably more
 395          * complicated with likely little performance benefit.
 396          */
 397         int is_negative = 0;
 398         const char *_json_inf_str = json_inf_str;
 399         if (!(tok->flags & JSON_TOKENER_STRICT))
 400                 _json_inf_str = json_inf_str_lower;
 401
 402         /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
 403         while (tok->st_pos < (int)json_inf_str_len)
 404         {
 405                 char inf_char = *str;
 406                 if (!(tok->flags & JSON_TOKENER_STRICT))
 407                         inf_char = tolower((int)*str);
 408                 if (inf_char != _json_inf_str[tok->st_pos])
 409                 {
 410                         tok->err = json_tokener_error_parse_unexpected;
 411                         goto out;
 412                 }
 413                 tok->st_pos++;
 414                 (void)ADVANCE_CHAR(str, tok);
 415                 if (!PEEK_CHAR(c, tok))
 416                 {
 417                         /* out of input chars, for now at least */
 418                         goto out;
 419                 }
 420         }
 421         /* We checked the full length of "Infinity", so create the object.
 422          * When handling -Infinity, the number parsing code will have dropped
 423          * the "-" into tok->pb for us, so check it now.
 424          */
 425         if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
 426         {
 427                 is_negative = 1;
 428         }
 429         current = json_object_new_double(is_negative
 430                                          ? -INFINITY : INFINITY);
 431         if (current == NULL)
 432                 goto out;
 433         saved_state = json_tokener_state_finish;
 434         state = json_tokener_state_eatws;
 435         goto redo_char;
 436
 437       }
 438       break;
 439     case json_tokener_state_null: /* aka starts with 'n' */
 440       {
 441         int size;
 442         int size_nan;
 443         printbuf_memappend_fast(tok->pb, &c, 1);
 444         size = json_min(tok->st_pos+1, json_null_str_len);
 445         size_nan = json_min(tok->st_pos+1, json_nan_str_len);
 446         if((!(tok->flags & JSON_TOKENER_STRICT) &&
 447           strncasecmp(json_null_str, tok->pb->buf, size) == 0)
 448           || (strncmp(json_null_str, tok->pb->buf, size) == 0)
 449           ) {
 450           if (tok->st_pos == json_null_str_len) {
 451             current = NULL;
 452             saved_state = json_tokener_state_finish;
 453             state = json_tokener_state_eatws;
 454             goto redo_char;
 455           }
 456         }
 457         else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
 458                   strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
 459                  (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)
 460                 )
 461         {
 462                 if (tok->st_pos == json_nan_str_len)
 463                 {
 464                         current = json_object_new_double(NAN);
 465                         if (current == NULL)
 466                             goto out;
 467                         saved_state = json_tokener_state_finish;
 468                         state = json_tokener_state_eatws;
 469                         goto redo_char;
 470                 }
 471         } else {
 472           tok->err = json_tokener_error_parse_null;
 473           goto out;
 474         }
 475         tok->st_pos++;
 476       }
 477       break;
 478
 479     case json_tokener_state_comment_start:
 480       if(c == '*') {
 481         state = json_tokener_state_comment;
 482       } else if(c == '/') {
 483         state = json_tokener_state_comment_eol;
 484       } else {
 485         tok->err = json_tokener_error_parse_comment;
 486         goto out;
 487       }
 488       printbuf_memappend_fast(tok->pb, &c, 1);
 489       break;
 490
 491     case json_tokener_state_comment:
 492               {
 493           /* Advance until we change state */
 494           const char *case_start = str;
 495           while(c != '*') {
 496             if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
 497               printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 498               goto out;
 499             }
 500           }
 501           printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
 502           state = json_tokener_state_comment_end;
 503         }
 504             break;
 505
 506     case json_tokener_state_comment_eol:
 507       {
 508         /* Advance until we change state */
 509         const char *case_start = str;
 510         while(c != '\n') {
 511           if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
 512             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 513             goto out;
 514           }
 515         }
 516         printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 517         MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
 518         state = json_tokener_state_eatws;
 519       }
 520       break;
 521
 522     case json_tokener_state_comment_end:
 523       printbuf_memappend_fast(tok->pb, &c, 1);
 524       if(c == '/') {
 525         MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
 526         state = json_tokener_state_eatws;
 527       } else {
 528         state = json_tokener_state_comment;
 529       }
 530       break;
 531
 532     case json_tokener_state_string:
 533       {
 534         /* Advance until we change state */
 535         const char *case_start = str;
 536         while(1) {
 537           if(c == tok->quote_char) {
 538             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 539             current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
 540             if(current == NULL)
 541                 goto out;
 542             saved_state = json_tokener_state_finish;
 543             state = json_tokener_state_eatws;
 544             break;
 545           } else if(c == '\\') {
 546             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 547             saved_state = json_tokener_state_string;
 548             state = json_tokener_state_string_escape;
 549             break;
 550           }
 551           if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
 552             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 553             goto out;
 554           }
 555         }
 556       }
 557       break;
 558
 559     case json_tokener_state_string_escape:
 560       switch(c) {
 561       case '"':
 562       case '\\':
 563       case '/':
 564         printbuf_memappend_fast(tok->pb, &c, 1);
 565         state = saved_state;
 566         break;
 567       case 'b':
 568       case 'n':
 569       case 'r':
 570       case 't':
 571       case 'f':
 572         if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
 573         else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
 574         else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
 575         else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
 576         else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1);
 577         state = saved_state;
 578         break;
 579       case 'u':
 580         tok->ucs_char = 0;
 581         tok->st_pos = 0;
 582         state = json_tokener_state_escape_unicode;
 583         break;
 584       default:
 585         tok->err = json_tokener_error_parse_string;
 586         goto out;
 587       }
 588       break;
 589
 590     case json_tokener_state_escape_unicode:
 591         {
 592           unsigned int got_hi_surrogate = 0;
 593
 594           /* Handle a 4-byte sequence, or two sequences if a surrogate pair */
 595           while(1) {
 596             if (c && strchr(json_hex_chars, c)) {
 597               tok->ucs_char += ((unsigned int)jt_hexdigit(c) << ((3-tok->st_pos++)*4));
 598               if(tok->st_pos == 4) {
 599                 unsigned char unescaped_utf[4];
 600
 601                 if (got_hi_surrogate) {
 602                   if (IS_LOW_SURROGATE(tok->ucs_char)) {
 603                     /* Recalculate the ucs_char, then fall thru to process normally */
 604                     tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char);
 605                   } else {
 606                     /* Hi surrogate was not followed by a low surrogate */
 607                     /* Replace the hi and process the rest normally */
 608                     printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
 609                   }
 610                   got_hi_surrogate = 0;
 611                 }
 612
 613                 if (tok->ucs_char < 0x80) {
 614                   unescaped_utf[0] = tok->ucs_char;
 615                   printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1);
 616                 } else if (tok->ucs_char < 0x800) {
 617                   unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
 618                   unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
 619                   printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2);
 620                 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) {
 621                   /* Got a high surrogate.  Remember it and look for the
 622                    * the beginning of another sequence, which should be the
 623                    * low surrogate.
 624                    */
 625                   got_hi_surrogate = tok->ucs_char;
 626                   /* Not at end, and the next two chars should be "\u" */
 627                   if ((len == -1 || len > (tok->char_offset + 2)) &&
 628                       // str[0] != '0' &&  // implied by json_hex_chars, above.
 629                       (str[1] == '\\') &&
 630                       (str[2] == 'u'))
 631                   {
 632                 /* Advance through the 16 bit surrogate, and move on to the
 633                  * next sequence. The next step is to process the following
 634                  * characters.
 635                  */
 636                     if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) {
 637                     printbuf_memappend_fast(tok->pb,
 638                                             (char*) utf8_replacement_char, 3);
 639                     }
 640                     /* Advance to the first char of the next sequence and
 641                      * continue processing with the next sequence.
 642                      */
 643                     if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
 644                       printbuf_memappend_fast(tok->pb,
 645                                               (char*) utf8_replacement_char, 3);
 646                       goto out;
 647                     }
 648                     tok->ucs_char = 0;
 649                     tok->st_pos = 0;
 650                     continue; /* other json_tokener_state_escape_unicode */
 651                   } else {
 652                     /* Got a high surrogate without another sequence following
 653                      * it.  Put a replacement char in for the hi surrogate
 654                      * and pretend we finished.
 655                      */
 656                     printbuf_memappend_fast(tok->pb,
 657                                             (char*) utf8_replacement_char, 3);
 658                   }
 659                 } else if (IS_LOW_SURROGATE(tok->ucs_char)) {
 660                   /* Got a low surrogate not preceded by a high */
 661                   printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
 662                 } else if (tok->ucs_char < 0x10000) {
 663                   unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
 664                   unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
 665                   unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
 666                   printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3);
 667                 } else if (tok->ucs_char < 0x110000) {
 668                   unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
 669                   unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
 670                   unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
 671                   unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
 672                   printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4);
 673                 } else {
 674                   /* Don't know what we got--insert the replacement char */
 675                   printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
 676                 }
 677                 state = saved_state;
 678                 break;
 679               }
 680             } else {
 681               tok->err = json_tokener_error_parse_string;
 682               goto out;
 683             }
 684           if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
 685             if (got_hi_surrogate) /* Clean up any pending chars */
 686               printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3);
 687             goto out;
 688           }
 689         }
 690       }
 691       break;
 692
 693     case json_tokener_state_boolean:
 694       {
 695         int size1, size2;
 696         printbuf_memappend_fast(tok->pb, &c, 1);
 697         size1 = json_min(tok->st_pos+1, json_true_str_len);
 698         size2 = json_min(tok->st_pos+1, json_false_str_len);
 699         if((!(tok->flags & JSON_TOKENER_STRICT) &&
 700           strncasecmp(json_true_str, tok->pb->buf, size1) == 0)
 701           || (strncmp(json_true_str, tok->pb->buf, size1) == 0)
 702           ) {
 703           if(tok->st_pos == json_true_str_len) {
 704             current = json_object_new_boolean(1);
 705             if(current == NULL)
 706                 goto out;
 707             saved_state = json_tokener_state_finish;
 708             state = json_tokener_state_eatws;
 709             goto redo_char;
 710           }
 711         } else if((!(tok->flags & JSON_TOKENER_STRICT) &&
 712           strncasecmp(json_false_str, tok->pb->buf, size2) == 0)
 713           || (strncmp(json_false_str, tok->pb->buf, size2) == 0)) {
 714           if(tok->st_pos == json_false_str_len) {
 715             current = json_object_new_boolean(0);
 716             if(current == NULL)
 717                 goto out;
 718             saved_state = json_tokener_state_finish;
 719             state = json_tokener_state_eatws;
 720             goto redo_char;
 721           }
 722         } else {
 723           tok->err = json_tokener_error_parse_boolean;
 724           goto out;
 725         }
 726         tok->st_pos++;
 727       }
 728       break;
 729
 730     case json_tokener_state_number:
 731       {
 732         /* Advance until we change state */
 733         const char *case_start = str;
 734         int case_len=0;
 735         int is_exponent=0;
 736         int negativesign_next_possible_location=1;
 737         while(c && strchr(json_number_chars, c)) {
 738           ++case_len;
 739
 740           /* non-digit characters checks */
 741           /* note: since the main loop condition to get here was
 742                    an input starting with 0-9 or '-', we are
 743                    protected from input starting with '.' or
 744                    e/E. */
 745           if (c == '.') {
 746             if (tok->is_double != 0) {
 747               /* '.' can only be found once, and out of the exponent part.
 748                  Thus, if the input is already flagged as double, it
 749                  is invalid. */
 750               tok->err = json_tokener_error_parse_number;
 751               goto out;
 752             }
 753             tok->is_double = 1;
 754           }
 755           if (c == 'e' || c == 'E') {
 756             if (is_exponent != 0) {
 757               /* only one exponent possible */
 758               tok->err = json_tokener_error_parse_number;
 759               goto out;
 760             }
 761             is_exponent = 1;
 762             tok->is_double = 1;
 763             /* the exponent part can begin with a negative sign */
 764             negativesign_next_possible_location = case_len + 1;
 765           }
 766           if (c == '-' && case_len != negativesign_next_possible_location) {
 767             /* If the negative sign is not where expected (ie
 768                start of input or start of exponent part), the
 769                input is invalid. */
 770             tok->err = json_tokener_error_parse_number;
 771             goto out;
 772           }
 773
 774           if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
 775             printbuf_memappend_fast(tok->pb, case_start, case_len);
 776             goto out;
 777           }
 778         }
 779         if (case_len>0)
 780           printbuf_memappend_fast(tok->pb, case_start, case_len);
 781
 782         // Check for -Infinity
 783         if (tok->pb->buf[0] == '-' && case_len <= 1 &&
 784             (c == 'i' || c == 'I'))
 785         {
 786                 state = json_tokener_state_inf;
 787                 tok->st_pos = 0;
 788                 goto redo_char;
 789         }
 790       }
 791       {
 792         int64_t num64;
 793         double  numd;
 794         if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) {
 795                 if (num64 && tok->pb->buf[0]=='0' &&
 796                     (tok->flags & JSON_TOKENER_STRICT)) {
 797                         /* in strict mode, number must not start with 0 */
 798                         tok->err = json_tokener_error_parse_number;
 799                         goto out;
 800                 }
 801                 current = json_object_new_int64(num64);
 802                 if(current == NULL)
 803                     goto out;
 804         }
 805         else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0)
 806         {
 807           current = json_object_new_double_s(numd, tok->pb->buf);
 808           if(current == NULL)
 809                 goto out;
 810         } else {
 811           tok->err = json_tokener_error_parse_number;
 812           goto out;
 813         }
 814         saved_state = json_tokener_state_finish;
 815         state = json_tokener_state_eatws;
 816         goto redo_char;
 817       }
 818       break;
 819
 820     case json_tokener_state_array_after_sep:
 821     case json_tokener_state_array:
 822       if(c == ']') {
 823         if (state == json_tokener_state_array_after_sep &&
 824             (tok->flags & JSON_TOKENER_STRICT))
 825           {
 826             tok->err = json_tokener_error_parse_unexpected;
 827             goto out;
 828           }
 829         saved_state = json_tokener_state_finish;
 830         state = json_tokener_state_eatws;
 831       } else {
 832         if(tok->depth >= tok->max_depth-1) {
 833           tok->err = json_tokener_error_depth;
 834           goto out;
 835         }
 836         state = json_tokener_state_array_add;
 837         tok->depth++;
 838         json_tokener_reset_level(tok, tok->depth);
 839         goto redo_char;
 840       }
 841       break;
 842
 843     case json_tokener_state_array_add:
 844       if( json_object_array_add(current, obj) != 0 )
 845         goto out;
 846       saved_state = json_tokener_state_array_sep;
 847       state = json_tokener_state_eatws;
 848       goto redo_char;
 849
 850     case json_tokener_state_array_sep:
 851       if(c == ']') {
 852         saved_state = json_tokener_state_finish;
 853         state = json_tokener_state_eatws;
 854       } else if(c == ',') {
 855         saved_state = json_tokener_state_array_after_sep;
 856         state = json_tokener_state_eatws;
 857       } else {
 858         tok->err = json_tokener_error_parse_array;
 859         goto out;
 860       }
 861       break;
 862
 863     case json_tokener_state_object_field_start:
 864     case json_tokener_state_object_field_start_after_sep:
 865       if(c == '}') {
 866                 if (state == json_tokener_state_object_field_start_after_sep &&
 867                     (tok->flags & JSON_TOKENER_STRICT))
 868                 {
 869                         tok->err = json_tokener_error_parse_unexpected;
 870                         goto out;
 871                 }
 872         saved_state = json_tokener_state_finish;
 873         state = json_tokener_state_eatws;
 874       } else if (c == '"' || c == '\'') {
 875         tok->quote_char = c;
 876         printbuf_reset(tok->pb);
 877         state = json_tokener_state_object_field;
 878       } else {
 879         tok->err = json_tokener_error_parse_object_key_name;
 880         goto out;
 881       }
 882       break;
 883
 884     case json_tokener_state_object_field:
 885       {
 886         /* Advance until we change state */
 887         const char *case_start = str;
 888         while(1) {
 889           if(c == tok->quote_char) {
 890             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 891             obj_field_name = strdup(tok->pb->buf);
 892             saved_state = json_tokener_state_object_field_end;
 893             state = json_tokener_state_eatws;
 894             break;
 895           } else if(c == '\\') {
 896             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 897             saved_state = json_tokener_state_object_field;
 898             state = json_tokener_state_string_escape;
 899             break;
 900           }
 901           if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
 902             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
 903             goto out;
 904           }
 905         }
 906       }
 907       break;
 908
 909     case json_tokener_state_object_field_end:
 910       if(c == ':') {
 911         saved_state = json_tokener_state_object_value;
 912         state = json_tokener_state_eatws;
 913       } else {
 914         tok->err = json_tokener_error_parse_object_key_sep;
 915         goto out;
 916       }
 917       break;
 918
 919     case json_tokener_state_object_value:
 920       if(tok->depth >= tok->max_depth-1) {
 921         tok->err = json_tokener_error_depth;
 922         goto out;
 923       }
 924       state = json_tokener_state_object_value_add;
 925       tok->depth++;
 926       json_tokener_reset_level(tok, tok->depth);
 927       goto redo_char;
 928
 929     case json_tokener_state_object_value_add:
 930       json_object_object_add(current, obj_field_name, obj);
 931       free(obj_field_name);
 932       obj_field_name = NULL;
 933       saved_state = json_tokener_state_object_sep;
 934       state = json_tokener_state_eatws;
 935       goto redo_char;
 936
 937     case json_tokener_state_object_sep:
 938       /* { */
 939       if(c == '}') {
 940         saved_state = json_tokener_state_finish;
 941         state = json_tokener_state_eatws;
 942       } else if(c == ',') {
 943         saved_state = json_tokener_state_object_field_start_after_sep;
 944         state = json_tokener_state_eatws;
 945       } else {
 946         tok->err = json_tokener_error_parse_object_value_sep;
 947         goto out;
 948       }
 949       break;
 950
 951     }
 952     if (!ADVANCE_CHAR(str, tok))
 953       goto out;
 954   } /* while(PEEK_CHAR) */
 955
 956  out:
 957   if (c &&
 958      (state == json_tokener_state_finish) &&
 959      (tok->depth == 0) &&
 960      (tok->flags & JSON_TOKENER_STRICT)) {
 961       /* unexpected char after JSON data */
 962       tok->err = json_tokener_error_parse_unexpected;
 963   }
 964   if (!c) { /* We hit an eof char (0) */
 965     if(state != json_tokener_state_finish &&
 966        saved_state != json_tokener_state_finish)
 967       tok->err = json_tokener_error_parse_eof;
 968   }
 969
 970 #ifdef HAVE_USELOCALE
 971   uselocale(oldlocale);
 972   freelocale(newloc);
 973 #elif defined(HAVE_SETLOCALE)
 974   setlocale(LC_NUMERIC, oldlocale);
 975   free(oldlocale);
 976 #endif
 977
 978   if (tok->err == json_tokener_success)
 979   {
 980     json_object *ret = json_object_get(current);
 981         int ii;
 982
 983         /* Partially reset, so we parse additional objects on subsequent calls. */
 984     for(ii = tok->depth; ii >= 0; ii--)
 985       json_tokener_reset_level(tok, ii);
 986     return ret;
 987   }
 988
 989   MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
 990            json_tokener_errors[tok->err], tok->char_offset);
 991   return NULL;
 992 }
 993
 994 void json_tokener_set_flags(struct json_tokener *tok, int flags)
 995 {
 996         tok->flags = flags;
 997 }