onert-micro/externals/flatbuffers/util.h

   1 /*
   2  * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2014 Google Inc. All rights reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *     http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef FLATBUFFERS_UTIL_H_
  19 #define FLATBUFFERS_UTIL_H_
  20
  21 #include <errno.h>
  22
  23 #include "flatbuffers/base.h"
  24 #include "flatbuffers/stl_emulation.h"
  25
  26 #ifndef FLATBUFFERS_PREFER_PRINTF
  27 #include <sstream>
  28 #else // FLATBUFFERS_PREFER_PRINTF
  29 #include <float.h>
  30 #include <stdio.h>
  31 #endif // FLATBUFFERS_PREFER_PRINTF
  32
  33 #include <iomanip>
  34 #include <string>
  35
  36 namespace flatbuffers
  37 {
  38
  39 // @locale-independent functions for ASCII characters set.
  40
  41 // Fast checking that character lies in closed range: [a <= x <= b]
  42 // using one compare (conditional branch) operator.
  43 inline bool check_ascii_range(char x, char a, char b)
  44 {
  45   FLATBUFFERS_ASSERT(a <= b);
  46   // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`.
  47   // The x, a, b will be promoted to int and subtracted without overflow.
  48   return static_cast<unsigned int>(x - a) <= static_cast<unsigned int>(b - a);
  49 }
  50
  51 // Case-insensitive isalpha
  52 inline bool is_alpha(char c)
  53 {
  54   // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
  55   return check_ascii_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
  56 }
  57
  58 // Check for uppercase alpha
  59 inline bool is_alpha_upper(char c) { return check_ascii_range(c, 'A', 'Z'); }
  60
  61 // Check (case-insensitive) that `c` is equal to alpha.
  62 inline bool is_alpha_char(char c, char alpha)
  63 {
  64   FLATBUFFERS_ASSERT(is_alpha(alpha));
  65   // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
  66   return ((c & 0xDF) == (alpha & 0xDF));
  67 }
  68
  69 // https://en.cppreference.com/w/cpp/string/byte/isxdigit
  70 // isdigit and isxdigit are the only standard narrow character classification
  71 // functions that are not affected by the currently installed C locale. although
  72 // some implementations (e.g. Microsoft in 1252 codepage) may classify
  73 // additional single-byte characters as digits.
  74 inline bool is_digit(char c) { return check_ascii_range(c, '0', '9'); }
  75
  76 inline bool is_xdigit(char c)
  77 {
  78   // Replace by look-up table.
  79   return is_digit(c) || check_ascii_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
  80 }
  81
  82 // Case-insensitive isalnum
  83 inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
  84
  85 inline char CharToUpper(char c)
  86 {
  87   return static_cast<char>(::toupper(static_cast<unsigned char>(c)));
  88 }
  89
  90 inline char CharToLower(char c)
  91 {
  92   return static_cast<char>(::tolower(static_cast<unsigned char>(c)));
  93 }
  94
  95 // @end-locale-independent functions for ASCII character set
  96
  97 #ifdef FLATBUFFERS_PREFER_PRINTF
  98 template <typename T> size_t IntToDigitCount(T t)
  99 {
 100   size_t digit_count = 0;
 101   // Count the sign for negative numbers
 102   if (t < 0)
 103     digit_count++;
 104   // Count a single 0 left of the dot for fractional numbers
 105   if (-1 < t && t < 1)
 106     digit_count++;
 107   // Count digits until fractional part
 108   T eps = std::numeric_limits<float>::epsilon();
 109   while (t <= (-1 + eps) || (1 - eps) <= t)
 110   {
 111     t /= 10;
 112     digit_count++;
 113   }
 114   return digit_count;
 115 }
 116
 117 template <typename T> size_t NumToStringWidth(T t, int precision = 0)
 118 {
 119   size_t string_width = IntToDigitCount(t);
 120   // Count the dot for floating point numbers
 121   if (precision)
 122     string_width += (precision + 1);
 123   return string_width;
 124 }
 125
 126 template <typename T> std::string NumToStringImplWrapper(T t, const char *fmt, int precision = 0)
 127 {
 128   size_t string_width = NumToStringWidth(t, precision);
 129   std::string s(string_width, 0x00);
 130   // Allow snprintf to use std::string trailing null to detect buffer overflow
 131   snprintf(const_cast<char *>(s.data()), (s.size() + 1), fmt, string_width, t);
 132   return s;
 133 }
 134 #endif // FLATBUFFERS_PREFER_PRINTF
 135
 136 // Convert an integer or floating point value to a string.
 137 // In contrast to std::stringstream, "char" values are
 138 // converted to a string of digits, and we don't use scientific notation.
 139 template <typename T> std::string NumToString(T t)
 140 {
 141   // clang-format off
 142
 143   #ifndef FLATBUFFERS_PREFER_PRINTF
 144     std::stringstream ss;
 145     ss << t;
 146     return ss.str();
 147   #else // FLATBUFFERS_PREFER_PRINTF
 148     auto v = static_cast<long long>(t);
 149     return NumToStringImplWrapper(v, "%.*lld");
 150   #endif // FLATBUFFERS_PREFER_PRINTF
 151   // clang-format on
 152 }
 153 // Avoid char types used as character data.
 154 template <> inline std::string NumToString<signed char>(signed char t)
 155 {
 156   return NumToString(static_cast<int>(t));
 157 }
 158 template <> inline std::string NumToString<unsigned char>(unsigned char t)
 159 {
 160   return NumToString(static_cast<int>(t));
 161 }
 162 template <> inline std::string NumToString<char>(char t)
 163 {
 164   return NumToString(static_cast<int>(t));
 165 }
 166 #if defined(FLATBUFFERS_CPP98_STL)
 167 template <> inline std::string NumToString<long long>(long long t)
 168 {
 169   char buf[21]; // (log((1 << 63) - 1) / log(10)) + 2
 170   snprintf(buf, sizeof(buf), "%lld", t);
 171   return std::string(buf);
 172 }
 173
 174 template <> inline std::string NumToString<unsigned long long>(unsigned long long t)
 175 {
 176   char buf[22]; // (log((1 << 63) - 1) / log(10)) + 1
 177   snprintf(buf, sizeof(buf), "%llu", t);
 178   return std::string(buf);
 179 }
 180 #endif // defined(FLATBUFFERS_CPP98_STL)
 181
 182 // Special versions for floats/doubles.
 183 template <typename T> std::string FloatToString(T t, int precision)
 184 {
 185   // clang-format off
 186
 187   #ifndef FLATBUFFERS_PREFER_PRINTF
 188     // to_string() prints different numbers of digits for floats depending on
 189     // platform and isn't available on Android, so we use stringstream
 190     std::stringstream ss;
 191     // Use std::fixed to suppress scientific notation.
 192     ss << std::fixed;
 193     // Default precision is 6, we want that to be higher for doubles.
 194     ss << std::setprecision(precision);
 195     ss << t;
 196     auto s = ss.str();
 197   #else // FLATBUFFERS_PREFER_PRINTF
 198     auto v = static_cast<double>(t);
 199     auto s = NumToStringImplWrapper(v, "%0.*f", precision);
 200   #endif // FLATBUFFERS_PREFER_PRINTF
 201   // clang-format on
 202   // Sadly, std::fixed turns "1" into "1.00000", so here we undo that.
 203   auto p = s.find_last_not_of('0');
 204   if (p != std::string::npos)
 205   {
 206     // Strip trailing zeroes. If it is a whole number, keep one zero.
 207     s.resize(p + (s[p] == '.' ? 2 : 1));
 208   }
 209   return s;
 210 }
 211
 212 template <> inline std::string NumToString<double>(double t) { return FloatToString(t, 12); }
 213 template <> inline std::string NumToString<float>(float t) { return FloatToString(t, 6); }
 214
 215 // Convert an integer value to a hexadecimal string.
 216 // The returned string length is always xdigits long, prefixed by 0 digits.
 217 // For example, IntToStringHex(0x23, 8) returns the string "00000023".
 218 inline std::string IntToStringHex(int i, int xdigits)
 219 {
 220   FLATBUFFERS_ASSERT(i >= 0);
 221   // clang-format off
 222
 223   #ifndef FLATBUFFERS_PREFER_PRINTF
 224     std::stringstream ss;
 225     ss << std::setw(xdigits) << std::setfill('0') << std::hex << std::uppercase
 226        << i;
 227     return ss.str();
 228   #else // FLATBUFFERS_PREFER_PRINTF
 229     return NumToStringImplWrapper(i, "%.*X", xdigits);
 230   #endif // FLATBUFFERS_PREFER_PRINTF
 231   // clang-format on
 232 }
 233
// clang-format off
// Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}.
// The __strto*_impl macros defined in this section select the conversion
// routine used by the strtoval_impl() overloads further down.
#if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0)
  // Holds the locale handle that is handed to the *_l conversion functions.
  // Constructor and destructor are private and only declared here; the only
  // access is through the static instance_ via Get().
  // NOTE(review): presumably the handle is created for the classic "C" locale
  // in the implementation file - confirm there.
  class ClassicLocale {
    #ifdef _MSC_VER
      typedef _locale_t locale_type;
    #else
      typedef locale_t locale_type;  // POSIX.1-2008 locale_t type
    #endif
    ClassicLocale();
    ~ClassicLocale();
    locale_type locale_;
    static ClassicLocale instance_;
  public:
    // Returns the locale handle stored in the static instance.
    static locale_type Get() { return instance_.locale_; }
  };

  // Locale-aware variants: every call passes ClassicLocale::Get() explicitly.
  #ifdef _MSC_VER
    #define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get())
    #define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get())
    #define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get())
    #define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get())
  #else
    #define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get())
    #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get())
    #define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get())
    #define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get())
  #endif
#else
  // Fallback: plain C conversion functions, which follow the current C locale.
  #define __strtod_impl(s, pe) strtod(s, pe)
  // No strtof here: the float variant parses as double and narrows the result.
  #define __strtof_impl(s, pe) static_cast<float>(strtod(s, pe))
  #ifdef _MSC_VER
    #define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b)
    #define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b)
  #else
    #define __strtoull_impl(s, pe, b) strtoull(s, pe, b)
    #define __strtoll_impl(s, pe, b) strtoll(s, pe, b)
  #endif
#endif
 273
 274 inline void strtoval_impl(int64_t *val, const char *str, char **endptr,
 275                                  int base) {
 276     *val = __strtoll_impl(str, endptr, base);
 277 }
 278
 279 inline void strtoval_impl(uint64_t *val, const char *str, char **endptr,
 280                                  int base) {
 281   *val = __strtoull_impl(str, endptr, base);
 282 }
 283
 284 inline void strtoval_impl(double *val, const char *str, char **endptr) {
 285   *val = __strtod_impl(str, endptr);
 286 }
 287
 288 // UBSAN: double to float is safe if numeric_limits<float>::is_iec559 is true.
 289 __supress_ubsan__("float-cast-overflow")
 290 inline void strtoval_impl(float *val, const char *str, char **endptr) {
 291   *val = __strtof_impl(str, endptr);
 292 }
// The conversion macros are only needed by the strtoval_impl() overloads
// above; remove them so they do not leak out of this header.
#undef __strtoull_impl
#undef __strtoll_impl
#undef __strtod_impl
#undef __strtof_impl
// clang-format on
 298
 299 // Adaptor for strtoull()/strtoll().
 300 // Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
 301 // while strtoll with base=0 interprets first leading zero as octal prefix.
 302 // In future, it is possible to add prefixed 0b0101.
 303 // 1) Checks errno code for overflow condition (out of range).
 304 // 2) If base <= 0, function try to detect base of number by prefix.
 305 //
 306 // Return value (like strtoull and strtoll, but reject partial result):
 307 // - If successful, an integer value corresponding to the str is returned.
 308 // - If full string conversion can't be performed, 0 is returned.
 309 // - If the converted value falls out of range of corresponding return type, a
 310 // range error occurs. In this case value MAX(T)/MIN(T) is returned.
 311 template <typename T>
 312 inline bool StringToIntegerImpl(T *val, const char *const str, const int base = 0,
 313                                 const bool check_errno = true)
 314 {
 315   // T is int64_t or uint64_T
 316   FLATBUFFERS_ASSERT(str);
 317   if (base <= 0)
 318   {
 319     auto s = str;
 320     while (*s && !is_digit(*s))
 321       s++;
 322     if (s[0] == '0' && is_alpha_char(s[1], 'X'))
 323       return StringToIntegerImpl(val, str, 16, check_errno);
 324     // if a prefix not match, try base=10
 325     return StringToIntegerImpl(val, str, 10, check_errno);
 326   }
 327   else
 328   {
 329     if (check_errno)
 330       errno = 0; // clear thread-local errno
 331     auto endptr = str;
 332     strtoval_impl(val, str, const_cast<char **>(&endptr), base);
 333     if ((*endptr != '\0') || (endptr == str))
 334     {
 335       *val = 0;     // erase partial result
 336       return false; // invalid string
 337     }
 338     // errno is out-of-range, return MAX/MIN
 339     if (check_errno && errno)
 340       return false;
 341     return true;
 342   }
 343 }
 344
 345 template <typename T> inline bool StringToFloatImpl(T *val, const char *const str)
 346 {
 347   // Type T must be either float or double.
 348   FLATBUFFERS_ASSERT(str && val);
 349   auto end = str;
 350   strtoval_impl(val, str, const_cast<char **>(&end));
 351   auto done = (end != str) && (*end == '\0');
 352   if (!done)
 353     *val = 0; // erase partial result
 354   return done;
 355 }
 356
 357 // Convert a string to an instance of T.
 358 // Return value (matched with StringToInteger64Impl and strtod):
 359 // - If successful, a numeric value corresponding to the str is returned.
 360 // - If full string conversion can't be performed, 0 is returned.
 361 // - If the converted value falls out of range of corresponding return type, a
 362 // range error occurs. In this case value MAX(T)/MIN(T) is returned.
 363 template <typename T> inline bool StringToNumber(const char *s, T *val)
 364 {
 365   // Assert on `unsigned long` and `signed long` on LP64.
 366   // If it is necessary, it could be solved with flatbuffers::enable_if<B,T>.
 367   static_assert(sizeof(T) < sizeof(int64_t), "unexpected type T");
 368   FLATBUFFERS_ASSERT(s && val);
 369   int64_t i64;
 370   // The errno check isn't needed, will return MAX/MIN on overflow.
 371   if (StringToIntegerImpl(&i64, s, 0, false))
 372   {
 373     const int64_t max = (flatbuffers::numeric_limits<T>::max)();
 374     const int64_t min = flatbuffers::numeric_limits<T>::lowest();
 375     if (i64 > max)
 376     {
 377       *val = static_cast<T>(max);
 378       return false;
 379     }
 380     if (i64 < min)
 381     {
 382       // For unsigned types return max to distinguish from
 383       // "no conversion can be performed" when 0 is returned.
 384       *val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
 385       return false;
 386     }
 387     *val = static_cast<T>(i64);
 388     return true;
 389   }
 390   *val = 0;
 391   return false;
 392 }
 393
 394 template <> inline bool StringToNumber<int64_t>(const char *str, int64_t *val)
 395 {
 396   return StringToIntegerImpl(val, str);
 397 }
 398
 399 template <> inline bool StringToNumber<uint64_t>(const char *str, uint64_t *val)
 400 {
 401   if (!StringToIntegerImpl(val, str))
 402     return false;
 403   // The strtoull accepts negative numbers:
 404   // If the minus sign was part of the input sequence, the numeric value
 405   // calculated from the sequence of digits is negated as if by unary minus
 406   // in the result type, which applies unsigned integer wraparound rules.
 407   // Fix this behaviour (except -0).
 408   if (*val)
 409   {
 410     auto s = str;
 411     while (*s && !is_digit(*s))
 412       s++;
 413     s = (s > str) ? (s - 1) : s; // step back to one symbol
 414     if (*s == '-')
 415     {
 416       // For unsigned types return the max to distinguish from
 417       // "no conversion can be performed".
 418       *val = (flatbuffers::numeric_limits<uint64_t>::max)();
 419       return false;
 420     }
 421   }
 422   return true;
 423 }
 424
 425 template <> inline bool StringToNumber(const char *s, float *val)
 426 {
 427   return StringToFloatImpl(val, s);
 428 }
 429
 430 template <> inline bool StringToNumber(const char *s, double *val)
 431 {
 432   return StringToFloatImpl(val, s);
 433 }
 434
 435 inline int64_t StringToInt(const char *s, int base = 10)
 436 {
 437   int64_t val;
 438   return StringToIntegerImpl(&val, s, base) ? val : 0;
 439 }
 440
 441 inline uint64_t StringToUInt(const char *s, int base = 10)
 442 {
 443   uint64_t val;
 444   return StringToIntegerImpl(&val, s, base) ? val : 0;
 445 }
 446
// Pointer to a function with the same signature as LoadFile() below.
typedef bool (*LoadFileFunction)(const char *filename, bool binary, std::string *dest);
// Pointer to a function with the same signature as FileExists() below.
typedef bool (*FileExistsFunction)(const char *filename);

// NOTE(review): presumably installs `load_file_function` as the loader used by
// LoadFile() and returns the previously installed one - confirm against the
// implementation file.
LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function);

// NOTE(review): presumably the FileExists() counterpart of
// SetLoadFileFunction() - confirm against the implementation file.
FileExistsFunction SetFileExistsFunction(FileExistsFunction file_exists_function);
 453
// Check if file "name" exists.
bool FileExists(const char *name);

// Check if "name" exists and it is also a directory.
bool DirExists(const char *name);

// Load file "name" into "buf" returning true if successful
// false otherwise.  If "binary" is false data is read
// using ifstream's text mode, otherwise data is read with
// no transcoding.
bool LoadFile(const char *name, bool binary, std::string *buf);

// Save data "buf" of length "len" bytes into a file
// "name" returning true if successful, false otherwise.
// If "binary" is false data is written in text mode,
// otherwise data is written with no transcoding.
bool SaveFile(const char *name, const char *buf, size_t len, bool binary);
 472
 473 // Save data "buf" into file "name" returning true if
 474 // successful, false otherwise.  If "binary" is false
 475 // data is written using ifstream's text mode, otherwise
 476 // data is written with no transcoding.
 477 inline bool SaveFile(const char *name, const std::string &buf, bool binary)
 478 {
 479   return SaveFile(name, buf.c_str(), buf.size(), binary);
 480 }
 481
// Functionality for minimalistic portable path handling.

// The functions below behave correctly regardless of whether posix ('/') or
// Windows ('/' or '\\') separators are used.

// Any new separators inserted are always posix.
FLATBUFFERS_CONSTEXPR char kPathSeparator = '/';

// Returns the path with the extension, if any, removed.
std::string StripExtension(const std::string &filepath);

// Returns the extension, if any.
std::string GetExtension(const std::string &filepath);

// Returns the last component of the path, after the last separator.
std::string StripPath(const std::string &filepath);

// Strips the last component of the path + separator.
std::string StripFileName(const std::string &filepath);

// Concatenates a path with a filename, regardless of whether the path
// ends in a separator or not.
std::string ConCatPathFileName(const std::string &path, const std::string &filename);

// Replaces any '\\' separators with '/'
std::string PosixPath(const char *path);

// This function ensures a directory exists, by recursively
// creating dirs for any parts of the path that don't exist yet.
void EnsureDirExists(const std::string &filepath);

// Obtains the absolute path from any other path.
// Returns the input path if the absolute path couldn't be resolved.
std::string AbsolutePath(const std::string &filepath);
 516
 517 // To and from UTF-8 unicode conversion functions
 518
 519 // Convert a unicode code point into a UTF-8 representation by appending it
 520 // to a string. Returns the number of bytes generated.
 521 inline int ToUTF8(uint32_t ucc, std::string *out)
 522 {
 523   FLATBUFFERS_ASSERT(!(ucc & 0x80000000)); // Top bit can't be set.
 524   // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
 525   for (int i = 0; i < 6; i++)
 526   {
 527     // Max bits this encoding can represent.
 528     uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
 529     if (ucc < (1u << max_bits))
 530     { // does it fit?
 531       // Remaining bits not encoded in the first byte, store 6 bits each
 532       uint32_t remain_bits = i * 6;
 533       // Store first byte:
 534       (*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) | (ucc >> remain_bits));
 535       // Store remaining bytes:
 536       for (int j = i - 1; j >= 0; j--)
 537       {
 538         (*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
 539       }
 540       return i + 1; // Return the number of bytes added.
 541     }
 542   }
 543   FLATBUFFERS_ASSERT(0); // Impossible to arrive here.
 544   return -1;
 545 }
 546
 547 // Converts whatever prefix of the incoming string corresponds to a valid
 548 // UTF-8 sequence into a unicode code. The incoming pointer will have been
 549 // advanced past all bytes parsed.
 550 // returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in
 551 // this case).
 552 inline int FromUTF8(const char **in)
 553 {
 554   int len = 0;
 555   // Count leading 1 bits.
 556   for (int mask = 0x80; mask >= 0x04; mask >>= 1)
 557   {
 558     if (**in & mask)
 559     {
 560       len++;
 561     }
 562     else
 563     {
 564       break;
 565     }
 566   }
 567   if ((static_cast<unsigned char>(**in) << len) & 0x80)
 568     return -1; // Bit after leading 1's must be 0.
 569   if (!len)
 570     return *(*in)++;
 571   // UTF-8 encoded values with a length are between 2 and 4 bytes.
 572   if (len < 2 || len > 4)
 573   {
 574     return -1;
 575   }
 576   // Grab initial bits of the code.
 577   int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
 578   for (int i = 0; i < len - 1; i++)
 579   {
 580     if ((**in & 0xC0) != 0x80)
 581       return -1; // Upper bits must 1 0.
 582     ucc <<= 6;
 583     ucc |= *(*in)++ & 0x3F; // Grab 6 more bits of the code.
 584   }
 585   // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for
 586   // UTF-16 surrogate pairs).
 587   if (ucc >= 0xD800 && ucc <= 0xDFFF)
 588   {
 589     return -1;
 590   }
 591   // UTF-8 must represent code points in their shortest possible encoding.
 592   switch (len)
 593   {
 594     case 2:
 595       // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF.
 596       if (ucc < 0x0080 || ucc > 0x07FF)
 597       {
 598         return -1;
 599       }
 600       break;
 601     case 3:
 602       // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF.
 603       if (ucc < 0x0800 || ucc > 0xFFFF)
 604       {
 605         return -1;
 606       }
 607       break;
 608     case 4:
 609       // Four bytes of UTF-8 can represent code points from U+10000 to U+10FFFF.
 610       if (ucc < 0x10000 || ucc > 0x10FFFF)
 611       {
 612         return -1;
 613       }
 614       break;
 615   }
 616   return ucc;
 617 }
 618
 619 #ifndef FLATBUFFERS_PREFER_PRINTF
 620 // Wraps a string to a maximum length, inserting new lines where necessary. Any
 621 // existing whitespace will be collapsed down to a single space. A prefix or
 622 // suffix can be provided, which will be inserted before or after a wrapped
 623 // line, respectively.
 624 inline std::string WordWrap(const std::string in, size_t max_length,
 625                             const std::string wrapped_line_prefix,
 626                             const std::string wrapped_line_suffix)
 627 {
 628   std::istringstream in_stream(in);
 629   std::string wrapped, line, word;
 630
 631   in_stream >> word;
 632   line = word;
 633
 634   while (in_stream >> word)
 635   {
 636     if ((line.length() + 1 + word.length() + wrapped_line_suffix.length()) < max_length)
 637     {
 638       line += " " + word;
 639     }
 640     else
 641     {
 642       wrapped += line + wrapped_line_suffix + "\n";
 643       line = wrapped_line_prefix + word;
 644     }
 645   }
 646   wrapped += line;
 647
 648   return wrapped;
 649 }
 650 #endif // !FLATBUFFERS_PREFER_PRINTF
 651
 652 inline bool EscapeString(const char *s, size_t length, std::string *_text, bool allow_non_utf8,
 653                          bool natural_utf8)
 654 {
 655   std::string &text = *_text;
 656   text += "\"";
 657   for (uoffset_t i = 0; i < length; i++)
 658   {
 659     char c = s[i];
 660     switch (c)
 661     {
 662       case '\n':
 663         text += "\\n";
 664         break;
 665       case '\t':
 666         text += "\\t";
 667         break;
 668       case '\r':
 669         text += "\\r";
 670         break;
 671       case '\b':
 672         text += "\\b";
 673         break;
 674       case '\f':
 675         text += "\\f";
 676         break;
 677       case '\"':
 678         text += "\\\"";
 679         break;
 680       case '\\':
 681         text += "\\\\";
 682         break;
 683       default:
 684         if (c >= ' ' && c <= '~')
 685         {
 686           text += c;
 687         }
 688         else
 689         {
 690           // Not printable ASCII data. Let's see if it's valid UTF-8 first:
 691           const char *utf8 = s + i;
 692           int ucc = FromUTF8(&utf8);
 693           if (ucc < 0)
 694           {
 695             if (allow_non_utf8)
 696             {
 697               text += "\\x";
 698               text += IntToStringHex(static_cast<uint8_t>(c), 2);
 699             }
 700             else
 701             {
 702               // There are two cases here:
 703               //
 704               // 1) We reached here by parsing an IDL file. In that case,
 705               // we previously checked for non-UTF-8, so we shouldn't reach
 706               // here.
 707               //
 708               // 2) We reached here by someone calling GenerateText()
 709               // on a previously-serialized flatbuffer. The data might have
 710               // non-UTF-8 Strings, or might be corrupt.
 711               //
 712               // In both cases, we have to give up and inform the caller
 713               // they have no JSON.
 714               return false;
 715             }
 716           }
 717           else
 718           {
 719             if (natural_utf8)
 720             {
 721               // utf8 points to past all utf-8 bytes parsed
 722               text.append(s + i, static_cast<size_t>(utf8 - s - i));
 723             }
 724             else if (ucc <= 0xFFFF)
 725             {
 726               // Parses as Unicode within JSON's \uXXXX range, so use that.
 727               text += "\\u";
 728               text += IntToStringHex(ucc, 4);
 729             }
 730             else if (ucc <= 0x10FFFF)
 731             {
 732               // Encode Unicode SMP values to a surrogate pair using two \u
 733               // escapes.
 734               uint32_t base = ucc - 0x10000;
 735               auto high_surrogate = (base >> 10) + 0xD800;
 736               auto low_surrogate = (base & 0x03FF) + 0xDC00;
 737               text += "\\u";
 738               text += IntToStringHex(high_surrogate, 4);
 739               text += "\\u";
 740               text += IntToStringHex(low_surrogate, 4);
 741             }
 742             // Skip past characters recognized.
 743             i = static_cast<uoffset_t>(utf8 - s - 1);
 744           }
 745         }
 746         break;
 747     }
 748   }
 749   text += "\"";
 750   return true;
 751 }
 752
 753 inline std::string BufferToHexText(const void *buffer, size_t buffer_size, size_t max_length,
 754                                    const std::string &wrapped_line_prefix,
 755                                    const std::string &wrapped_line_suffix)
 756 {
 757   std::string text = wrapped_line_prefix;
 758   size_t start_offset = 0;
 759   const char *s = reinterpret_cast<const char *>(buffer);
 760   for (size_t i = 0; s && i < buffer_size; i++)
 761   {
 762     // Last iteration or do we have more?
 763     bool have_more = i + 1 < buffer_size;
 764     text += "0x";
 765     text += IntToStringHex(static_cast<uint8_t>(s[i]), 2);
 766     if (have_more)
 767     {
 768       text += ',';
 769     }
 770     // If we have more to process and we reached max_length
 771     if (have_more && text.size() + wrapped_line_suffix.size() >= start_offset + max_length)
 772     {
 773       text += wrapped_line_suffix;
 774       text += '\n';
 775       start_offset = text.size();
 776       text += wrapped_line_prefix;
 777     }
 778   }
 779   text += wrapped_line_suffix;
 780   return text;
 781 }
 782
// Remove paired quotes in a string: "text"|'text' -> text.
std::string RemoveStringQuotes(const std::string &s);

// Change the global C-locale to the locale with name <locale_name>.
// Returns an actual locale name in <_value>, useful if locale_name is "" or
// null.
bool SetGlobalTestLocale(const char *locale_name, std::string *_value = nullptr);

// Read (or test) a value of environment variable.
// NOTE(review): presumably the value is stored in <_value> when it is
// non-null - confirm against the implementation file.
bool ReadEnvironmentVariable(const char *var_name, std::string *_value = nullptr);

// MSVC specific: Send all assert reports to STDOUT to prevent CI hangs.
void SetupDefaultCRTReportMode();
 796
 797 } // namespace flatbuffers
 798
 799 #endif // FLATBUFFERS_UTIL_H_