2 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2014 Google Inc. All rights reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #ifndef FLATBUFFERS_UTIL_H_
19 #define FLATBUFFERS_UTIL_H_
23 #include "flatbuffers/base.h"
24 #include "flatbuffers/stl_emulation.h"
26 #ifndef FLATBUFFERS_PREFER_PRINTF
28 #else // FLATBUFFERS_PREFER_PRINTF
31 #endif // FLATBUFFERS_PREFER_PRINTF
39 // @locale-independent functions for ASCII characters set.
41 // Fast checking that character lies in closed range: [a <= x <= b]
42 // using one compare (conditional branch) operator.
43 inline bool check_ascii_range(char x, char a, char b)
45 FLATBUFFERS_ASSERT(a <= b);
46 // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`.
47 // The x, a, b will be promoted to int and subtracted without overflow.
48 return static_cast<unsigned int>(x - a) <= static_cast<unsigned int>(b - a);
51 // Case-insensitive isalpha
52 inline bool is_alpha(char c)
54 // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
55 return check_ascii_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
58 // Check for uppercase alpha
59 inline bool is_alpha_upper(char c) { return check_ascii_range(c, 'A', 'Z'); }
61 // Check (case-insensitive) that `c` is equal to alpha.
62 inline bool is_alpha_char(char c, char alpha)
64 FLATBUFFERS_ASSERT(is_alpha(alpha));
65 // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
66 return ((c & 0xDF) == (alpha & 0xDF));
69 // https://en.cppreference.com/w/cpp/string/byte/isxdigit
70 // isdigit and isxdigit are the only standard narrow character classification
71 // functions that are not affected by the currently installed C locale. although
72 // some implementations (e.g. Microsoft in 1252 codepage) may classify
73 // additional single-byte characters as digits.
74 inline bool is_digit(char c) { return check_ascii_range(c, '0', '9'); }
76 inline bool is_xdigit(char c)
78 // Replace by look-up table.
79 return is_digit(c) || check_ascii_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
82 // Case-insensitive isalnum
83 inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
// Converts an ASCII lower-case letter to upper case; every other character
// (including bytes outside the ASCII range) is returned unchanged.
// Implemented with plain arithmetic instead of ::toupper so the result does
// not depend on the currently installed C locale, as this section promises.
inline char CharToUpper(char c)
{
  const bool is_lower = (c >= 'a') && (c <= 'z');
  // In ASCII the two cases are a constant distance apart.
  return is_lower ? static_cast<char>(c - ('a' - 'A')) : c;
}
// Converts an ASCII upper-case letter to lower case; every other character
// (including bytes outside the ASCII range) is returned unchanged.
// Implemented with plain arithmetic instead of ::tolower so the result does
// not depend on the currently installed C locale, as this section promises.
inline char CharToLower(char c)
{
  const bool is_upper = (c >= 'A') && (c <= 'Z');
  // In ASCII the two cases are a constant distance apart.
  return is_upper ? static_cast<char>(c + ('a' - 'A')) : c;
}
95 // @end-locale-independent functions for ASCII character set
97 #ifdef FLATBUFFERS_PREFER_PRINTF
// Counts the characters needed for the part of `t` left of the decimal
// point; the step comments below list what is counted (sign, a single
// leading zero, integer digits).
// NOTE(review): several statements of this function are not visible in this
// view of the file, so only the visible lines are described here.
98 template <typename T> size_t IntToDigitCount(T t)
100 size_t digit_count = 0;
101 // Count the sign for negative numbers
104 // Count a single 0 left of the dot for fractional numbers
107 // Count digits until fractional part
// float's epsilon converted to T; for an integer T the conversion truncates
// it to 0, so the loop condition reduces to `t <= -1 || 1 <= t`.
108 T eps = std::numeric_limits<float>::epsilon();
109 while (t <= (-1 + eps) || (1 - eps) <= t)
// Estimates the printed width of `t`: starts from IntToDigitCount(t) and, on
// the visible line below, adds `precision + 1` characters (the fractional
// digits plus the dot).
// NOTE(review): the condition guarding that addition is not visible in this
// view of the file.
117 template <typename T> size_t NumToStringWidth(T t, int precision = 0)
119 size_t string_width = IntToDigitCount(t);
120 // Count the dot for floating point numbers
122 string_width += (precision + 1);
// Formats `t` with the printf-style format `fmt`, which must contain exactly
// one `*` precision field followed by the conversion for `t` (the callers in
// this file use "%.*lld", "%.*X" and "%0.*f").
// `precision` is forwarded as that precision: the minimum digit count for
// integer conversions, the number of fractional digits for floating point.
// Returns the formatted text, or an empty string if snprintf reports an error.
template <typename T> std::string NumToStringImplWrapper(T t, const char *fmt, int precision = 0)
{
  // An integer conversion with precision 0 prints nothing for the value 0;
  // the C default precision of 1 leaves every other value unchanged.
  if (std::numeric_limits<T>::is_integer && precision < 1)
  {
    precision = 1;
  }
  // Ask snprintf for the exact length first so the result is never truncated.
  // The `*` field must receive an int, hence `precision` and not a size_t.
  const int length = snprintf(nullptr, 0, fmt, precision, t);
  if (length <= 0)
  {
    return std::string();
  }
  std::string s(static_cast<size_t>(length), 0x00);
  // snprintf writes its terminating NUL over std::string's own trailing NUL.
  snprintf(const_cast<char *>(s.data()), s.size() + 1, fmt, precision, t);
  return s;
}
134 #endif // FLATBUFFERS_PREFER_PRINTF
136 // Convert an integer or floating point value to a string.
137 // In contrast to std::stringstream, "char" values are
138 // converted to a string of digits, and we don't use scientific notation.
// Two implementations are selected by FLATBUFFERS_PREFER_PRINTF.
// NOTE(review): the statements that use `ss` are not visible in this view.
139 template <typename T> std::string NumToString(T t)
143 #ifndef FLATBUFFERS_PREFER_PRINTF
// Stream-based path.
144 std::stringstream ss;
147 #else // FLATBUFFERS_PREFER_PRINTF
// printf-based path: the value is converted to long long and formatted by
// NumToStringImplWrapper with the "%.*lld" format.
148 auto v = static_cast<long long>(t);
149 return NumToStringImplWrapper(v, "%.*lld");
150 #endif // FLATBUFFERS_PREFER_PRINTF
153 // Avoid char types used as character data.
154 template <> inline std::string NumToString<signed char>(signed char t)
156 return NumToString(static_cast<int>(t));
158 template <> inline std::string NumToString<unsigned char>(unsigned char t)
160 return NumToString(static_cast<int>(t));
162 template <> inline std::string NumToString<char>(char t)
164 return NumToString(static_cast<int>(t));
166 #if defined(FLATBUFFERS_CPP98_STL)
167 template <> inline std::string NumToString<long long>(long long t)
169 char buf[21]; // (log((1 << 63) - 1) / log(10)) + 2
170 snprintf(buf, sizeof(buf), "%lld", t);
171 return std::string(buf);
174 template <> inline std::string NumToString<unsigned long long>(unsigned long long t)
176 char buf[22]; // (log((1 << 63) - 1) / log(10)) + 1
177 snprintf(buf, sizeof(buf), "%llu", t);
178 return std::string(buf);
180 #endif // defined(FLATBUFFERS_CPP98_STL)
182 // Special versions for floats/doubles.
// Formats `t` with `precision` fractional digits, then trims trailing zeros
// (see the resize at the end). Two formatting paths are selected by
// FLATBUFFERS_PREFER_PRINTF.
// NOTE(review): some statements of the stream path are not visible in this
// view of the file.
183 template <typename T> std::string FloatToString(T t, int precision)
187 #ifndef FLATBUFFERS_PREFER_PRINTF
188 // to_string() prints different numbers of digits for floats depending on
189 // platform and isn't available on Android, so we use stringstream
190 std::stringstream ss;
191 // Use std::fixed to suppress scientific notation.
193 // Default precision is 6, we want that to be higher for doubles.
194 ss << std::setprecision(precision);
197 #else // FLATBUFFERS_PREFER_PRINTF
// printf path: format as double through NumToStringImplWrapper ("%0.*f").
198 auto v = static_cast<double>(t);
199 auto s = NumToStringImplWrapper(v, "%0.*f", precision);
200 #endif // FLATBUFFERS_PREFER_PRINTF
202 // Sadly, std::fixed turns "1" into "1.00000", so here we undo that.
// `p` is the index of the last character that is not '0'.
203 auto p = s.find_last_not_of('0');
204 if (p != std::string::npos)
206 // Strip trailing zeroes. If it is a whole number, keep one zero.
// When s[p] is the dot, keep p + 2 characters (the dot plus one zero);
// otherwise keep everything up to and including s[p].
207 s.resize(p + (s[p] == '.' ? 2 : 1));
212 template <> inline std::string NumToString<double>(double t) { return FloatToString(t, 12); }
213 template <> inline std::string NumToString<float>(float t) { return FloatToString(t, 6); }
215 // Convert an integer value to a hexadecimal string.
216 // The returned string is at least xdigits long, padded on the left with 0
// digits (std::setw is a minimum field width, so in the stream path a value
// that needs more than xdigits hex digits yields a longer string).
217 // For example, IntToStringHex(0x23, 8) returns the string "00000023".
// `i` must be non-negative (asserted). Upper-case hex digits are produced by
// both paths (std::uppercase / "%X").
// NOTE(review): the rest of the stream statement is not visible in this view.
218 inline std::string IntToStringHex(int i, int xdigits)
220 FLATBUFFERS_ASSERT(i >= 0);
223 #ifndef FLATBUFFERS_PREFER_PRINTF
224 std::stringstream ss;
225 ss << std::setw(xdigits) << std::setfill('0') << std::hex << std::uppercase
228 #else // FLATBUFFERS_PREFER_PRINTF
229 return NumToStringImplWrapper(i, "%.*X", xdigits);
230 #endif // FLATBUFFERS_PREFER_PRINTF
235 // Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}.
236 #if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0)
// Wraps a platform locale handle (`locale_type`) in a single static
// instance; Get() returns that handle for use by the *_l conversion
// functions referenced by the __strto*_impl macros.
// NOTE(review): the constructor/destructor, the `locale_` member and the
// preprocessor lines choosing between the two typedefs are not visible in
// this view of the file.
237 class ClassicLocale {
// Alternative definitions of the handle type (one per platform branch).
239 typedef _locale_t locale_type;
241 typedef locale_t locale_type; // POSIX.1-2008 locale_t type
246 static ClassicLocale instance_;
// Returns the locale handle stored in the static instance.
248 static locale_type Get() { return instance_.locale_; }
// Internal string-to-number macros used by the strtoval_impl overloads.
// s = input string, pe = end-pointer output, b = numeric base.
// NOTE(review): the #if/#else/#endif lines that select between the
// alternative definitions below are not visible in this view of the file.
// Locale-aware variants with a leading underscore (_strto*_l); each receives
// ClassicLocale::Get().
252 #define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get())
253 #define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get())
254 #define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get())
255 #define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get())
// Locale-aware variants without the underscore (strto*_l); each receives
// ClassicLocale::Get().
257 #define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get())
258 #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get())
259 #define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get())
260 #define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get())
// Variants that use the global locale; the float version parses as double
// and narrows the result.
263 #define __strtod_impl(s, pe) strtod(s, pe)
264 #define __strtof_impl(s, pe) static_cast<float>(strtod(s, pe))
266 #define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b)
267 #define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b)
269 #define __strtoull_impl(s, pe, b) strtoull(s, pe, b)
270 #define __strtoll_impl(s, pe, b) strtoll(s, pe, b)
// Signed 64-bit overload: parses `str` with __strtoll_impl in the given
// `base` and stores the result in *val; `endptr` is forwarded to the
// underlying conversion.
// NOTE(review): the end of the parameter list is not visible in this view.
274 inline void strtoval_impl(int64_t *val, const char *str, char **endptr,
276 *val = __strtoll_impl(str, endptr, base);
// Unsigned 64-bit overload: parses `str` with __strtoull_impl in the given
// `base` and stores the result in *val; `endptr` is forwarded to the
// underlying conversion.
// NOTE(review): the end of the parameter list is not visible in this view.
279 inline void strtoval_impl(uint64_t *val, const char *str, char **endptr,
281 *val = __strtoull_impl(str, endptr, base);
284 inline void strtoval_impl(double *val, const char *str, char **endptr) {
285 *val = __strtod_impl(str, endptr);
288 // UBSAN: double to float is safe if numeric_limits<float>::is_iec559 is true.
289 __supress_ubsan__("float-cast-overflow")
290 inline void strtoval_impl(float *val, const char *str, char **endptr) {
291 *val = __strtof_impl(str, endptr);
293 #undef __strtoull_impl
294 #undef __strtoll_impl
299 // Adaptor for strtoull()/strtoll().
300 // Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
301 // while strtoll with base=0 interprets first leading zero as octal prefix.
302 // In future, it is possible to add prefixed 0b0101.
303 // 1) Checks errno code for overflow condition (out of range).
304 // 2) If base <= 0, the function tries to detect the base from the prefix.
306 // Value stored in *val (like strtoull and strtoll, but reject partial result):
307 // - If successful, an integer value corresponding to the str is stored.
308 // - If full string conversion can't be performed, 0 is stored.
309 // - If the converted value falls out of range of corresponding return type, a
310 // range error occurs. In this case value MAX(T)/MIN(T) is stored.
// The bool result is false when the string was not fully consumed (see the
// `return false` below).
// NOTE(review): several statements (local declarations, the base dispatch
// condition, the final returns) are not visible in this view of the file.
311 template <typename T>
312 inline bool StringToIntegerImpl(T *val, const char *const str, const int base = 0,
313 const bool check_errno = true)
315 // T is int64_t or uint64_t
316 FLATBUFFERS_ASSERT(str);
// Skip everything up to the first digit, then look for a "0x"/"0X" prefix.
320 while (*s && !is_digit(*s))
322 if (s[0] == '0' && is_alpha_char(s[1], 'X'))
323 return StringToIntegerImpl(val, str, 16, check_errno);
324 // if a prefix not match, try base=10
325 return StringToIntegerImpl(val, str, 10, check_errno);
330 errno = 0; // clear thread-local errno
332 strtoval_impl(val, str, const_cast<char **>(&endptr), base);
// Reject trailing garbage (conversion stopped before the terminating NUL)
// and inputs where nothing was converted.
333 if ((*endptr != '\0') || (endptr == str))
335 *val = 0; // erase partial result
336 return false; // invalid string
338 // errno is out-of-range, return MAX/MIN
339 if (check_errno && errno)
// Parses the whole of `str` as a floating-point number into *val through
// strtoval_impl. `done` is true only when at least one character was
// consumed and parsing stopped at the terminating NUL.
// NOTE(review): the declaration of `end`, the condition guarding the reset
// of *val and the return statement are not visible in this view of the file.
345 template <typename T> inline bool StringToFloatImpl(T *val, const char *const str)
347 // Type T must be either float or double.
348 FLATBUFFERS_ASSERT(str && val);
350 strtoval_impl(val, str, const_cast<char **>(&end));
351 auto done = (end != str) && (*end == '\0');
353 *val = 0; // erase partial result
357 // Convert a string to an instance of T.
358 // Value stored in *val (matched with StringToIntegerImpl and strtod):
359 // - If successful, a numeric value corresponding to the str is stored.
360 // - If full string conversion can't be performed, 0 is stored.
361 // - If the converted value falls out of range of corresponding return type, a
362 // range error occurs. In this case value MAX(T)/MIN(T) is stored.
// This generic version handles integer types narrower than 64 bits: the
// text is parsed into a 64-bit intermediate (`i64`) and then range-checked
// against T.
// NOTE(review): the range comparisons and return statements are not visible
// in this view of the file.
363 template <typename T> inline bool StringToNumber(const char *s, T *val)
365 // Assert on `unsigned long` and `signed long` on LP64.
366 // If it is necessary, it could be solved with flatbuffers::enable_if<B,T>.
367 static_assert(sizeof(T) < sizeof(int64_t), "unexpected type T");
368 FLATBUFFERS_ASSERT(s && val);
370 // The errno check isn't needed, will return MAX/MIN on overflow.
371 if (StringToIntegerImpl(&i64, s, 0, false))
// Limits of T widened to int64_t; the parentheses around `max` keep a
// function-like `max` macro from expanding.
373 const int64_t max = (flatbuffers::numeric_limits<T>::max)();
374 const int64_t min = flatbuffers::numeric_limits<T>::lowest();
377 *val = static_cast<T>(max);
382 // For unsigned types return max to distinguish from
383 // "no conversion can be performed" when 0 is returned.
384 *val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
387 *val = static_cast<T>(i64);
394 template <> inline bool StringToNumber<int64_t>(const char *str, int64_t *val)
396 return StringToIntegerImpl(val, str);
// 64-bit unsigned specialization. After a successful StringToIntegerImpl
// call it additionally inspects the character before the first digit so
// that negative inputs can be rejected (see the comment below).
// NOTE(review): the declaration of `s`, the sign test and the return
// statements are not visible in this view of the file.
399 template <> inline bool StringToNumber<uint64_t>(const char *str, uint64_t *val)
401 if (!StringToIntegerImpl(val, str))
403 // The strtoull accepts negative numbers:
404 // If the minus sign was part of the input sequence, the numeric value
405 // calculated from the sequence of digits is negated as if by unary minus
406 // in the result type, which applies unsigned integer wraparound rules.
407 // Fix this behaviour (except -0).
// Advance to the first digit, then step back one character (if any) to the
// position where a sign would be.
411 while (*s && !is_digit(*s))
413 s = (s > str) ? (s - 1) : s; // step back to one symbol
416 // For unsigned types return the max to distinguish from
417 // "no conversion can be performed".
418 *val = (flatbuffers::numeric_limits<uint64_t>::max)();
425 template <> inline bool StringToNumber(const char *s, float *val)
427 return StringToFloatImpl(val, s);
430 template <> inline bool StringToNumber(const char *s, double *val)
432 return StringToFloatImpl(val, s);
435 inline int64_t StringToInt(const char *s, int base = 10)
438 return StringToIntegerImpl(&val, s, base) ? val : 0;
441 inline uint64_t StringToUInt(const char *s, int base = 10)
444 return StringToIntegerImpl(&val, s, base) ? val : 0;
// Function-pointer types for replaceable file hooks: one with the same
// parameter list as LoadFile, one with the same parameter list as FileExists.
447 typedef bool (*LoadFileFunction)(const char *filename, bool binary, std::string *dest);
448 typedef bool (*FileExistsFunction)(const char *filename);
// Install a custom hook of the corresponding type.
// NOTE(review): presumably each setter returns the previously installed hook
// — confirm against the implementation, which is not in this file.
450 LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function);
452 FileExistsFunction SetFileExistsFunction(FileExistsFunction file_exists_function);
454 // Check if file "name" exists.
455 bool FileExists(const char *name);
457 // Check if "name" exists and it is also a directory.
458 bool DirExists(const char *name);
460 // Load file "name" into "buf" returning true if successful
461 // false otherwise. If "binary" is false data is read
462 // using ifstream's text mode, otherwise data is read with
// no transcoding.
464 bool LoadFile(const char *name, bool binary, std::string *buf);
466 // Save data "buf" of length "len" bytes into a file
467 // "name" returning true if successful, false otherwise.
468 // If "binary" is false data is written using ifstream's
469 // text mode, otherwise data is written with no
// transcoding.
471 bool SaveFile(const char *name, const char *buf, size_t len, bool binary);
473 // Save data "buf" into file "name" returning true if
474 // successful, false otherwise. If "binary" is false
475 // data is written using ifstream's text mode, otherwise
476 // data is written with no transcoding.
477 inline bool SaveFile(const char *name, const std::string &buf, bool binary)
479 return SaveFile(name, buf.c_str(), buf.size(), binary);
482 // Functionality for minimalistic portable path handling.
484 // The functions below behave correctly regardless of whether posix ('/') or
485 // Windows ('/' or '\\') separators are used.
487 // Any new separators inserted are always posix.
488 FLATBUFFERS_CONSTEXPR char kPathSeparator = '/';
490 // Returns the path with the extension, if any, removed.
491 std::string StripExtension(const std::string &filepath);
493 // Returns the extension, if any.
494 std::string GetExtension(const std::string &filepath);
496 // Return the last component of the path, after the last separator.
497 std::string StripPath(const std::string &filepath);
499 // Strip the last component of the path + separator.
500 std::string StripFileName(const std::string &filepath);
502 // Concatenates a path with a filename, regardless of whether the path
503 // ends in a separator or not.
504 std::string ConCatPathFileName(const std::string &path, const std::string &filename);
506 // Replaces any '\\' separators with '/'
507 std::string PosixPath(const char *path);
509 // This function ensures a directory exists, by recursively
510 // creating dirs for any parts of the path that don't exist yet.
511 void EnsureDirExists(const std::string &filepath);
513 // Obtains the absolute path from any other path.
514 // Returns the input path if the absolute path couldn't be resolved.
515 std::string AbsolutePath(const std::string &filepath);
517 // To and from UTF-8 unicode conversion functions
519 // Convert a unicode code point into a UTF-8 representation by appending it
520 // to a string. Returns the number of bytes generated.
// `ucc` must fit in 31 bits (asserted). The loop tries encodings of 1 to 6
// bytes (i = 0..5) and uses the first one whose capacity holds `ucc`.
// NOTE(review): the statement after the final assert is not visible in this
// view of the file.
521 inline int ToUTF8(uint32_t ucc, std::string *out)
523 FLATBUFFERS_ASSERT(!(ucc & 0x80000000)); // Top bit can't be set.
524 // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
525 for (int i = 0; i < 6; i++)
527 // Max bits this encoding can represent.
// Evaluates to 7, 11, 16, 21, 26, 31 for i = 0..5.
528 uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
529 if (ucc < (1u << max_bits))
531 // Remaining bits not encoded in the first byte, store 6 bits each
532 uint32_t remain_bits = i * 6;
// First byte: the shifted 0xFE supplies the leading length-marker bits
// (truncated to 8 bits by the cast), OR-ed with the top bits of `ucc`.
534 (*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) | (ucc >> remain_bits));
535 // Store remaining bytes:
// Continuation bytes: 6 payload bits each, most significant group first,
// tagged with the 10xxxxxx marker (0x80).
536 for (int j = i - 1; j >= 0; j--)
538 (*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
540 return i + 1; // Return the number of bytes added.
543 FLATBUFFERS_ASSERT(0); // Impossible to arrive here.
547 // Converts whatever prefix of the incoming string corresponds to a valid
548 // UTF-8 sequence into a unicode code. The incoming pointer will have been
549 // advanced past all bytes parsed.
550 // returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in
// that case).
// NOTE(review): many statements (the declaration and counting of `len`, the
// single-byte path, the shift that makes room for each continuation byte,
// the per-length dispatch and the return statements) are not visible in this
// view of the file; only the visible checks are annotated.
552 inline int FromUTF8(const char **in)
555 // Count leading 1 bits.
556 for (int mask = 0x80; mask >= 0x04; mask >>= 1)
567 if ((static_cast<unsigned char>(**in) << len) & 0x80)
568 return -1; // Bit after leading 1's must be 0.
571 // UTF-8 encoded values with a length are between 2 and 4 bytes.
572 if (len < 2 || len > 4)
576 // Grab initial bits of the code.
// The mask keeps the low (7 - len) payload bits of the lead byte.
577 int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
578 for (int i = 0; i < len - 1; i++)
580 if ((**in & 0xC0) != 0x80)
581 return -1; // Upper bits must 1 0.
583 ucc |= *(*in)++ & 0x3F; // Grab 6 more bits of the code.
585 // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for
586 // UTF-16 surrogate pairs).
587 if (ucc >= 0xD800 && ucc <= 0xDFFF)
591 // UTF-8 must represent code points in their shortest possible encoding.
595 // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF.
596 if (ucc < 0x0080 || ucc > 0x07FF)
602 // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF.
603 if (ucc < 0x0800 || ucc > 0xFFFF)
609 // Four bytes of UTF-8 can represent code points from U+10000 to U+10FFFF.
610 if (ucc < 0x10000 || ucc > 0x10FFFF)
619 #ifndef FLATBUFFERS_PREFER_PRINTF
620 // Wraps a string to a maximum length, inserting new lines where necessary. Any
621 // existing whitespace will be collapsed down to a single space. A prefix or
622 // suffix can be provided, which will be inserted before or after a wrapped
623 // line, respectively.
// Only compiled when FLATBUFFERS_PREFER_PRINTF is not defined.
// NOTE(review): all three strings are taken by value, so each call copies
// them; const references would avoid that.
// NOTE(review): the branch bodies and the final return are not visible in
// this view of the file.
624 inline std::string WordWrap(const std::string in, size_t max_length,
625 const std::string wrapped_line_prefix,
626 const std::string wrapped_line_suffix)
628 std::istringstream in_stream(in);
629 std::string wrapped, line, word;
// operator>> splits the input on whitespace, one word per iteration.
634 while (in_stream >> word)
// Does the current line still fit with a separating space, the next word
// and the suffix?
636 if ((line.length() + 1 + word.length() + wrapped_line_suffix.length()) < max_length)
// Flush the finished line, then start a new one with the prefix.
642 wrapped += line + wrapped_line_suffix + "\n";
643 line = wrapped_line_prefix + word;
650 #endif // !FLATBUFFERS_PREFER_PRINTF
// Appends an escaped form of the `length` bytes at `s` to *_text.
// Printable ASCII is handled separately from other bytes, which are first
// decoded with FromUTF8 and then emitted either verbatim or through
// IntToStringHex (4-digit code units, with a surrogate pair above 0xFFFF).
// NOTE(review): the remaining parameters, the declaration of `c`, the
// handling of individual control characters and the return statements are
// not visible in this view of the file.
652 inline bool EscapeString(const char *s, size_t length, std::string *_text, bool allow_non_utf8,
655 std::string &text = *_text;
// NOTE(review): the index is a uoffset_t while `length` is a size_t; if
// uoffset_t is narrower than size_t, very large lengths would wrap — TODO
// confirm the type's width.
657 for (uoffset_t i = 0; i < length; i++)
684 if (c >= ' ' && c <= '~')
690 // Not printable ASCII data. Let's see if it's valid UTF-8 first:
691 const char *utf8 = s + i;
692 int ucc = FromUTF8(&utf8);
// The raw byte is emitted as two hex digits.
698 text += IntToStringHex(static_cast<uint8_t>(c), 2);
702 // There are two cases here:
704 // 1) We reached here by parsing an IDL file. In that case,
705 // we previously checked for non-UTF-8, so we shouldn't reach
708 // 2) We reached here by someone calling GenerateText()
709 // on a previously-serialized flatbuffer. The data might have
710 // non-UTF-8 Strings, or might be corrupt.
712 // In both cases, we have to give up and inform the caller
713 // they have no JSON.
721 // utf8 points to past all utf-8 bytes parsed
722 text.append(s + i, static_cast<size_t>(utf8 - s - i));
724 else if (ucc <= 0xFFFF)
726 // Parses as Unicode within JSON's \uXXXX range, so use that.
728 text += IntToStringHex(ucc, 4);
730 else if (ucc <= 0x10FFFF)
732 // Encode Unicode SMP values to a surrogate pair using two \u
// Standard UTF-16 split: subtract 0x10000, the high 10 bits go to the
// 0xD800 range and the low 10 bits to the 0xDC00 range.
734 uint32_t base = ucc - 0x10000;
735 auto high_surrogate = (base >> 10) + 0xD800;
736 auto low_surrogate = (base & 0x03FF) + 0xDC00;
738 text += IntToStringHex(high_surrogate, 4);
740 text += IntToStringHex(low_surrogate, 4);
742 // Skip past characters recognized.
// The -1 compensates for the loop's own i++.
743 i = static_cast<uoffset_t>(utf8 - s - 1);
// Renders `buffer_size` bytes at `buffer` as text, two upper-case hex digits
// per byte via IntToStringHex, broken into lines: a line is closed with
// `wrapped_line_suffix` once it would reach `max_length` characters, and
// each new line starts with `wrapped_line_prefix`. A null `buffer` produces
// no byte output (the loop condition tests `s`).
// NOTE(review): the text emitted around each byte (e.g. separators), the
// line-break character and the return statement are not visible in this view
// of the file.
753 inline std::string BufferToHexText(const void *buffer, size_t buffer_size, size_t max_length,
754 const std::string &wrapped_line_prefix,
755 const std::string &wrapped_line_suffix)
757 std::string text = wrapped_line_prefix;
// Offset in `text` where the current output line begins.
758 size_t start_offset = 0;
759 const char *s = reinterpret_cast<const char *>(buffer);
760 for (size_t i = 0; s && i < buffer_size; i++)
762 // Last iteration or do we have more?
763 bool have_more = i + 1 < buffer_size;
765 text += IntToStringHex(static_cast<uint8_t>(s[i]), 2);
770 // If we have more to process and we reached max_length
771 if (have_more && text.size() + wrapped_line_suffix.size() >= start_offset + max_length)
773 text += wrapped_line_suffix;
775 start_offset = text.size();
776 text += wrapped_line_prefix;
// Close the final line.
779 text += wrapped_line_suffix;
783 // Remove paired quotes in a string: "text"|'text' -> text.
784 std::string RemoveStringQuotes(const std::string &s);
786 // Change the global C-locale to locale with name <locale_name>.
787 // Returns an actual locale name in <_value>, useful if locale_name is "" or
// null (NOTE(review): the end of this sentence is missing from the file;
// "null" is the presumed completion — confirm).
789 bool SetGlobalTestLocale(const char *locale_name, std::string *_value = nullptr);
791 // Read (or test) a value of environment variable.
// <_value> is optional (defaults to nullptr).
792 bool ReadEnvironmentVariable(const char *var_name, std::string *_value = nullptr);
794 // MSVC specific: Send all assert reports to STDOUT to prevent CI hangs.
795 void SetupDefaultCRTReportMode();
797 } // namespace flatbuffers
799 #endif // FLATBUFFERS_UTIL_H_