1 /****************************************************************************
3 ** Copyright (C) 2015 Intel Corporation
5 ** Permission is hereby granted, free of charge, to any person obtaining a copy
6 ** of this software and associated documentation files (the "Software"), to deal
7 ** in the Software without restriction, including without limitation the rights
8 ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 ** copies of the Software, and to permit persons to whom the Software is
10 ** furnished to do so, subject to the following conditions:
12 ** The above copyright notice and this permission notice shall be included in
13 ** all copies or substantial portions of the Software.
15 ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 ****************************************************************************/
27 #include "cborconstants_p.h"
28 #include "compilersupport_p.h"
29 #include "extract_number_p.h"
35 #include "assert_p.h" /* Always include last */
/* Nesting depth at which advance_recursive() stops descending and reports
 * CborErrorNestingTooDeep. Only defined here when the build has not already
 * provided a value. */
37 #ifndef CBOR_PARSER_MAX_RECURSIONS
38 # define CBOR_PARSER_MAX_RECURSIONS 1024
43 * This type contains one value parsed from the CBOR stream.
45 * To get the actual type, use cbor_value_get_type(). Then extract the value
46 * using one of the corresponding functions: cbor_value_get_boolean(), cbor_value_get_int64(),
47 * cbor_value_get_int(), cbor_value_copy_string(), cbor_value_get_array(), cbor_value_get_map(),
48 * cbor_value_get_double(), cbor_value_get_float().
50 * In C++ and C11 modes, you can additionally use the cbor_value_get_integer()
51 * and cbor_value_get_floating_point() generic functions.
54 * Implementation details: the CborValue contains these fields:
56 * \li ptr: pointer to the actual data
57 * \li flags: flags from the decoder
58 * \li extra: partially decoded integer value (0, 1 or 2 bytes)
59 * \li remaining: remaining items in this collection after this item or UINT32_MAX if length is unknown
/*
 * Decodes the number found at *ptr with extract_number(), bounded by
 * parser->end, so that it can be reported as a length through len.
 * May return CborErrorDataTooLarge.
 * NOTE(review): presumably that happens when the decoded value does not fit
 * in size_t -- confirm against the range check.
 */
64 static CborError extract_length(const CborParser *parser, const uint8_t **ptr, size_t *len)
67 CborError err = extract_number(ptr, parser->end, &v);
73 return CborErrorDataTooLarge;
/*
 * Tells whether type is a fixed-size type, i.e. none of the variable-size
 * ones. Text strings, byte strings and arrays are excluded by the operands
 * shown; the expression ends in "&&", so at least one more operand follows.
 * NOTE(review): presumably the map type -- confirm.
 */
77 static bool is_fixed_type(uint8_t type)
79 return type != CborTextStringType && type != CborByteStringType && type != CborArrayType &&
/*
 * Pre-parses the item that it->ptr points at: classifies it from its initial
 * byte and records the outcome in it->type, it->flags and it->extra.
 * it->type is reset to CborInvalidType up front. Returns
 * CborErrorUnexpectedEOF when the buffer ends before the item or before the
 * bytes that follow its initial byte; malformed descriptors produce the other
 * error codes returned below.
 */
83 static CborError preparse_value(CborValue *it)
85 const CborParser *parser = it->parser;
86 it->type = CborInvalidType;
// no bytes left in the buffer
89 if (it->ptr == parser->end)
90 return CborErrorUnexpectedEOF;
92 uint8_t descriptor = *it->ptr;
93 uint8_t type = descriptor & MajorTypeMask;
// from here on descriptor holds only the low bits of the initial byte, which
// also serve as the provisional value stored in it->extra
96 it->extra = (descriptor &= SmallValueMask);
// values above Value64Bit: only IndefiniteLength is accepted
98 if (descriptor > Value64Bit) {
99 if (unlikely(descriptor != IndefiniteLength))
100 return type == CborSimpleType ? CborErrorUnknownType : CborErrorIllegalNumber;
// indefinite length is only accepted on types that are not fixed-size
101 if (likely(!is_fixed_type(type))) {
103 it->flags |= CborIteratorFlag_UnknownLength;
// IndefiniteLength on a fixed-size type: reported as an unexpected break for
// the simple type, as an illegal number otherwise
107 return type == CborSimpleType ? CborErrorUnexpectedBreak : CborErrorIllegalNumber;
// descriptors below Value8Bit need no further bytes; Value8Bit needs one and
// each subsequent descriptor value doubles that
110 size_t bytesNeeded = descriptor < Value8Bit ? 0 : (1 << (descriptor - Value8Bit));
// the "+ 1" accounts for the initial byte itself
111 if (bytesNeeded + 1 > (size_t)(parser->end - it->ptr))
112 return CborErrorUnexpectedEOF;
114 uint8_t majortype = type >> MajorTypeShift;
// negative integers are reported as CborIntegerType plus a flag
115 if (majortype == NegativeIntegerType) {
116 it->flags |= CborIteratorFlag_NegativeInteger;
117 it->type = CborIntegerType;
118 } else if (majortype == SimpleTypesType) {
119 switch (descriptor) {
122 it->type = CborBooleanType;
// float and double payloads do not fit in it->extra, so they are flagged the
// same way as large integers
125 case SinglePrecisionFloat:
126 case DoublePrecisionFloat:
127 it->flags |= CborIteratorFlag_IntegerValueTooLarge;
132 case HalfPrecisionFloat:
// the simple value is carried in the byte after the initial one
136 case SimpleTypeInNextByte:
137 it->extra = (uint8_t)it->ptr[1];
// strict checking (the default): a value below 32 in the extra byte is rejected
138 #ifndef CBOR_PARSER_NO_STRICT_CHECKS
139 if (unlikely(it->extra < 32)) {
140 it->type = CborInvalidType;
141 return CborErrorIllegalSimpleType;
150 assert(false); // these conditions can't be reached
151 return CborErrorUnexpectedBreak;
156 // try to decode up to 16 bits
157 if (descriptor < Value8Bit)
160 if (descriptor == Value8Bit)
161 it->extra = (uint8_t)it->ptr[1];
162 else if (descriptor == Value16Bit)
163 it->extra = get16(it->ptr + 1);
165 it->flags |= CborIteratorFlag_IntegerValueTooLarge; // Value32Bit or Value64Bit
/*
 * Updates the iterator's bookkeeping after it->ptr has been moved past an
 * item. With a known item count (remaining != UINT32_MAX) the count is
 * decremented; with an unknown count the break byte is looked for. In both
 * end-of-container cases it->type becomes CborInvalidType. The final visible
 * statement pre-parses the item at it->ptr with preparse_value().
 */
169 static CborError preparse_next_value(CborValue *it)
171 if (it->remaining != UINT32_MAX) {
172 // don't decrement the item count if the current item is tag: they don't count
// reaching zero means the container (or the top-level item) is exhausted
173 if (it->type != CborTagType && !--it->remaining) {
174 it->type = CborInvalidType;
// unknown length: the pointer is checked against the end of the buffer before
// the byte is read
177 } else if (it->remaining == UINT32_MAX && it->ptr != it->parser->end && *it->ptr == (uint8_t)BreakByte) {
178 // end of map or array
180 it->type = CborInvalidType;
185 return preparse_value(it);
/*
 * Steps over the current item and pre-parses the following one.
 * extract_number() receives &it->ptr and the end of the buffer; it is
 * asserted not to fail here.
 * NOTE(review): presumably because preparse_value() already validated this
 * item -- confirm.
 */
188 static CborError advance_internal(CborValue *it)
191 CborError err = extract_number(&it->ptr, it->parser->end, &length);
192 assert(err == CborNoError);
// strings handled here must have a known length that fits in size_t
194 if (it->type == CborByteStringType || it->type == CborTextStringType) {
195 assert(length == (size_t)length);
196 assert((it->flags & CborIteratorFlag_UnknownLength) == 0);
200 return preparse_next_value(it);
205 * Decodes the CBOR integer value when it is larger than the 16 bits available
206 * in value->extra. This function requires that value->flags have the
207 * CborIteratorFlag_IntegerValueTooLarge flag set.
209 * This function is also used to extract single- and double-precision floating
210 * point values (SinglePrecisionFloat == Value32Bit and DoublePrecisionFloat ==
213 uint64_t _cbor_value_decode_int64_internal(const CborValue *value)
// precondition: either the "too large" flag is set or the value is a
// float/double
215 assert(value->flags & CborIteratorFlag_IntegerValueTooLarge ||
216 value->type == CborFloatType || value->type == CborDoubleType);
218 // since the additional information can only be Value32Bit or Value64Bit,
219 // we just need to test for the one bit those two options differ
220 assert((*value->ptr & SmallValueMask) == Value32Bit || (*value->ptr & SmallValueMask) == Value64Bit);
// 32-bit payload: read the four bytes that follow the initial byte
221 if ((*value->ptr & 1) == (Value32Bit & 1))
222 return get32(value->ptr + 1);
// otherwise it must be the 64-bit form: read the eight bytes that follow
224 assert((*value->ptr & SmallValueMask) == Value64Bit);
225 return get64(value->ptr + 1);
229 * Initializes the CBOR parser for parsing \a size bytes beginning at \a
230 * buffer. Parsing will use flags set in \a flags. The iterator to the first
231 * element is returned in \a it.
233 * The \a parser structure needs to remain valid throughout the decoding
234 * process. It is not thread-safe to share one CborParser among multiple
235 * threads iterating at the same time, but the object can be copied so multiple
236 * threads can iterate.
238 * ### Write how to determine the end pointer
239 * ### Write how to do limited-buffer windowed decoding
241 CborError cbor_parser_init(const uint8_t *buffer, size_t size, int flags, CborParser *parser, CborValue *it)
// start from an all-zero parser structure
243 memset(parser, 0, sizeof(*parser));
// end points one past the last byte of the input
244 parser->end = buffer + size;
245 parser->flags = flags;
248 it->remaining = 1; // there's one type altogether, usually an array or map
// the result of pre-parsing the first item is what the caller gets back
249 return preparse_value(it);
253 * Advances the CBOR value \a it by one fixed-size position. Fixed-size types
254 * are: integers, tags, simple types (including boolean, null and undefined
255 * values) and floating point types.
257 * \sa cbor_value_at_end(), cbor_value_advance(), cbor_value_enter_container(), cbor_value_leave_container()
259 CborError cbor_value_advance_fixed(CborValue *it)
// preconditions: the iterator points at a valid item of a fixed-size type
261 assert(it->type != CborInvalidType);
262 assert(is_fixed_type(it->type));
// NOTE(review): presumably returned when no items remain to advance over --
// confirm against the guarding condition
264 return CborErrorAdvancePastEOF;
265 return advance_internal(it);
/*
 * Advances it past one item of any type. Fixed-size items go through
 * advance_internal(), strings are walked without copying, and containers are
 * entered and skipped element by element through recursion. nestingLevel is
 * the current depth; reaching CBOR_PARSER_MAX_RECURSIONS yields
 * CborErrorNestingTooDeep.
 */
268 static CborError advance_recursive(CborValue *it, int nestingLevel)
270 if (is_fixed_type(it->type))
271 return advance_internal(it);
// not fixed-size and not a container: walk the string with a NULL buffer so
// that nothing is copied; it is also passed as the "next" output and so ends
// up past the string
273 if (!cbor_value_is_container(it)) {
274 size_t len = SIZE_MAX;
275 return _cbor_value_copy_string(it, NULL, &len, it);
// bound the recursion depth before descending
279 if (nestingLevel == CBOR_PARSER_MAX_RECURSIONS)
280 return CborErrorNestingTooDeep;
284 err = cbor_value_enter_container(it, &recursed);
// skip every element of the container, one level deeper
287 while (!cbor_value_at_end(&recursed)) {
288 err = advance_recursive(&recursed, nestingLevel + 1);
292 return cbor_value_leave_container(it, &recursed);
297 * Advances the CBOR value \a it by one element, skipping over containers.
298 * Unlike cbor_value_advance_fixed(), this function can be called on a CBOR
299 * value of any type. However, if the type is a container (map or array) or a
300 * string with a chunked payload, this function will not run in constant time
301 * and will recurse into itself (it will run in O(n) time for the number of
302 * elements or chunks and will use O(n) memory for the number of nested
305 * \sa cbor_value_at_end(), cbor_value_advance_fixed(), cbor_value_enter_container(), cbor_value_leave_container()
307 CborError cbor_value_advance(CborValue *it)
// precondition: the iterator points at a valid item
309 assert(it->type != CborInvalidType);
// NOTE(review): presumably returned when no items remain to advance over --
// confirm against the guarding condition
311 return CborErrorAdvancePastEOF;
// start the recursive skip at nesting level 0
312 return advance_recursive(it, 0);
316 * Advances the CBOR value \a it until it no longer points to a tag. If \a it is
317 * already not pointing to a tag, then this function returns it unchanged.
319 * \sa cbor_value_advance_fixed(), cbor_value_advance()
321 CborError cbor_value_skip_tag(CborValue *it)
// several tags may be stacked on one item; step over them one at a time
323 while (cbor_value_is_tag(it)) {
324 CborError err = cbor_value_advance_fixed(it);
332 * \fn bool cbor_value_is_container(const CborValue *it)
334 * Returns true if the \a it value is a container and requires recursion in
335 * order to decode (maps and arrays), false otherwise.
339 * Creates a CborValue iterator pointing to the first element of the container
340 * represented by \a it and saves it in \a recursed. The \a it container object
341 * needs to be kept and passed again to cbor_value_leave_container() in order
342 * to continue iterating past this container.
344 * \sa cbor_value_is_container(), cbor_value_leave_container(), cbor_value_advance()
346 CborError cbor_value_enter_container(const CborValue *it, CborValue *recursed)
349 assert(cbor_value_is_container(it));
352 if (it->flags & CborIteratorFlag_UnknownLength) {
// indefinite-length container: there is no item count, so remaining is set
// to the "unknown" marker UINT32_MAX
353 recursed->remaining = UINT32_MAX;
355 err = preparse_value(recursed);
356 if (err != CborErrorUnexpectedBreak)
358 // actually, break was expected here
359 // it's just an empty container
// known-length container: decode the element count
363 err = extract_number(&recursed->ptr, recursed->parser->end, &len);
364 assert(err == CborNoError);
// reject counts that change when stored in remaining, and UINT32_MAX itself,
// which is used above as the unknown-length marker
366 recursed->remaining = len;
367 if (recursed->remaining != len || len == UINT32_MAX) {
368 // back track the pointer to indicate where the error occurred
369 recursed->ptr = it->ptr;
370 return CborErrorDataTooLarge;
372 if (recursed->type == CborMapType) {
373 // maps have keys and values, so we need to multiply by 2
// check first that doubling cannot exceed UINT32_MAX
374 if (recursed->remaining > UINT32_MAX / 2) {
375 // back track the pointer to indicate where the error occurred
376 recursed->ptr = it->ptr;
377 return CborErrorDataTooLarge;
379 recursed->remaining *= 2;
382 return preparse_value(recursed);
385 // the case of the empty container
386 recursed->type = CborInvalidType;
387 recursed->remaining = 0;
392 * Updates \a it to point to the next element after the container. The \a
393 * recursed object needs to point to the element obtained either by advancing
394 * the last element of the container (via cbor_value_advance(),
395 * cbor_value_advance_fixed(), a nested cbor_value_leave_container(), or the \c
396 * next pointer from cbor_value_copy_string() or cbor_value_dup_string()).
398 * \sa cbor_value_enter_container(), cbor_value_at_end()
400 CborError cbor_value_leave_container(CborValue *it, const CborValue *recursed)
// preconditions: it is the container itself, and recursed has already run off
// the end of it (its type is invalid)
402 assert(cbor_value_is_container(it));
403 assert(recursed->type == CborInvalidType);
// resume the outer iteration from where the inner iterator stopped
404 it->ptr = recursed->ptr;
405 return preparse_next_value(it);
409 * Calculates the length of the string in \a value and stores the result in \a
410 * len. This function is different from cbor_value_get_string_length() in that
411 * it calculates the length even for strings sent in chunks. For that reason,
412 * this function may not run in constant time (it will run in O(n) time on the
415 * \note On 32-bit platforms, this function will return error condition of \ref
416 * CborErrorDataTooLarge if the stream indicates a length that is too big to
419 * \sa cbor_value_get_string_length(), cbor_value_copy_string(), cbor_value_is_length_known()
421 CborError cbor_value_calculate_string_length(const CborValue *value, size_t *len)
// a NULL buffer makes _cbor_value_copy_string() walk the string without
// copying; no "next" iterator is requested
424 return _cbor_value_copy_string(value, NULL, len, NULL);
428 * \fn CborError cbor_value_dup_text_string(const CborValue *value, char **buffer, size_t *buflen, CborValue *next)
430 * Allocates memory for the string pointed to by \a value and copies it into this
431 * buffer. The pointer to the buffer is stored in \a buffer and the number of
432 * bytes copied is stored in \a buflen (those variables must not be NULL).
434 * If \c malloc returns a NULL pointer, this function will return error
435 * condition \ref CborErrorOutOfMemory.
437 * On success, \c{*buffer} will contain a valid pointer that must be freed by
438 * calling \c{free()}. This is the case even for zero-length strings.
440 * The \a next pointer, if not null, will be updated to point to the next item
441 * after this string. If \a value points to the last item, then \a next will be
444 * \note This function does not perform UTF-8 validation on the incoming text
447 * \sa cbor_value_copy_text_string(), cbor_value_dup_byte_string()
451 * \fn CborError cbor_value_dup_byte_string(const CborValue *value, uint8_t **buffer, size_t *buflen, CborValue *next)
453 * Allocates memory for the string pointed to by \a value and copies it into this
454 * buffer. The pointer to the buffer is stored in \a buffer and the number of
455 * bytes copied is stored in \a buflen (those variables must not be NULL).
457 * If \c malloc returns a NULL pointer, this function will return error
458 * condition \ref CborErrorOutOfMemory.
460 * On success, \c{*buffer} will contain a valid pointer that must be freed by
461 * calling \c{free()}. This is the case even for zero-length strings.
463 * The \a next pointer, if not null, will be updated to point to the next item
464 * after this string. If \a value points to the last item, then \a next will be
467 * \sa cbor_value_copy_byte_string(), cbor_value_dup_text_string()
469 CborError _cbor_value_dup_string(const CborValue *value, void **buffer, size_t *buflen, CborValue *next)
// first pass: a NULL buffer, so nothing is copied and only *buflen is filled in
474 CborError err = _cbor_value_copy_string(value, NULL, buflen, NULL);
// the caller owns the allocation and releases it with free()
479 *buffer = malloc(*buflen);
482 return CborErrorOutOfMemory;
// second pass: copy into the new buffer and update next
484 err = _cbor_value_copy_string(value, *buffer, buflen, next);
492 // We return uintptr_t so that we can pass memcpy directly as the iteration
493 // function. The choice is to optimize for memcpy, which is used in the base
494 // parser API (cbor_value_copy_string), while memcmp is used in convenience API
// Callback invoked by iterate_string_chunks() for each run of payload bytes:
// (position in the caller's buffer, source bytes, byte count). The return
// value is stored into a bool by the caller, so non-zero means "continue".
496 typedef uintptr_t (*IterateFunction)(char *, const uint8_t *, size_t);
// IterateFunction selected by _cbor_value_copy_string() when no destination
// buffer is supplied.
// NOTE(review): presumably ignores its arguments and returns non-zero -- confirm.
498 static uintptr_t iterate_noop(char *dest, const uint8_t *src, size_t len)
// IterateFunction that compares len bytes of s1 against s2. Note the sense of
// the result: 1 when the bytes are equal, 0 when they differ (the opposite of
// memcmp's own convention).
506 static uintptr_t iterate_memcmp(char *s1, const uint8_t *s2, size_t len)
508 return memcmp(s1, (const char *)s2, len) == 0;
/*
 * Walks the payload of the byte or text string at value and hands each run of
 * payload bytes to func together with the matching position in buffer. A
 * known-length string is handled in a single call; otherwise each chunk is
 * visited in turn and the break byte is looked for. func is only invoked
 * while the data still fits within *buflen, and *result receives func's
 * return value. next is finally passed to preparse_next_value().
 */
511 static CborError iterate_string_chunks(const CborValue *value, char *buffer, size_t *buflen,
512 bool *result, CborValue *next, IterateFunction func)
514 assert(cbor_value_is_byte_string(value) || cbor_value_is_text_string(value));
518 const uint8_t *ptr = value->ptr;
519 if (cbor_value_is_length_known(value)) {
520 // easy case: fixed length
521 err = extract_length(value->parser, &ptr, &total);
// the declared length must not run past the end of the input
524 if (total > (size_t)(value->parser->end - ptr))
525 return CborErrorUnexpectedEOF;
// only hand the data to func when it fits in the caller's buffer
526 if (total <= *buflen)
527 *result = func(buffer, ptr, total);
// chunked string: the input must not end before the break byte
540 if (ptr == value->parser->end)
541 return CborErrorUnexpectedEOF;
543 if (*ptr == (uint8_t)BreakByte) {
548 // is this the right type?
549 if ((*ptr & MajorTypeMask) != value->type)
550 return CborErrorIllegalType;
552 err = extract_length(value->parser, &ptr, &chunkLen);
// guard the running total against overflow
556 if (unlikely(add_check_overflow(total, chunkLen, &newTotal)))
557 return CborErrorDataTooLarge;
// the chunk must not run past the end of the input
559 if (chunkLen > (size_t)(value->parser->end - ptr))
560 return CborErrorUnexpectedEOF;
// once func has returned false, or the data no longer fits, func is no longer
// called
562 if (*result && *buflen >= newTotal)
563 *result = func(buffer + total, ptr, chunkLen);
572 // is there enough room for the ending NUL byte?
// the one-byte "" source is the terminating NUL itself
573 if (*result && *buflen > total)
574 *result = func(buffer + total, (const uint8_t *)"", 1);
580 return preparse_next_value(next);
586 * \fn CborError cbor_value_copy_text_string(const CborValue *value, char *buffer, size_t *buflen, CborValue *next)
588 * Copies the string pointed by \a value into the buffer provided at \a buffer
589 * of \a buflen bytes. If \a buffer is a NULL pointer, this function will not
590 * copy anything and will only update the \a next value.
592 * If the provided buffer length was too small, this function returns an error
593 * condition of \ref CborErrorOutOfMemory. If you need to calculate the length
594 * of the string in order to preallocate a buffer, use
595 * cbor_value_calculate_string_length().
597 * On success, this function sets the number of bytes copied to \c{*buflen}. If
598 * the buffer is large enough, this function will insert a null byte after the
599 * last copied byte, to facilitate manipulation of text strings. That byte is
600 * not included in the returned value of \c{*buflen}.
602 * The \a next pointer, if not null, will be updated to point to the next item
603 * after this string. If \a value points to the last item, then \a next will be
606 * \note This function does not perform UTF-8 validation on the incoming text
609 * \sa cbor_value_dup_text_string(), cbor_value_copy_byte_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
613 * \fn CborError cbor_value_copy_byte_string(const CborValue *value, uint8_t *buffer, size_t *buflen, CborValue *next)
615 * Copies the string pointed by \a value into the buffer provided at \a buffer
616 * of \a buflen bytes. If \a buffer is a NULL pointer, this function will not
617 * copy anything and will only update the \a next value.
619 * If the provided buffer length was too small, this function returns an error
620 * condition of \ref CborErrorOutOfMemory. If you need to calculate the length
621 * of the string in order to preallocate a buffer, use
622 * cbor_value_calculate_string_length().
624 * On success, this function sets the number of bytes copied to \c{*buflen}. If
625 * the buffer is large enough, this function will insert a null byte after the
626 * last copied byte, to facilitate manipulation of null-terminated strings.
627 * That byte is not included in the returned value of \c{*buflen}.
629 * The \a next pointer, if not null, will be updated to point to the next item
630 * after this string. If \a value points to the last item, then \a next will be
633 * \sa cbor_value_dup_text_string(), cbor_value_copy_text_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
636 CborError _cbor_value_copy_string(const CborValue *value, void *buffer,
637 size_t *buflen, CborValue *next)
// with a destination buffer the chunks are copied with memcpy; without one
// the string is only walked, using iterate_noop
640 CborError err = iterate_string_chunks(value, (char*)buffer, buflen, &copied_all, next,
641 buffer ? (IterateFunction)memcpy : iterate_noop);
// CborErrorOutOfMemory signals that not all of the string was processed
643 copied_all ? CborNoError : CborErrorOutOfMemory;
647 * Compares the entry \a value with the string \a string and stores the result
648 * in \a result. If the value is different from \a string or if it is not a
649 * text string, \a result will contain \c false.
651 * The entry at \a value may be a tagged string. If \a value is not a string or a
652 * tagged string, the comparison result will be false.
654 CborError cbor_value_text_string_equals(const CborValue *value, const char *string, bool *result)
// work on a private copy so that the caller's iterator is left untouched
656 CborValue copy = *value;
// step over any leading tags to reach the item they annotate
657 CborError err = cbor_value_skip_tag(&copy);
// anything other than a text string cannot compare equal
660 if (!cbor_value_is_text_string(&copy)) {
// string is the comparison "buffer" and len its size; iterate_memcmp reports
// equality through *result, and no "next" iterator is requested
665 size_t len = strlen(string);
666 return iterate_string_chunks(&copy, CONST_CAST(char *, string), &len, result, NULL, iterate_memcmp);
670 * Attempts to find the value in map \a map that corresponds to the text string
671 * entry \a string. If the item is found, it is stored in \a result. If no item
672 * is found matching the key, then \a result will contain an element of type
673 * \ref CborInvalidType.
675 * \note This function may be expensive to execute.
677 CborError cbor_value_map_find_value(const CborValue *map, const char *string, CborValue *element)
679 assert(cbor_value_is_map(map));
// computed once, outside the loop
680 size_t len = strlen(string);
// element doubles as the iterator over the map's entries
681 CborError err = cbor_value_enter_container(map, element);
685 while (!cbor_value_at_end(element)) {
686 // find the non-tag so we can compare
687 err = cbor_value_skip_tag(element);
690 if (cbor_value_is_text_string(element)) {
// the comparison gets a scratch copy of len rather than len itself
// NOTE(review): presumably because the callee may write to it -- confirm
692 size_t dummyLen = len;
// element is also passed as the "next" output, so after the call it has moved
// past the key
693 err = iterate_string_chunks(element, CONST_CAST(char *, string), &dummyLen,
694 &equals, element, iterate_memcmp);
// pre-parse the item element now points at and return it
698 return preparse_value(element);
701 err = cbor_value_advance(element);
707 err = cbor_value_skip_tag(element);
710 err = cbor_value_advance(element);
// an invalid type tells the caller that element holds no result
716 element->type = CborInvalidType;
720 element->type = CborInvalidType;
725 * Extracts a half-precision floating point from \a value and stores it in \a
728 CborError cbor_value_get_half_float(const CborValue *value, void *result)
730 assert(value->type == CborHalfFloatType);
732 // size has been computed already
// read the 16-bit value stored after the initial byte
733 uint16_t v = get16(value->ptr + 1);
// the 16 bits are copied out as they are, with no conversion to float or
// double; result must have room for sizeof(uint16_t) bytes
734 memcpy(result, &v, sizeof(v));