From e1c8b5a4017d4509bc070c1766907e552890d10e Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Tue, 21 Mar 2023 13:05:27 +0900 Subject: [PATCH] Import ciborium-ll 0.2.0 --- .cargo_vcs_info.json | 5 + Cargo.toml | 46 +++++ Cargo.toml.orig | 31 ++++ README.md | 131 ++++++++++++++ src/dec.rs | 174 ++++++++++++++++++ src/enc.rs | 127 ++++++++++++++ src/hdr.rs | 163 +++++++++++++++++ src/lib.rs | 487 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/seg.rs | 213 ++++++++++++++++++++++ 9 files changed, 1377 insertions(+) create mode 100644 .cargo_vcs_info.json create mode 100644 Cargo.toml create mode 100644 Cargo.toml.orig create mode 100644 README.md create mode 100644 src/dec.rs create mode 100644 src/enc.rs create mode 100644 src/hdr.rs create mode 100644 src/lib.rs create mode 100644 src/seg.rs diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..e3f78ef --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,5 @@ +{ + "git": { + "sha1": "e8512abee2f126ae60923be4362c175703550894" + } +} diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f70a861 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,46 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "ciborium-ll" +version = "0.2.0" +authors = ["Nathaniel McCallum "] +description = "Low-level CBOR codec primitives" +homepage = "https://github.com/enarx/ciborium" +readme = "README.md" +keywords = ["cbor"] +categories = ["data-structures", "embedded", "encoding", "no-std", "parsing"] +license = "Apache-2.0" +repository = "https://github.com/enarx/ciborium" +[dependencies.ciborium-io] +version = "0.2.0" + +[dependencies.half] +version = "1.6" +[dev-dependencies.hex] +version = "0.4" + +[features] +alloc = [] +std = ["alloc"] +[badges.github] +repository = "enarx/ciborium" +workflow = "test" + +[badges.is-it-maintained-issue-resolution] +repository = "enarx/ciborium" + +[badges.is-it-maintained-open-issues] +repository = "enarx/ciborium" + +[badges.maintenance] +status = "actively-developed" diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..97dc646 --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,31 @@ +[package] +name = "ciborium-ll" +version = "0.2.0" +authors = ["Nathaniel McCallum "] +license = "Apache-2.0" +edition = "2021" +homepage = "https://github.com/enarx/ciborium" +repository = "https://github.com/enarx/ciborium" +description = "Low-level CBOR codec primitives" +readme = "README.md" +keywords = ["cbor"] +categories = ["data-structures", "embedded", "encoding", "no-std", "parsing"] + +[badges] +# See https://doc.rust-lang.org/cargo/reference/manifest.html#the-badges-section +github = { repository = "enarx/ciborium", workflow = "test" } +#github = { repository = "enarx/ciborium", workflow = "lint" } +maintenance = { status = "actively-developed" } +is-it-maintained-issue-resolution = { repository = "enarx/ciborium" } +is-it-maintained-open-issues = { repository = "enarx/ciborium" } + +[dependencies] +ciborium-io = { path = "../ciborium-io", version = "0.2.0" } +half = "1.6" + +[dev-dependencies] +hex = "0.4" + +[features] +alloc = [] +std = ["alloc"] diff --git a/README.md b/README.md 
new file mode 100644 index 0000000..2ca6306 --- /dev/null +++ b/README.md @@ -0,0 +1,131 @@ +[![Workflow Status](https://github.com/enarx/ciborium/workflows/test/badge.svg)](https://github.com/enarx/ciborium/actions?query=workflow%3A%22test%22) +[![Average time to resolve an issue](https://isitmaintained.com/badge/resolution/enarx/ciborium.svg)](https://isitmaintained.com/project/enarx/ciborium "Average time to resolve an issue") +[![Percentage of issues still open](https://isitmaintained.com/badge/open/enarx/ciborium.svg)](https://isitmaintained.com/project/enarx/ciborium "Percentage of issues still open") +![Maintenance](https://img.shields.io/badge/maintenance-actively--developed-brightgreen.svg) + +# ciborium-ll + +Low level CBOR parsing tools + +This crate contains low-level types for encoding and decoding items in +CBOR. This crate is usable in both `no_std` and `no_alloc` environments. +To understand how this crate works, first we will look at the structure +of a CBOR item on the wire. + +## Anatomy of a CBOR Item + +This is a brief anatomy of a CBOR item on the wire. + +``` ++------------+-----------+ +| | | +| Major | Minor | +| (3bits) | (5bits) | +| | | ++------------+-----------+ +^ ^ +| | ++-----+ +-----+ + | | + | | + +----------------------------+--------------+ + | | | | + | Prefix | Affix | Suffix | + | (1 byte) | (0-8 bytes) | (0+ bytes) | + | | | | + +------------+---------------+--------------+ + + | | | + +------------+---------------+--------------+ + | | + v v + + Header Body +``` + +The `ciborium` crate works by providing the `Decoder` and `Encoder` types +which provide input and output for a CBOR header (see: `Header`). From +there, you can either handle the body yourself or use the provided utility +functions. + +For more information on the CBOR format, see +[RFC 7049](https://tools.ietf.org/html/rfc7049). + +## Decoding + +In order to decode CBOR, you will create a `Decoder` from a reader. 
The +decoder instance will allow you to `Decoder::pull()` `Header` instances +from the input. + +Most CBOR items are fully contained in their headers and therefore have no +body. These items can be evaluated directly from the `Header` instance. + +Bytes and text items have a body but do not contain child items. Since +both bytes and text values may be segmented, parsing them can be a bit +tricky. Therefore, we provide helper functions to parse these types. See +`Decoder::bytes()` and `Decoder::text()` for more details. + +Array and map items have a body which contains child items. These can be +parsed by simply doing `Decoder::pull()` to parse the child items. + +### Example + +```rust +use ciborium_ll::{Decoder, Header}; +use ciborium_io::Read as _; + +let input = b"\x6dHello, World!"; +let mut decoder = Decoder::from(&input[..]); +let mut chunks = 0; + +match decoder.pull().unwrap() { + Header::Text(len) => { + let mut segments = decoder.text(len); + while let Some(mut segment) = segments.pull().unwrap() { + let mut buffer = [0u8; 7]; + while let Some(chunk) = segment.pull(&mut buffer[..]).unwrap() { + match chunk { + "Hello, " if chunks == 0 => chunks = 1, + "World!" if chunks == 1 => chunks = 2, + _ => panic!("received unexpected chunk"), + } + } + } + } + + _ => panic!("received unexpected value"), +} + +assert_eq!(chunks, 2); +``` + +## Encoding + +To encode values to CBOR, create an `Encoder` from a writer. The encoder +instance provides the `Encoder::push()` method to write a `Header` value +to the wire. CBOR item bodies can be written directly. + +For bytes and text, there are the `Encoder::bytes()` and `Encoder::text()` +utility functions, respectively, which will properly segment the output +on the wire for you. 
+ +### Example + +```rust +use ciborium_ll::{Encoder, Header}; +use ciborium_io::Write as _; + +let mut buffer = [0u8; 19]; +let mut encoder = Encoder::from(&mut buffer[..]); + +// Write the structure +encoder.push(Header::Map(Some(1))).unwrap(); +encoder.push(Header::Positive(7)).unwrap(); +encoder.text("Hello, World!", 7).unwrap(); + +// Validate our output +encoder.flush().unwrap(); +assert_eq!(b"\xa1\x07\x7f\x67Hello, \x66World!\xff", &buffer[..]); +``` + +License: Apache-2.0 diff --git a/src/dec.rs b/src/dec.rs new file mode 100644 index 0000000..8329634 --- /dev/null +++ b/src/dec.rs @@ -0,0 +1,174 @@ +use super::*; + +use ciborium_io::Read; + +/// An error that occurred while decoding +#[derive(Debug)] +pub enum Error<T> { + /// An error occurred while reading bytes + /// + /// Contains the underlying error returned while reading. + Io(T), + + /// An error occurred while parsing bytes + /// + /// Contains the offset into the stream where the syntax error occurred. + Syntax(usize), +} + +impl<T> From<T> for Error<T> { + #[inline] + fn from(value: T) -> Self { + Self::Io(value) + } +} + +/// A decoder for deserializing CBOR items +/// +/// This decoder manages the low-level decoding of CBOR items into `Header` +/// objects. It also contains utility functions for parsing segmented bytes +/// and text inputs. 
+pub struct Decoder<R: Read> { + reader: R, + offset: usize, + buffer: Option<Title>, +} + +impl<R: Read> From<R> for Decoder<R> { + #[inline] + fn from(value: R) -> Self { + Self { + reader: value, + offset: 0, + buffer: None, + } + } +} + +impl<R: Read> Read for Decoder<R> { + type Error = R::Error; + + #[inline] + fn read_exact(&mut self, data: &mut [u8]) -> Result<(), Self::Error> { + assert!(self.buffer.is_none()); + self.reader.read_exact(data)?; + self.offset += data.len(); + Ok(()) + } +} + +impl<R: Read> Decoder<R> { + #[inline] + fn pull_title(&mut self) -> Result<Title, Error<R::Error>> { + if let Some(title) = self.buffer.take() { + self.offset += title.1.as_ref().len() + 1; + return Ok(title); + } + + let mut prefix = [0u8; 1]; + self.read_exact(&mut prefix[..])?; + + let major = match prefix[0] >> 5 { + 0 => Major::Positive, + 1 => Major::Negative, + 2 => Major::Bytes, + 3 => Major::Text, + 4 => Major::Array, + 5 => Major::Map, + 6 => Major::Tag, + 7 => Major::Other, + _ => unreachable!(), + }; + + let mut minor = match prefix[0] & 0b00011111 { + x if x < 24 => Minor::This(x), + 24 => Minor::Next1([0; 1]), + 25 => Minor::Next2([0; 2]), + 26 => Minor::Next4([0; 4]), + 27 => Minor::Next8([0; 8]), + 31 => Minor::More, + _ => return Err(Error::Syntax(self.offset - 1)), + }; + + self.read_exact(minor.as_mut())?; + Ok(Title(major, minor)) + } + + #[inline] + fn push_title(&mut self, item: Title) { + assert!(self.buffer.is_none()); + self.buffer = Some(item); + self.offset -= item.1.as_ref().len() + 1; + } + + /// Pulls the next header from the input + #[inline] + pub fn pull(&mut self) -> Result<Header, Error<R::Error>> { + let offset = self.offset; + self.pull_title()? + .try_into() + .map_err(|_| Error::Syntax(offset)) + } + + /// Push a single header into the input buffer + /// + /// # Panics + /// + /// This function panics if called while there is already a header in the + /// input buffer. 
You should take care to call this function only after + /// pulling a header to ensure there is nothing in the input buffer. + #[inline] + pub fn push(&mut self, item: Header) { + self.push_title(Title::from(item)) + } + + /// Gets the current byte offset into the stream + /// + /// The offset starts at zero when the decoder is created. Therefore, if + /// bytes were already read from the reader before the decoder was created, + /// you must account for this. + #[inline] + pub fn offset(&mut self) -> usize { + self.offset + } + + /// Process an incoming bytes item + /// + /// In CBOR, bytes can be segmented. The logic for this can be a bit tricky, + /// so we encapsulate that logic using this function. This function **MUST** + /// be called immediately after first pulling a `Header::Bytes(len)` from + /// the wire and `len` must be provided to this function from that value. + /// + /// The buffer you pass to `Segment::pull()` is used when reading in the segmented + /// bytes. A large buffer will result in fewer calls to read incoming bytes + /// at the cost of memory usage. You should consider this trade off when + /// deciding the size of your buffer. + #[inline] + pub fn bytes(&mut self, len: Option<usize>) -> Segments<R, crate::seg::Bytes> { + self.push(Header::Bytes(len)); + Segments::new(self, |header| match header { + Header::Bytes(len) => Ok(len), + _ => Err(()), + }) + } + + /// Process an incoming text item + /// + /// In CBOR, text can be segmented. The logic for this can be a bit tricky, + /// so we encapsulate that logic using this function. This function **MUST** + /// be called immediately after first pulling a `Header::Text(len)` from + /// the wire and `len` must be provided to this function from that value. + /// + /// The buffer you pass to `Segment::pull()` is used when reading in the segmented + /// text. A large buffer will result in fewer calls to read incoming bytes + /// at the cost of memory usage. 
You should consider this trade off when + /// deciding the size of your buffer. + #[inline] + pub fn text(&mut self, len: Option<usize>) -> Segments<R, crate::seg::Text> { + self.push(Header::Text(len)); + Segments::new(self, |header| match header { + Header::Text(len) => Ok(len), + _ => Err(()), + }) + } +} diff --git a/src/enc.rs b/src/enc.rs new file mode 100644 index 0000000..909728f --- /dev/null +++ b/src/enc.rs @@ -0,0 +1,127 @@ +use super::*; + +use ciborium_io::Write; + +/// An encoder for serializing CBOR items +/// +/// This structure wraps a writer and provides convenience functions for +/// writing `Header` objects to the wire. +pub struct Encoder<W: Write>(W); + +impl<W: Write> From<W> for Encoder<W> { + #[inline] + fn from(value: W) -> Self { + Self(value) + } +} + +impl<W: Write> Write for Encoder<W> { + type Error = W::Error; + + fn write_all(&mut self, data: &[u8]) -> Result<(), Self::Error> { + self.0.write_all(data) + } + + fn flush(&mut self) -> Result<(), Self::Error> { + self.0.flush() + } +} + +impl<W: Write> Encoder<W> { + /// Push a `Header` to the wire + #[inline] + pub fn push(&mut self, header: Header) -> Result<(), W::Error> { + let title = Title::from(header); + + let major = match title.0 { + Major::Positive => 0, + Major::Negative => 1, + Major::Bytes => 2, + Major::Text => 3, + Major::Array => 4, + Major::Map => 5, + Major::Tag => 6, + Major::Other => 7, + }; + + let minor = match title.1 { + Minor::This(x) => x, + Minor::Next1(..) => 24, + Minor::Next2(..) => 25, + Minor::Next4(..) => 26, + Minor::Next8(..) => 27, + Minor::More => 31, + }; + + self.0.write_all(&[major << 5 | minor])?; + self.0.write_all(title.1.as_ref()) + } + + /// Serialize a byte slice as CBOR + /// + /// Optionally, segment the output into `segment` size segments. Note that + /// if `segment == Some(0)` it will be silently upgraded to `Some(1)`. This + /// minimum value is highly inefficient and should not be relied upon. 
+ #[inline] + pub fn bytes( + &mut self, + value: &[u8], + segment: impl Into<Option<usize>>, + ) -> Result<(), W::Error> { + let max = segment.into().unwrap_or_else(|| value.len()); + let max = core::cmp::max(max, 1); + + if max >= value.len() { + self.push(Header::Bytes(Some(value.len())))?; + self.write_all(value)?; + } else { + self.push(Header::Bytes(None))?; + + for chunk in value.chunks(max) { + self.push(Header::Bytes(Some(chunk.len())))?; + self.write_all(chunk)?; + } + + self.push(Header::Break)?; + } + + Ok(()) + } + + /// Serialize a string slice as CBOR + /// + /// Optionally, segment the output into `segment` size segments. Note that + /// since care is taken to ensure that each segment is itself a valid UTF-8 + /// string, if `segment` contains a value of less than 4, it will be + /// silently upgraded to 4. This minimum value is highly inefficient and + /// should not be relied upon. + #[inline] + pub fn text(&mut self, value: &str, segment: impl Into<Option<usize>>) -> Result<(), W::Error> { + let max = segment.into().unwrap_or_else(|| value.len()); + let max = core::cmp::max(max, 4); + + if max >= value.len() { + self.push(Header::Text(Some(value.len())))?; + self.write_all(value.as_bytes())?; + } else { + self.push(Header::Text(None))?; + + let mut bytes = value.as_bytes(); + while !bytes.is_empty() { + let mut len = core::cmp::min(bytes.len(), max); + while len > 0 && core::str::from_utf8(&bytes[..len]).is_err() { + len -= 1 + } + + let (prefix, suffix) = bytes.split_at(len); + self.push(Header::Text(Some(prefix.len())))?; + self.write_all(prefix)?; + bytes = suffix; + } + + self.push(Header::Break)?; + } + + Ok(()) + } +} diff --git a/src/hdr.rs b/src/hdr.rs new file mode 100644 index 0000000..dec1788 --- /dev/null +++ b/src/hdr.rs @@ -0,0 +1,163 @@ +use super::*; + +use half::f16; + +/// A semantic representation of a CBOR item header +/// +/// This structure represents the valid values of a CBOR item header and is +/// used extensively when 
serializing or deserializing CBOR items. Note well +/// that this structure **DOES NOT** represent the body (i.e. suffix) of the +/// CBOR item. You must parse the body yourself based on the contents of the +/// `Header`. However, utility functions are provided for this (see: +/// `Decoder::bytes()` and `Decoder::text()`). +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum Header { + /// A positive integer + Positive(u64), + + /// A negative integer + /// + /// Note well that this value has all bits inverted from a normal signed + /// integer. For example, to convert the `u64` to a `i128` you would do + /// this: `neg as i128 ^ !0`. + Negative(u64), + + /// A floating point value + Float(f64), + + /// A "simple" value + Simple(u8), + + /// A tag + Tag(u64), + + /// The "break" value + /// + /// This value is used to terminate indefinite length arrays and maps, + /// as well as segmented byte or text items. + Break, + + /// A bytes item + /// + /// The value contained in this variant indicates the length of the bytes + /// which follow or, if `None`, segmented bytes input. + /// + /// A best practice is to call `Decoder::bytes()` immediately after + /// first pulling a bytes item header since this utility function + /// encapsulates all the logic needed to handle segmentation. + Bytes(Option<usize>), + + /// A text item + /// + /// The value contained in this variant indicates the length of the text + /// which follows (in bytes) or, if `None`, segmented text input. + /// + /// A best practice is to call `Decoder::text()` immediately after + /// first pulling a text item header since this utility function + /// encapsulates all the logic needed to handle segmentation. + Text(Option<usize>), + + /// An array item + /// + /// The value contained in this variant indicates the length of the array + /// which follows (in items) or, if `None`, an indefinite length array + /// terminated by a "break" value. 
+ Array(Option<usize>), + + /// A map item + /// + /// The value contained in this variant indicates the length of the map + /// which follows (in item pairs) or, if `None`, an indefinite length map + /// terminated by a "break" value. + Map(Option<usize>), +} + +impl TryFrom<Title> for Header { + type Error = InvalidError; + + fn try_from(title: Title) -> Result<Self, Self::Error> { + let opt = |minor| { + Some(match minor { + Minor::This(x) => x.into(), + Minor::Next1(x) => u8::from_be_bytes(x).into(), + Minor::Next2(x) => u16::from_be_bytes(x).into(), + Minor::Next4(x) => u32::from_be_bytes(x).into(), + Minor::Next8(x) => u64::from_be_bytes(x), + Minor::More => return None, + }) + }; + + let int = |m| opt(m).ok_or(InvalidError(())); + + let len = |m| { + opt(m) + .map(usize::try_from) + .transpose() + .or(Err(InvalidError(()))) + }; + + Ok(match title { + Title(Major::Positive, minor) => Self::Positive(int(minor)?), + Title(Major::Negative, minor) => Self::Negative(int(minor)?), + Title(Major::Bytes, minor) => Self::Bytes(len(minor)?), + Title(Major::Text, minor) => Self::Text(len(minor)?), + Title(Major::Array, minor) => Self::Array(len(minor)?), + Title(Major::Map, minor) => Self::Map(len(minor)?), + Title(Major::Tag, minor) => Self::Tag(int(minor)?), + + Title(Major::Other, Minor::More) => Self::Break, + Title(Major::Other, Minor::This(x)) => Self::Simple(x), + Title(Major::Other, Minor::Next1(x)) => Self::Simple(x[0]), + Title(Major::Other, Minor::Next2(x)) => Self::Float(f16::from_be_bytes(x).into()), + Title(Major::Other, Minor::Next4(x)) => Self::Float(f32::from_be_bytes(x).into()), + Title(Major::Other, Minor::Next8(x)) => Self::Float(f64::from_be_bytes(x)), + }) + } +} + +impl From<Header> for Title { + fn from(header: Header) -> Self { + let int = |i: u64| match i { + x if x <= 23 => Minor::This(i as u8), + x if x <= core::u8::MAX as u64 => Minor::Next1([i as u8]), + x if x <= core::u16::MAX as u64 => Minor::Next2((i as u16).to_be_bytes()), + x if x 
<= core::u32::MAX as u64 => Minor::Next4((i as u32).to_be_bytes()), + x => Minor::Next8(x.to_be_bytes()), + }; + + let len = |l: Option<usize>| l.map(|x| int(x as u64)).unwrap_or(Minor::More); + + match header { + Header::Positive(x) => Title(Major::Positive, int(x)), + Header::Negative(x) => Title(Major::Negative, int(x)), + Header::Bytes(x) => Title(Major::Bytes, len(x)), + Header::Text(x) => Title(Major::Text, len(x)), + Header::Array(x) => Title(Major::Array, len(x)), + Header::Map(x) => Title(Major::Map, len(x)), + Header::Tag(x) => Title(Major::Tag, int(x)), + + Header::Break => Title(Major::Other, Minor::More), + + Header::Simple(x) => match x { + x @ 0..=23 => Title(Major::Other, Minor::This(x)), + x => Title(Major::Other, Minor::Next1([x])), + }, + + Header::Float(n64) => { + let n16 = f16::from_f64(n64); + let n32 = n64 as f32; + + Title( + Major::Other, + if f64::from(n16).to_bits() == n64.to_bits() { + Minor::Next2(n16.to_be_bytes()) + } else if f64::from(n32).to_bits() == n64.to_bits() { + Minor::Next4(n32.to_be_bytes()) + } else { + Minor::Next8(n64.to_be_bytes()) + }, + ) + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..8a1fe90 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Low level CBOR parsing tools +//! +//! This crate contains low-level types for encoding and decoding items in +//! CBOR. This crate is usable in both `no_std` and `no_alloc` environments. +//! To understand how this crate works, first we will look at the structure +//! of a CBOR item on the wire. +//! +//! # Anatomy of a CBOR Item +//! +//! This is a brief anatomy of a CBOR item on the wire. +//! +//! ```text +//! +------------+-----------+ +//! | | | +//! | Major | Minor | +//! | (3bits) | (5bits) | +//! | | | +//! +------------+-----------+ +//! ^ ^ +//! | | +//! +-----+ +-----+ +//! | | +//! | | +//! +----------------------------+--------------+ +//! | | | | +//! 
| Prefix | Affix | Suffix | +//! | (1 byte) | (0-8 bytes) | (0+ bytes) | +//! | | | | +//! +------------+---------------+--------------+ +//! +//! | | | +//! +------------+---------------+--------------+ +//! | | +//! v v +//! +//! Header Body +//! ``` +//! +//! The `ciborium` crate works by providing the `Decoder` and `Encoder` types +//! which provide input and output for a CBOR header (see: `Header`). From +//! there, you can either handle the body yourself or use the provided utility +//! functions. +//! +//! For more information on the CBOR format, see +//! [RFC 7049](https://tools.ietf.org/html/rfc7049). +//! +//! # Decoding +//! +//! In order to decode CBOR, you will create a `Decoder` from a reader. The +//! decoder instance will allow you to `Decoder::pull()` `Header` instances +//! from the input. +//! +//! Most CBOR items are fully contained in their headers and therefore have no +//! body. These items can be evaluated directly from the `Header` instance. +//! +//! Bytes and text items have a body but do not contain child items. Since +//! both bytes and text values may be segmented, parsing them can be a bit +//! tricky. Therefore, we provide helper functions to parse these types. See +//! `Decoder::bytes()` and `Decoder::text()` for more details. +//! +//! Array and map items have a body which contains child items. These can be +//! parsed by simply doing `Decoder::pull()` to parse the child items. +//! +//! ## Example +//! +//! ```rust +//! use ciborium_ll::{Decoder, Header}; +//! use ciborium_io::Read as _; +//! +//! let input = b"\x6dHello, World!"; +//! let mut decoder = Decoder::from(&input[..]); +//! let mut chunks = 0; +//! +//! match decoder.pull().unwrap() { +//! Header::Text(len) => { +//! let mut segments = decoder.text(len); +//! while let Some(mut segment) = segments.pull().unwrap() { +//! let mut buffer = [0u8; 7]; +//! while let Some(chunk) = segment.pull(&mut buffer[..]).unwrap() { +//! match chunk { +//! 
"Hello, " if chunks == 0 => chunks = 1, +//! "World!" if chunks == 1 => chunks = 2, +//! _ => panic!("received unexpected chunk"), +//! } +//! } +//! } +//! } +//! +//! _ => panic!("received unexpected value"), +//! } +//! +//! assert_eq!(chunks, 2); +//! ``` +//! +//! # Encoding +//! +//! To encode values to CBOR, create an `Encoder` from a writer. The encoder +//! instance provides the `Encoder::push()` method to write a `Header` value +//! to the wire. CBOR item bodies can be written directly. +//! +//! For bytes and text, there are the `Encoder::bytes()` and `Encoder::text()` +//! utility functions, respectively, which will properly segment the output +//! on the wire for you. +//! +//! ## Example +//! +//! ```rust +//! use ciborium_ll::{Encoder, Header}; +//! use ciborium_io::Write as _; +//! +//! let mut buffer = [0u8; 19]; +//! let mut encoder = Encoder::from(&mut buffer[..]); +//! +//! // Write the structure +//! encoder.push(Header::Map(Some(1))).unwrap(); +//! encoder.push(Header::Positive(7)).unwrap(); +//! encoder.text("Hello, World!", 7).unwrap(); +//! +//! // Validate our output +//! encoder.flush().unwrap(); +//! assert_eq!(b"\xa1\x07\x7f\x67Hello, \x66World!\xff", &buffer[..]); +//! 
``` + +#![cfg_attr(not(feature = "std"), no_std)] +#![deny(missing_docs)] +#![deny(clippy::all)] +#![deny(clippy::cargo)] + +#[cfg(feature = "alloc")] +extern crate alloc; + +mod dec; +mod enc; +mod hdr; +mod seg; + +pub use dec::*; +pub use enc::*; +pub use hdr::*; +pub use seg::{Segment, Segments}; + +/// Simple value constants +pub mod simple { + #![allow(missing_docs)] + + pub const FALSE: u8 = 20; + pub const TRUE: u8 = 21; + pub const NULL: u8 = 22; + pub const UNDEFINED: u8 = 23; +} + +/// Tag constants +pub mod tag { + #![allow(missing_docs)] + + pub const BIGPOS: u64 = 2; + pub const BIGNEG: u64 = 3; +} + +#[derive(Debug)] +struct InvalidError(()); + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Major { + Positive, + Negative, + Bytes, + Text, + Array, + Map, + Tag, + Other, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Minor { + This(u8), + Next1([u8; 1]), + Next2([u8; 2]), + Next4([u8; 4]), + Next8([u8; 8]), + More, +} + +impl AsRef<[u8]> for Minor { + #[inline] + fn as_ref(&self) -> &[u8] { + match self { + Self::More => &[], + Self::This(..) => &[], + Self::Next1(x) => x.as_ref(), + Self::Next2(x) => x.as_ref(), + Self::Next4(x) => x.as_ref(), + Self::Next8(x) => x.as_ref(), + } + } +} + +impl AsMut<[u8]> for Minor { + #[inline] + fn as_mut(&mut self) -> &mut [u8] { + match self { + Self::More => &mut [], + Self::This(..) => &mut [], + Self::Next1(x) => x.as_mut(), + Self::Next2(x) => x.as_mut(), + Self::Next4(x) => x.as_mut(), + Self::Next8(x) => x.as_mut(), + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct Title(pub Major, pub Minor); + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! 
neg { + ($i:expr) => { + Header::Negative((($i as i128) ^ !0) as u64) + }; + } + + #[allow(clippy::excessive_precision)] + #[test] + fn leaf() { + use core::f64::{INFINITY, NAN}; + + let data = &[ + (Header::Positive(0), "00", true), + (Header::Positive(1), "01", true), + (Header::Positive(10), "0a", true), + (Header::Positive(23), "17", true), + (Header::Positive(24), "1818", true), + (Header::Positive(25), "1819", true), + (Header::Positive(100), "1864", true), + (Header::Positive(1000), "1903e8", true), + (Header::Positive(1000000), "1a000f4240", true), + (Header::Positive(1000000000000), "1b000000e8d4a51000", true), + ( + Header::Positive(18446744073709551615), + "1bffffffffffffffff", + true, + ), + (neg!(-18446744073709551616), "3bffffffffffffffff", true), + (neg!(-1), "20", true), + (neg!(-10), "29", true), + (neg!(-100), "3863", true), + (neg!(-1000), "3903e7", true), + (Header::Float(0.0), "f90000", true), + (Header::Float(-0.0), "f98000", true), + (Header::Float(1.0), "f93c00", true), + (Header::Float(1.1), "fb3ff199999999999a", true), + (Header::Float(1.5), "f93e00", true), + (Header::Float(65504.0), "f97bff", true), + (Header::Float(100000.0), "fa47c35000", true), + (Header::Float(3.4028234663852886e+38), "fa7f7fffff", true), + (Header::Float(1.0e+300), "fb7e37e43c8800759c", true), + (Header::Float(5.960464477539063e-8), "f90001", true), + (Header::Float(0.00006103515625), "f90400", true), + (Header::Float(-4.0), "f9c400", true), + (Header::Float(-4.1), "fbc010666666666666", true), + (Header::Float(INFINITY), "f97c00", true), + (Header::Float(NAN), "f97e00", true), + (Header::Float(-INFINITY), "f9fc00", true), + (Header::Float(INFINITY), "fa7f800000", false), + (Header::Float(NAN), "fa7fc00000", false), + (Header::Float(-INFINITY), "faff800000", false), + (Header::Float(INFINITY), "fb7ff0000000000000", false), + (Header::Float(NAN), "fb7ff8000000000000", false), + (Header::Float(-INFINITY), "fbfff0000000000000", false), + (Header::Simple(simple::FALSE), 
"f4", true), + (Header::Simple(simple::TRUE), "f5", true), + (Header::Simple(simple::NULL), "f6", true), + (Header::Simple(simple::UNDEFINED), "f7", true), + (Header::Simple(16), "f0", true), + (Header::Simple(24), "f818", true), + (Header::Simple(255), "f8ff", true), + (Header::Tag(0), "c0", true), + (Header::Tag(1), "c1", true), + (Header::Tag(23), "d7", true), + (Header::Tag(24), "d818", true), + (Header::Tag(32), "d820", true), + (Header::Bytes(Some(0)), "40", true), + (Header::Bytes(Some(4)), "44", true), + (Header::Text(Some(0)), "60", true), + (Header::Text(Some(4)), "64", true), + ]; + + for (header, bytes, encode) in data.iter().cloned() { + let bytes = hex::decode(bytes).unwrap(); + + let mut decoder = Decoder::from(&bytes[..]); + match (header, decoder.pull().unwrap()) { + // NaN equality... + (Header::Float(l), Header::Float(r)) if l.is_nan() && r.is_nan() => (), + + // Everything else... + (l, r) => assert_eq!(l, r), + } + + if encode { + let mut buffer = [0u8; 1024]; + let mut writer = &mut buffer[..]; + let mut encoder = Encoder::from(&mut writer); + encoder.push(header).unwrap(); + + let len = writer.len(); + assert_eq!(&bytes[..], &buffer[..1024 - len]); + } + } + } + + #[test] + fn node() { + let data: &[(&str, &[Header])] = &[ + ("80", &[Header::Array(Some(0))]), + ( + "83010203", + &[ + Header::Array(Some(3)), + Header::Positive(1), + Header::Positive(2), + Header::Positive(3), + ], + ), + ( + "98190102030405060708090a0b0c0d0e0f101112131415161718181819", + &[ + Header::Array(Some(25)), + Header::Positive(1), + Header::Positive(2), + Header::Positive(3), + Header::Positive(4), + Header::Positive(5), + Header::Positive(6), + Header::Positive(7), + Header::Positive(8), + Header::Positive(9), + Header::Positive(10), + Header::Positive(11), + Header::Positive(12), + Header::Positive(13), + Header::Positive(14), + Header::Positive(15), + Header::Positive(16), + Header::Positive(17), + Header::Positive(18), + Header::Positive(19), + 
Header::Positive(20), + Header::Positive(21), + Header::Positive(22), + Header::Positive(23), + Header::Positive(24), + Header::Positive(25), + ], + ), + ("a0", &[Header::Map(Some(0))]), + ( + "a201020304", + &[ + Header::Map(Some(2)), + Header::Positive(1), + Header::Positive(2), + Header::Positive(3), + Header::Positive(4), + ], + ), + ("9fff", &[Header::Array(None), Header::Break]), + ( + "9f018202039f0405ffff", + &[ + Header::Array(None), + Header::Positive(1), + Header::Array(Some(2)), + Header::Positive(2), + Header::Positive(3), + Header::Array(None), + Header::Positive(4), + Header::Positive(5), + Header::Break, + Header::Break, + ], + ), + ( + "9f01820203820405ff", + &[ + Header::Array(None), + Header::Positive(1), + Header::Array(Some(2)), + Header::Positive(2), + Header::Positive(3), + Header::Array(Some(2)), + Header::Positive(4), + Header::Positive(5), + Header::Break, + ], + ), + ( + "83018202039f0405ff", + &[ + Header::Array(Some(3)), + Header::Positive(1), + Header::Array(Some(2)), + Header::Positive(2), + Header::Positive(3), + Header::Array(None), + Header::Positive(4), + Header::Positive(5), + Header::Break, + ], + ), + ( + "83019f0203ff820405", + &[ + Header::Array(Some(3)), + Header::Positive(1), + Header::Array(None), + Header::Positive(2), + Header::Positive(3), + Header::Break, + Header::Array(Some(2)), + Header::Positive(4), + Header::Positive(5), + ], + ), + ( + "9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff", + &[ + Header::Array(None), + Header::Positive(1), + Header::Positive(2), + Header::Positive(3), + Header::Positive(4), + Header::Positive(5), + Header::Positive(6), + Header::Positive(7), + Header::Positive(8), + Header::Positive(9), + Header::Positive(10), + Header::Positive(11), + Header::Positive(12), + Header::Positive(13), + Header::Positive(14), + Header::Positive(15), + Header::Positive(16), + Header::Positive(17), + Header::Positive(18), + Header::Positive(19), + Header::Positive(20), + Header::Positive(21), + 
Header::Positive(22), + Header::Positive(23), + Header::Positive(24), + Header::Positive(25), + Header::Break, + ], + ), + ]; + + for (bytes, headers) in data { + let bytes = hex::decode(bytes).unwrap(); + + // Test decoding + let mut decoder = Decoder::from(&bytes[..]); + for header in headers.iter().cloned() { + assert_eq!(header, decoder.pull().unwrap()); + } + + // Test encoding + let mut buffer = [0u8; 1024]; + let mut writer = &mut buffer[..]; + let mut encoder = Encoder::from(&mut writer); + + for header in headers.iter().cloned() { + encoder.push(header).unwrap(); + } + + let len = writer.len(); + assert_eq!(&bytes[..], &buffer[..1024 - len]); + } + } +} diff --git a/src/seg.rs b/src/seg.rs new file mode 100644 index 0000000..d75ebae --- /dev/null +++ b/src/seg.rs @@ -0,0 +1,213 @@ +use super::*; + +use ciborium_io::Read; + +use core::marker::PhantomData; + +/// A parser for incoming segments +pub trait Parser: Default { + /// The type of item that is parsed + type Item: ?Sized; + + /// The parsing error that may occur + type Error; + + /// The main parsing function + /// + /// This function processes the incoming bytes and returns the item. + /// + /// One important detail that **MUST NOT** be overlooked is that the + /// parser may save data from a previous parsing attempt. The number of + /// bytes saved is indicated by the `Parser::saved()` function. The saved + /// bytes will be copied into the beginning of the `bytes` array before + /// processing. Therefore, two requirements should be met. + /// + /// First, the incoming byte slice should be larger than the saved bytes. + /// + /// Second, the incoming byte slice should contain new bytes only after + /// the saved byte prefix. + /// + /// If both criteria are met, this allows the parser to prepend its saved + /// bytes without any additional allocation. 
+ fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>; + + /// Indicates the number of saved bytes in the parser + fn saved(&self) -> usize { + 0 + } +} + +/// A bytes parser +/// +/// No actual processing is performed and the input bytes are directly +/// returned. This implies that this parser never saves any bytes internally. +#[derive(Default)] +pub struct Bytes(()); + +impl Parser for Bytes { + type Item = [u8]; + type Error = core::convert::Infallible; + + fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> { + Ok(bytes) + } +} + +/// A text parser +/// +/// This parser converts the input bytes to a `str`. This parser preserves +/// trailing invalid UTF-8 sequences in the case that chunking fell in the +/// middle of a valid UTF-8 character. +#[derive(Default)] +pub struct Text { + stored: usize, + buffer: [u8; 3], +} + +impl Parser for Text { + type Item = str; + type Error = core::str::Utf8Error; + + fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> { + // If we cannot advance, return nothing. + if bytes.len() <= self.stored { + return Ok(""); + } + + // Copy previously invalid data into place. + bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]); + + Ok(match core::str::from_utf8(bytes) { + Ok(s) => s, + Err(e) => { + let valid_len = e.valid_up_to(); + let invalid_len = bytes.len() - valid_len; + + // If the size of the invalid UTF-8 is large enough to hold + // all valid UTF-8 characters, we have a syntax error. + if invalid_len > self.buffer.len() { + return Err(e); + } + + // Otherwise, store the invalid bytes for the next read cycle. + self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]); + self.stored = invalid_len; + + // Decode the valid part of the string. 
+ core::str::from_utf8(&bytes[..valid_len]).unwrap() + } + }) + } + + fn saved(&self) -> usize { + self.stored + } +} + +/// A CBOR segment +/// +/// This type represents a single bytes or text segment on the wire. It can be +/// read out in parsed chunks based on the size of the input scratch buffer. +pub struct Segment<'r, R: Read, P: Parser> { + reader: &'r mut Decoder<R>, + unread: usize, + offset: usize, + parser: P, +} + +impl<'r, R: Read, P: Parser> Segment<'r, R, P> { + /// Gets the number of unprocessed bytes + #[inline] + pub fn left(&self) -> usize { + self.unread + self.parser.saved() + } + + /// Gets the next parsed chunk within the segment + /// + /// Returns `Ok(None)` when all chunks have been read. + #[inline] + pub fn pull<'a>( + &mut self, + buffer: &'a mut [u8], + ) -> Result<Option<&'a P::Item>, Error<R::Error>> { + use core::cmp::min; + + let prev = self.parser.saved(); + match self.unread { + 0 if prev == 0 => return Ok(None), + 0 => return Err(Error::Syntax(self.offset)), + _ => (), + } + + // Determine how many bytes to read. + let size = min(buffer.len(), prev + self.unread); + let full = &mut buffer[..size]; + let next = &mut full[min(size, prev)..]; + + // Read additional bytes. + self.reader.read_exact(next)?; + self.unread -= next.len(); + + self.parser + .parse(full) + .or(Err(Error::Syntax(self.offset))) + .map(Some) + } +} + +/// A sequence of CBOR segments +/// +/// CBOR allows for bytes or text items to be segmented. This type represents +/// the state of that segmented input stream. 
+pub struct Segments<'r, R: Read, P: Parser> { + reader: &'r mut Decoder<R>, + finish: bool, + nested: usize, + parser: PhantomData<P>, + unwrap: fn(Header) -> Result<Option<usize>, ()>, +} + +impl<'r, R: Read, P: Parser> Segments<'r, R, P> { + #[inline] + pub(crate) fn new( + decoder: &'r mut Decoder<R>, + unwrap: fn(Header) -> Result<Option<usize>, ()>, + ) -> Self { + Self { + reader: decoder, + finish: false, + nested: 0, + parser: PhantomData, + unwrap, + } + } + + /// Gets the next segment in the stream + /// + /// Returns `Ok(None)` at the conclusion of the stream. + #[inline] + pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> { + while !self.finish { + let offset = self.reader.offset(); + match self.reader.pull()? { + Header::Break if self.nested == 1 => return Ok(None), + Header::Break if self.nested > 1 => self.nested -= 1, + header => match (self.unwrap)(header) { + Err(..) => return Err(Error::Syntax(offset)), + Ok(None) => self.nested += 1, + Ok(Some(len)) => { + self.finish = self.nested == 0; + return Ok(Some(Segment { + reader: self.reader, + unread: len, + offset, + parser: P::default(), + })); + } + }, + } + } + + Ok(None) + } +} -- 2.7.4