Import ciborium-ll 0.2.0 upstream upstream/0.2.0
author DongHun Kwak <dh0128.kwak@samsung.com>
Tue, 21 Mar 2023 04:05:27 +0000 (13:05 +0900)
committer DongHun Kwak <dh0128.kwak@samsung.com>
Tue, 21 Mar 2023 04:05:27 +0000 (13:05 +0900)
.cargo_vcs_info.json [new file with mode: 0644]
Cargo.toml [new file with mode: 0644]
Cargo.toml.orig [new file with mode: 0644]
README.md [new file with mode: 0644]
src/dec.rs [new file with mode: 0644]
src/enc.rs [new file with mode: 0644]
src/hdr.rs [new file with mode: 0644]
src/lib.rs [new file with mode: 0644]
src/seg.rs [new file with mode: 0644]

diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644 (file)
index 0000000..e3f78ef
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "git": {
+    "sha1": "e8512abee2f126ae60923be4362c175703550894"
+  }
+}
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644 (file)
index 0000000..f70a861
--- /dev/null
@@ -0,0 +1,46 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "ciborium-ll"
+version = "0.2.0"
+authors = ["Nathaniel McCallum <npmccallum@profian.com>"]
+description = "Low-level CBOR codec primitives"
+homepage = "https://github.com/enarx/ciborium"
+readme = "README.md"
+keywords = ["cbor"]
+categories = ["data-structures", "embedded", "encoding", "no-std", "parsing"]
+license = "Apache-2.0"
+repository = "https://github.com/enarx/ciborium"
+[dependencies.ciborium-io]
+version = "0.2.0"
+
+[dependencies.half]
+version = "1.6"
+[dev-dependencies.hex]
+version = "0.4"
+
+[features]
+alloc = []
+std = ["alloc"]
+[badges.github]
+repository = "enarx/ciborium"
+workflow = "test"
+
+[badges.is-it-maintained-issue-resolution]
+repository = "enarx/ciborium"
+
+[badges.is-it-maintained-open-issues]
+repository = "enarx/ciborium"
+
+[badges.maintenance]
+status = "actively-developed"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644 (file)
index 0000000..97dc646
--- /dev/null
@@ -0,0 +1,31 @@
+[package]
+name = "ciborium-ll"
+version = "0.2.0"
+authors = ["Nathaniel McCallum <npmccallum@profian.com>"]
+license = "Apache-2.0"
+edition = "2021"
+homepage = "https://github.com/enarx/ciborium"
+repository = "https://github.com/enarx/ciborium"
+description = "Low-level CBOR codec primitives"
+readme = "README.md"
+keywords = ["cbor"]
+categories = ["data-structures", "embedded", "encoding", "no-std", "parsing"]
+
+[badges]
+# See https://doc.rust-lang.org/cargo/reference/manifest.html#the-badges-section
+github = { repository = "enarx/ciborium", workflow = "test" }
+#github = { repository = "enarx/ciborium", workflow = "lint" }
+maintenance = { status = "actively-developed" }
+is-it-maintained-issue-resolution = { repository = "enarx/ciborium" }
+is-it-maintained-open-issues = { repository = "enarx/ciborium" }
+
+[dependencies]
+ciborium-io = { path = "../ciborium-io", version = "0.2.0" }
+half = "1.6"
+
+[dev-dependencies]
+hex = "0.4"
+
+[features]
+alloc = []
+std = ["alloc"]
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..2ca6306
--- /dev/null
+++ b/README.md
@@ -0,0 +1,131 @@
+[![Workflow Status](https://github.com/enarx/ciborium/workflows/test/badge.svg)](https://github.com/enarx/ciborium/actions?query=workflow%3A%22test%22)
+[![Average time to resolve an issue](https://isitmaintained.com/badge/resolution/enarx/ciborium.svg)](https://isitmaintained.com/project/enarx/ciborium "Average time to resolve an issue")
+[![Percentage of issues still open](https://isitmaintained.com/badge/open/enarx/ciborium.svg)](https://isitmaintained.com/project/enarx/ciborium "Percentage of issues still open")
+![Maintenance](https://img.shields.io/badge/maintenance-actively--developed-brightgreen.svg)
+
+# ciborium-ll
+
+Low level CBOR parsing tools
+
+This crate contains low-level types for encoding and decoding items in
+CBOR. This crate is usable in both `no_std` and `no_alloc` environments.
+To understand how this crate works, first we will look at the structure
+of a CBOR item on the wire.
+
+## Anatomy of a CBOR Item
+
+This is a brief anatomy of a CBOR item on the wire.
+
+```
++------------+-----------+
+|            |           |
+|   Major    |   Minor   |
+|  (3bits)   |  (5bits)  |
+|            |           |
++------------+-----------+
+^                        ^
+|                        |
++-----+            +-----+
+      |            |
+      |            |
+      +----------------------------+--------------+
+      |            |               |              |
+      |   Prefix   |     Affix     |    Suffix    |
+      |  (1 byte)  |  (0-8 bytes)  |  (0+ bytes)  |
+      |            |               |              |
+      +------------+---------------+--------------+
+
+      |                            |              |
+      +------------+---------------+--------------+
+                   |                       |
+                   v                       v
+
+                 Header                   Body
+```
+
+The `ciborium-ll` crate works by providing the `Decoder` and `Encoder` types
+which provide input and output for a CBOR header (see: `Header`). From
+there, you can either handle the body yourself or use the provided utility
+functions.
+
+For more information on the CBOR format, see
+[RFC 7049](https://tools.ietf.org/html/rfc7049).
+
+## Decoding
+
+In order to decode CBOR, you will create a `Decoder` from a reader. The
+decoder instance will allow you to `Decoder::pull()` `Header` instances
+from the input.
+
+Most CBOR items are fully contained in their headers and therefore have no
+body. These items can be evaluated directly from the `Header` instance.
+
+Bytes and text items have a body but do not contain child items. Since
+both bytes and text values may be segmented, parsing them can be a bit
+tricky. Therefore, we provide helper functions to parse these types. See
+`Decoder::bytes()` and `Decoder::text()` for more details.
+
+Array and map items have a body which contains child items. These can be
+parsed by simply doing `Decoder::pull()` to parse the child items.
+
+### Example
+
+```rust
+use ciborium_ll::{Decoder, Header};
+use ciborium_io::Read as _;
+
+let input = b"\x6dHello, World!";
+let mut decoder = Decoder::from(&input[..]);
+let mut chunks = 0;
+
+match decoder.pull().unwrap() {
+    Header::Text(len) => {
+        let mut segments = decoder.text(len);
+        while let Some(mut segment) = segments.pull().unwrap() {
+            let mut buffer = [0u8; 7];
+            while let Some(chunk) = segment.pull(&mut buffer[..]).unwrap() {
+                 match chunk {
+                     "Hello, " if chunks == 0 => chunks = 1,
+                     "World!" if chunks == 1 => chunks = 2,
+                     _ => panic!("received unexpected chunk"),
+                 }
+            }
+        }
+    }
+
+    _ => panic!("received unexpected value"),
+}
+
+assert_eq!(chunks, 2);
+```
+
+## Encoding
+
+To encode values to CBOR, create an `Encoder` from a writer. The encoder
+instance provides the `Encoder::push()` method to write a `Header` value
+to the wire. CBOR item bodies can be written directly.
+
+For bytes and text, there are the `Encoder::bytes()` and `Encoder::text()`
+utility functions, respectively, which will properly segment the output
+on the wire for you.
+
+### Example
+
+```rust
+use ciborium_ll::{Encoder, Header};
+use ciborium_io::Write as _;
+
+let mut buffer = [0u8; 19];
+let mut encoder = Encoder::from(&mut buffer[..]);
+
+// Write the structure
+encoder.push(Header::Map(Some(1))).unwrap();
+encoder.push(Header::Positive(7)).unwrap();
+encoder.text("Hello, World!", 7).unwrap();
+
+// Validate our output
+encoder.flush().unwrap();
+assert_eq!(b"\xa1\x07\x7f\x67Hello, \x66World!\xff", &buffer[..]);
+```
+
+License: Apache-2.0
diff --git a/src/dec.rs b/src/dec.rs
new file mode 100644 (file)
index 0000000..8329634
--- /dev/null
@@ -0,0 +1,174 @@
+use super::*;
+
+use ciborium_io::Read;
+
+/// An error that occurred while decoding
+#[derive(Debug)]
+pub enum Error<T> {
+    /// An error occurred while reading bytes
+    ///
+    /// Contains the underlying error returned while reading.
+    Io(T),
+
+    /// An error occurred while parsing bytes
+    ///
+    /// Contains the offset into the stream where the syntax error occurred.
+    Syntax(usize),
+}
+
+impl<T> From<T> for Error<T> {
+    #[inline]
+    fn from(value: T) -> Self {
+        Self::Io(value)
+    }
+}
+
+/// A decoder for deserializing CBOR items
+///
+/// This decoder manages the low-level decoding of CBOR items into `Header`
+/// objects. It also contains utility functions for parsing segmented bytes
+/// and text inputs.
+pub struct Decoder<R: Read> {
+    reader: R,
+    offset: usize,
+    buffer: Option<Title>,
+}
+
+impl<R: Read> From<R> for Decoder<R> {
+    #[inline]
+    fn from(value: R) -> Self {
+        Self {
+            reader: value,
+            offset: 0,
+            buffer: None,
+        }
+    }
+}
+
+impl<R: Read> Read for Decoder<R> {
+    type Error = R::Error;
+
+    #[inline]
+    fn read_exact(&mut self, data: &mut [u8]) -> Result<(), Self::Error> {
+        assert!(self.buffer.is_none());
+        self.reader.read_exact(data)?;
+        self.offset += data.len();
+        Ok(())
+    }
+}
+
+impl<R: Read> Decoder<R> {
+    #[inline]
+    fn pull_title(&mut self) -> Result<Title, Error<R::Error>> {
+        if let Some(title) = self.buffer.take() {
+            self.offset += title.1.as_ref().len() + 1;
+            return Ok(title);
+        }
+
+        let mut prefix = [0u8; 1];
+        self.read_exact(&mut prefix[..])?;
+
+        let major = match prefix[0] >> 5 {
+            0 => Major::Positive,
+            1 => Major::Negative,
+            2 => Major::Bytes,
+            3 => Major::Text,
+            4 => Major::Array,
+            5 => Major::Map,
+            6 => Major::Tag,
+            7 => Major::Other,
+            _ => unreachable!(),
+        };
+
+        let mut minor = match prefix[0] & 0b00011111 {
+            x if x < 24 => Minor::This(x),
+            24 => Minor::Next1([0; 1]),
+            25 => Minor::Next2([0; 2]),
+            26 => Minor::Next4([0; 4]),
+            27 => Minor::Next8([0; 8]),
+            31 => Minor::More,
+            _ => return Err(Error::Syntax(self.offset - 1)),
+        };
+
+        self.read_exact(minor.as_mut())?;
+        Ok(Title(major, minor))
+    }
+
+    #[inline]
+    fn push_title(&mut self, item: Title) {
+        assert!(self.buffer.is_none());
+        self.buffer = Some(item);
+        self.offset -= item.1.as_ref().len() + 1;
+    }
+
+    /// Pulls the next header from the input
+    #[inline]
+    pub fn pull(&mut self) -> Result<Header, Error<R::Error>> {
+        let offset = self.offset;
+        self.pull_title()?
+            .try_into()
+            .map_err(|_| Error::Syntax(offset))
+    }
+
+    /// Push a single header into the input buffer
+    ///
+    /// # Panics
+    ///
+    /// This function panics if called while there is already a header in the
+    /// input buffer. You should take care to call this function only after
+    /// pulling a header to ensure there is nothing in the input buffer.
+    #[inline]
+    pub fn push(&mut self, item: Header) {
+        self.push_title(Title::from(item))
+    }
+
+    /// Gets the current byte offset into the stream
+    ///
+    /// The offset starts at zero when the decoder is created. Therefore, if
+    /// bytes were already read from the reader before the decoder was created,
+    /// you must account for this.
+    #[inline]
+    pub fn offset(&mut self) -> usize {
+        self.offset
+    }
+
+    /// Process an incoming bytes item
+    ///
+    /// In CBOR, bytes can be segmented. The logic for this can be a bit tricky,
+    /// so we encapsulate that logic using this function. This function **MUST**
+    /// be called immediately after first pulling a `Header::Bytes(len)` from
+    /// the wire and `len` must be provided to this function from that value.
+    ///
+    /// This function takes no buffer; the buffer later passed to
+    /// `Segment::pull()` is used when reading in the segmented bytes. A large
+    /// buffer will result in fewer calls to read incoming bytes at the cost of
+    /// memory usage. Consider this trade off when deciding the size of your buffer.
+    #[inline]
+    pub fn bytes(&mut self, len: Option<usize>) -> Segments<R, crate::seg::Bytes> {
+        self.push(Header::Bytes(len));
+        Segments::new(self, |header| match header {
+            Header::Bytes(len) => Ok(len),
+            _ => Err(()),
+        })
+    }
+
+    /// Process an incoming text item
+    ///
+    /// In CBOR, text can be segmented. The logic for this can be a bit tricky,
+    /// so we encapsulate that logic using this function. This function **MUST**
+    /// be called immediately after first pulling a `Header::Text(len)` from
+    /// the wire and `len` must be provided to this function from that value.
+    ///
+    /// This function takes no buffer; the buffer later passed to
+    /// `Segment::pull()` is used when reading in the segmented text. A large
+    /// buffer will result in fewer calls to read incoming bytes at the cost of
+    /// memory usage. Consider this trade off when deciding the size of your buffer.
+    #[inline]
+    pub fn text(&mut self, len: Option<usize>) -> Segments<R, crate::seg::Text> {
+        self.push(Header::Text(len));
+        Segments::new(self, |header| match header {
+            Header::Text(len) => Ok(len),
+            _ => Err(()),
+        })
+    }
+}
diff --git a/src/enc.rs b/src/enc.rs
new file mode 100644 (file)
index 0000000..909728f
--- /dev/null
@@ -0,0 +1,127 @@
+use super::*;
+
+use ciborium_io::Write;
+
+/// An encoder for serializing CBOR items
+///
+/// This structure wraps a writer and provides convenience functions for
+/// writing `Header` objects to the wire.
+pub struct Encoder<W: Write>(W);
+
+impl<W: Write> From<W> for Encoder<W> {
+    #[inline]
+    fn from(value: W) -> Self {
+        Self(value)
+    }
+}
+
+impl<W: Write> Write for Encoder<W> {
+    type Error = W::Error;
+
+    fn write_all(&mut self, data: &[u8]) -> Result<(), Self::Error> {
+        self.0.write_all(data)
+    }
+
+    fn flush(&mut self) -> Result<(), Self::Error> {
+        self.0.flush()
+    }
+}
+
+impl<W: Write> Encoder<W> {
+    /// Push a `Header` to the wire
+    #[inline]
+    pub fn push(&mut self, header: Header) -> Result<(), W::Error> {
+        let title = Title::from(header);
+
+        let major = match title.0 {
+            Major::Positive => 0,
+            Major::Negative => 1,
+            Major::Bytes => 2,
+            Major::Text => 3,
+            Major::Array => 4,
+            Major::Map => 5,
+            Major::Tag => 6,
+            Major::Other => 7,
+        };
+
+        let minor = match title.1 {
+            Minor::This(x) => x,
+            Minor::Next1(..) => 24,
+            Minor::Next2(..) => 25,
+            Minor::Next4(..) => 26,
+            Minor::Next8(..) => 27,
+            Minor::More => 31,
+        };
+
+        self.0.write_all(&[major << 5 | minor])?;
+        self.0.write_all(title.1.as_ref())
+    }
+
+    /// Serialize a byte slice as CBOR
+    ///
+    /// Optionally, segment the output into `segment` size segments. Note that
+    /// if `segment == Some(0)` it will be silently upgraded to `Some(1)`. This
+    /// minimum value is highly inefficient and should not be relied upon.
+    #[inline]
+    pub fn bytes(
+        &mut self,
+        value: &[u8],
+        segment: impl Into<Option<usize>>,
+    ) -> Result<(), W::Error> {
+        let max = segment.into().unwrap_or_else(|| value.len());
+        let max = core::cmp::max(max, 1);
+
+        if max >= value.len() {
+            self.push(Header::Bytes(Some(value.len())))?;
+            self.write_all(value)?;
+        } else {
+            self.push(Header::Bytes(None))?;
+
+            for chunk in value.chunks(max) {
+                self.push(Header::Bytes(Some(chunk.len())))?;
+                self.write_all(chunk)?;
+            }
+
+            self.push(Header::Break)?;
+        }
+
+        Ok(())
+    }
+
+    /// Serialize a string slice as CBOR
+    ///
+    /// Optionally, segment the output into `segment` size segments. Note that
+    /// since care is taken to ensure that each segment is itself a valid UTF-8
+    /// string, if `segment` contains a value of less than 4, it will be
+    /// silently upgraded to 4. This minimum value is highly inefficient and
+    /// should not be relied upon.
+    #[inline]
+    pub fn text(&mut self, value: &str, segment: impl Into<Option<usize>>) -> Result<(), W::Error> {
+        let max = segment.into().unwrap_or_else(|| value.len());
+        let max = core::cmp::max(max, 4);
+
+        if max >= value.len() {
+            self.push(Header::Text(Some(value.len())))?;
+            self.write_all(value.as_bytes())?;
+        } else {
+            self.push(Header::Text(None))?;
+
+            let mut bytes = value.as_bytes();
+            while !bytes.is_empty() {
+                let mut len = core::cmp::min(bytes.len(), max);
+                while len > 0 && core::str::from_utf8(&bytes[..len]).is_err() {
+                    len -= 1
+                }
+
+                let (prefix, suffix) = bytes.split_at(len);
+                self.push(Header::Text(Some(prefix.len())))?;
+                self.write_all(prefix)?;
+                bytes = suffix;
+            }
+
+            self.push(Header::Break)?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/hdr.rs b/src/hdr.rs
new file mode 100644 (file)
index 0000000..dec1788
--- /dev/null
@@ -0,0 +1,163 @@
+use super::*;
+
+use half::f16;
+
+/// A semantic representation of a CBOR item header
+///
+/// This structure represents the valid values of a CBOR item header and is
+/// used extensively when serializing or deserializing CBOR items. Note well
+/// that this structure **DOES NOT** represent the body (i.e. suffix) of the
+/// CBOR item. You must parse the body yourself based on the contents of the
+/// `Header`. However, utility functions are provided for this (see:
+/// `Decoder::bytes()` and `Decoder::text()`).
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Header {
+    /// A positive integer
+    Positive(u64),
+
+    /// A negative integer
+    ///
+    /// Note well that this value has all bits inverted from a normal signed
+    /// integer. For example, to convert the `u64` to an `i128` you would do
+    /// this: `neg as i128 ^ !0`.
+    Negative(u64),
+
+    /// A floating point value
+    Float(f64),
+
+    /// A "simple" value
+    Simple(u8),
+
+    /// A tag
+    Tag(u64),
+
+    /// The "break" value
+    ///
+    /// This value is used to terminate indefinite length arrays and maps,
+    /// as well as segmented byte or text items.
+    Break,
+
+    /// A bytes item
+    ///
+    /// The value contained in this variant indicates the length of the bytes
+    /// which follow or, if `None`, segmented bytes input.
+    ///
+    /// A best practice is to call `Decoder::bytes()` immediately after
+    /// first pulling a bytes item header since this utility function
+    /// encapsulates all the logic needed to handle segmentation.
+    Bytes(Option<usize>),
+
+    /// A text item
+    ///
+    /// The value contained in this variant indicates the length of the text
+    /// which follows (in bytes) or, if `None`, segmented text input.
+    ///
+    /// A best practice is to call `Decoder::text()` immediately after
+    /// first pulling a text item header since this utility function
+    /// encapsulates all the logic needed to handle segmentation.
+    Text(Option<usize>),
+
+    /// An array item
+    ///
+    /// The value contained in this variant indicates the length of the array
+    /// which follows (in items) or, if `None`, an indefinite length array
+    /// terminated by a "break" value.
+    Array(Option<usize>),
+
+    /// A map item
+    ///
+    /// The value contained in this variant indicates the length of the map
+    /// which follows (in item pairs) or, if `None`, an indefinite length map
+    /// terminated by a "break" value.
+    Map(Option<usize>),
+}
+
+impl TryFrom<Title> for Header {
+    type Error = InvalidError;
+
+    fn try_from(title: Title) -> Result<Self, Self::Error> {
+        let opt = |minor| {
+            Some(match minor {
+                Minor::This(x) => x.into(),
+                Minor::Next1(x) => u8::from_be_bytes(x).into(),
+                Minor::Next2(x) => u16::from_be_bytes(x).into(),
+                Minor::Next4(x) => u32::from_be_bytes(x).into(),
+                Minor::Next8(x) => u64::from_be_bytes(x),
+                Minor::More => return None,
+            })
+        };
+
+        let int = |m| opt(m).ok_or(InvalidError(()));
+
+        let len = |m| {
+            opt(m)
+                .map(usize::try_from)
+                .transpose()
+                .or(Err(InvalidError(())))
+        };
+
+        Ok(match title {
+            Title(Major::Positive, minor) => Self::Positive(int(minor)?),
+            Title(Major::Negative, minor) => Self::Negative(int(minor)?),
+            Title(Major::Bytes, minor) => Self::Bytes(len(minor)?),
+            Title(Major::Text, minor) => Self::Text(len(minor)?),
+            Title(Major::Array, minor) => Self::Array(len(minor)?),
+            Title(Major::Map, minor) => Self::Map(len(minor)?),
+            Title(Major::Tag, minor) => Self::Tag(int(minor)?),
+
+            Title(Major::Other, Minor::More) => Self::Break,
+            Title(Major::Other, Minor::This(x)) => Self::Simple(x),
+            Title(Major::Other, Minor::Next1(x)) => Self::Simple(x[0]),
+            Title(Major::Other, Minor::Next2(x)) => Self::Float(f16::from_be_bytes(x).into()),
+            Title(Major::Other, Minor::Next4(x)) => Self::Float(f32::from_be_bytes(x).into()),
+            Title(Major::Other, Minor::Next8(x)) => Self::Float(f64::from_be_bytes(x)),
+        })
+    }
+}
+
+impl From<Header> for Title {
+    fn from(header: Header) -> Self {
+        let int = |i: u64| match i {
+            x if x <= 23 => Minor::This(i as u8),
+            x if x <= core::u8::MAX as u64 => Minor::Next1([i as u8]),
+            x if x <= core::u16::MAX as u64 => Minor::Next2((i as u16).to_be_bytes()),
+            x if x <= core::u32::MAX as u64 => Minor::Next4((i as u32).to_be_bytes()),
+            x => Minor::Next8(x.to_be_bytes()),
+        };
+
+        let len = |l: Option<usize>| l.map(|x| int(x as u64)).unwrap_or(Minor::More);
+
+        match header {
+            Header::Positive(x) => Title(Major::Positive, int(x)),
+            Header::Negative(x) => Title(Major::Negative, int(x)),
+            Header::Bytes(x) => Title(Major::Bytes, len(x)),
+            Header::Text(x) => Title(Major::Text, len(x)),
+            Header::Array(x) => Title(Major::Array, len(x)),
+            Header::Map(x) => Title(Major::Map, len(x)),
+            Header::Tag(x) => Title(Major::Tag, int(x)),
+
+            Header::Break => Title(Major::Other, Minor::More),
+
+            Header::Simple(x) => match x {
+                x @ 0..=23 => Title(Major::Other, Minor::This(x)),
+                x => Title(Major::Other, Minor::Next1([x])),
+            },
+
+            Header::Float(n64) => {
+                let n16 = f16::from_f64(n64);
+                let n32 = n64 as f32;
+
+                Title(
+                    Major::Other,
+                    if f64::from(n16).to_bits() == n64.to_bits() {
+                        Minor::Next2(n16.to_be_bytes())
+                    } else if f64::from(n32).to_bits() == n64.to_bits() {
+                        Minor::Next4(n32.to_be_bytes())
+                    } else {
+                        Minor::Next8(n64.to_be_bytes())
+                    },
+                )
+            }
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644 (file)
index 0000000..8a1fe90
--- /dev/null
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Low level CBOR parsing tools
+//!
+//! This crate contains low-level types for encoding and decoding items in
+//! CBOR. This crate is usable in both `no_std` and `no_alloc` environments.
+//! To understand how this crate works, first we will look at the structure
+//! of a CBOR item on the wire.
+//!
+//! # Anatomy of a CBOR Item
+//!
+//! This is a brief anatomy of a CBOR item on the wire.
+//!
+//! ```text
+//! +------------+-----------+
+//! |            |           |
+//! |   Major    |   Minor   |
+//! |  (3bits)   |  (5bits)  |
+//! |            |           |
+//! +------------+-----------+
+//! ^                        ^
+//! |                        |
+//! +-----+            +-----+
+//!       |            |
+//!       |            |
+//!       +----------------------------+--------------+
+//!       |            |               |              |
+//!       |   Prefix   |     Affix     |    Suffix    |
+//!       |  (1 byte)  |  (0-8 bytes)  |  (0+ bytes)  |
+//!       |            |               |              |
+//!       +------------+---------------+--------------+
+//!
+//!       |                            |              |
+//!       +------------+---------------+--------------+
+//!                    |                       |
+//!                    v                       v
+//!
+//!                  Header                   Body
+//! ```
+//!
+//! The `ciborium-ll` crate works by providing the `Decoder` and `Encoder` types
+//! which provide input and output for a CBOR header (see: `Header`). From
+//! there, you can either handle the body yourself or use the provided utility
+//! functions.
+//!
+//! For more information on the CBOR format, see
+//! [RFC 7049](https://tools.ietf.org/html/rfc7049).
+//!
+//! # Decoding
+//!
+//! In order to decode CBOR, you will create a `Decoder` from a reader. The
+//! decoder instance will allow you to `Decoder::pull()` `Header` instances
+//! from the input.
+//!
+//! Most CBOR items are fully contained in their headers and therefore have no
+//! body. These items can be evaluated directly from the `Header` instance.
+//!
+//! Bytes and text items have a body but do not contain child items. Since
+//! both bytes and text values may be segmented, parsing them can be a bit
+//! tricky. Therefore, we provide helper functions to parse these types. See
+//! `Decoder::bytes()` and `Decoder::text()` for more details.
+//!
+//! Array and map items have a body which contains child items. These can be
+//! parsed by simply doing `Decoder::pull()` to parse the child items.
+//!
+//! ## Example
+//!
+//! ```rust
+//! use ciborium_ll::{Decoder, Header};
+//! use ciborium_io::Read as _;
+//!
+//! let input = b"\x6dHello, World!";
+//! let mut decoder = Decoder::from(&input[..]);
+//! let mut chunks = 0;
+//!
+//! match decoder.pull().unwrap() {
+//!     Header::Text(len) => {
+//!         let mut segments = decoder.text(len);
+//!         while let Some(mut segment) = segments.pull().unwrap() {
+//!             let mut buffer = [0u8; 7];
+//!             while let Some(chunk) = segment.pull(&mut buffer[..]).unwrap() {
+//!                  match chunk {
+//!                      "Hello, " if chunks == 0 => chunks = 1,
+//!                      "World!" if chunks == 1 => chunks = 2,
+//!                      _ => panic!("received unexpected chunk"),
+//!                  }
+//!             }
+//!         }
+//!     }
+//!
+//!     _ => panic!("received unexpected value"),
+//! }
+//!
+//! assert_eq!(chunks, 2);
+//! ```
+//!
+//! # Encoding
+//!
+//! To encode values to CBOR, create an `Encoder` from a writer. The encoder
+//! instance provides the `Encoder::push()` method to write a `Header` value
+//! to the wire. CBOR item bodies can be written directly.
+//!
+//! For bytes and text, there are the `Encoder::bytes()` and `Encoder::text()`
+//! utility functions, respectively, which will properly segment the output
+//! on the wire for you.
+//!
+//! ## Example
+//!
+//! ```rust
+//! use ciborium_ll::{Encoder, Header};
+//! use ciborium_io::Write as _;
+//!
+//! let mut buffer = [0u8; 19];
+//! let mut encoder = Encoder::from(&mut buffer[..]);
+//!
+//! // Write the structure
+//! encoder.push(Header::Map(Some(1))).unwrap();
+//! encoder.push(Header::Positive(7)).unwrap();
+//! encoder.text("Hello, World!", 7).unwrap();
+//!
+//! // Validate our output
+//! encoder.flush().unwrap();
+//! assert_eq!(b"\xa1\x07\x7f\x67Hello, \x66World!\xff", &buffer[..]);
+//! ```
+
+#![cfg_attr(not(feature = "std"), no_std)]
+#![deny(missing_docs)]
+#![deny(clippy::all)]
+#![deny(clippy::cargo)]
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+mod dec;
+mod enc;
+mod hdr;
+mod seg;
+
+pub use dec::*;
+pub use enc::*;
+pub use hdr::*;
+pub use seg::{Segment, Segments};
+
+/// Simple value constants
+pub mod simple {
+    #![allow(missing_docs)]
+
+    pub const FALSE: u8 = 20;
+    pub const TRUE: u8 = 21;
+    pub const NULL: u8 = 22;
+    pub const UNDEFINED: u8 = 23;
+}
+
+/// Tag constants
+pub mod tag {
+    #![allow(missing_docs)]
+
+    pub const BIGPOS: u64 = 2;
+    pub const BIGNEG: u64 = 3;
+}
+
+#[derive(Debug)]
+struct InvalidError(());
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum Major {
+    Positive,
+    Negative,
+    Bytes,
+    Text,
+    Array,
+    Map,
+    Tag,
+    Other,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum Minor {
+    This(u8),
+    Next1([u8; 1]),
+    Next2([u8; 2]),
+    Next4([u8; 4]),
+    Next8([u8; 8]),
+    More,
+}
+
+impl AsRef<[u8]> for Minor {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        match self {
+            Self::More => &[],
+            Self::This(..) => &[],
+            Self::Next1(x) => x.as_ref(),
+            Self::Next2(x) => x.as_ref(),
+            Self::Next4(x) => x.as_ref(),
+            Self::Next8(x) => x.as_ref(),
+        }
+    }
+}
+
+impl AsMut<[u8]> for Minor {
+    #[inline]
+    fn as_mut(&mut self) -> &mut [u8] {
+        match self {
+            Self::More => &mut [],
+            Self::This(..) => &mut [],
+            Self::Next1(x) => x.as_mut(),
+            Self::Next2(x) => x.as_mut(),
+            Self::Next4(x) => x.as_mut(),
+            Self::Next8(x) => x.as_mut(),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+struct Title(pub Major, pub Minor);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    macro_rules! neg {
+        ($i:expr) => {
+            Header::Negative((($i as i128) ^ !0) as u64)
+        };
+    }
+
+    #[allow(clippy::excessive_precision)]
+    #[test]
+    fn leaf() {
+        use core::f64::{INFINITY, NAN};
+
+        let data = &[
+            (Header::Positive(0), "00", true),
+            (Header::Positive(1), "01", true),
+            (Header::Positive(10), "0a", true),
+            (Header::Positive(23), "17", true),
+            (Header::Positive(24), "1818", true),
+            (Header::Positive(25), "1819", true),
+            (Header::Positive(100), "1864", true),
+            (Header::Positive(1000), "1903e8", true),
+            (Header::Positive(1000000), "1a000f4240", true),
+            (Header::Positive(1000000000000), "1b000000e8d4a51000", true),
+            (
+                Header::Positive(18446744073709551615),
+                "1bffffffffffffffff",
+                true,
+            ),
+            (neg!(-18446744073709551616), "3bffffffffffffffff", true),
+            (neg!(-1), "20", true),
+            (neg!(-10), "29", true),
+            (neg!(-100), "3863", true),
+            (neg!(-1000), "3903e7", true),
+            (Header::Float(0.0), "f90000", true),
+            (Header::Float(-0.0), "f98000", true),
+            (Header::Float(1.0), "f93c00", true),
+            (Header::Float(1.1), "fb3ff199999999999a", true),
+            (Header::Float(1.5), "f93e00", true),
+            (Header::Float(65504.0), "f97bff", true),
+            (Header::Float(100000.0), "fa47c35000", true),
+            (Header::Float(3.4028234663852886e+38), "fa7f7fffff", true),
+            (Header::Float(1.0e+300), "fb7e37e43c8800759c", true),
+            (Header::Float(5.960464477539063e-8), "f90001", true),
+            (Header::Float(0.00006103515625), "f90400", true),
+            (Header::Float(-4.0), "f9c400", true),
+            (Header::Float(-4.1), "fbc010666666666666", true),
+            (Header::Float(INFINITY), "f97c00", true),
+            (Header::Float(NAN), "f97e00", true),
+            (Header::Float(-INFINITY), "f9fc00", true),
+            (Header::Float(INFINITY), "fa7f800000", false),
+            (Header::Float(NAN), "fa7fc00000", false),
+            (Header::Float(-INFINITY), "faff800000", false),
+            (Header::Float(INFINITY), "fb7ff0000000000000", false),
+            (Header::Float(NAN), "fb7ff8000000000000", false),
+            (Header::Float(-INFINITY), "fbfff0000000000000", false),
+            (Header::Simple(simple::FALSE), "f4", true),
+            (Header::Simple(simple::TRUE), "f5", true),
+            (Header::Simple(simple::NULL), "f6", true),
+            (Header::Simple(simple::UNDEFINED), "f7", true),
+            (Header::Simple(16), "f0", true),
+            (Header::Simple(24), "f818", true),
+            (Header::Simple(255), "f8ff", true),
+            (Header::Tag(0), "c0", true),
+            (Header::Tag(1), "c1", true),
+            (Header::Tag(23), "d7", true),
+            (Header::Tag(24), "d818", true),
+            (Header::Tag(32), "d820", true),
+            (Header::Bytes(Some(0)), "40", true),
+            (Header::Bytes(Some(4)), "44", true),
+            (Header::Text(Some(0)), "60", true),
+            (Header::Text(Some(4)), "64", true),
+        ];
+
+        for (header, bytes, encode) in data.iter().cloned() {
+            let bytes = hex::decode(bytes).unwrap();
+
+            let mut decoder = Decoder::from(&bytes[..]);
+            match (header, decoder.pull().unwrap()) {
+                // NaN equality...
+                (Header::Float(l), Header::Float(r)) if l.is_nan() && r.is_nan() => (),
+
+                // Everything else...
+                (l, r) => assert_eq!(l, r),
+            }
+
+            if encode {
+                let mut buffer = [0u8; 1024];
+                let mut writer = &mut buffer[..];
+                let mut encoder = Encoder::from(&mut writer);
+                encoder.push(header).unwrap();
+
+                let len = writer.len();
+                assert_eq!(&bytes[..], &buffer[..1024 - len]);
+            }
+        }
+    }
+
+    #[test]
+    fn node() {
+        let data: &[(&str, &[Header])] = &[
+            ("80", &[Header::Array(Some(0))]),
+            (
+                "83010203",
+                &[
+                    Header::Array(Some(3)),
+                    Header::Positive(1),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                ],
+            ),
+            (
+                "98190102030405060708090a0b0c0d0e0f101112131415161718181819",
+                &[
+                    Header::Array(Some(25)),
+                    Header::Positive(1),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                    Header::Positive(4),
+                    Header::Positive(5),
+                    Header::Positive(6),
+                    Header::Positive(7),
+                    Header::Positive(8),
+                    Header::Positive(9),
+                    Header::Positive(10),
+                    Header::Positive(11),
+                    Header::Positive(12),
+                    Header::Positive(13),
+                    Header::Positive(14),
+                    Header::Positive(15),
+                    Header::Positive(16),
+                    Header::Positive(17),
+                    Header::Positive(18),
+                    Header::Positive(19),
+                    Header::Positive(20),
+                    Header::Positive(21),
+                    Header::Positive(22),
+                    Header::Positive(23),
+                    Header::Positive(24),
+                    Header::Positive(25),
+                ],
+            ),
+            ("a0", &[Header::Map(Some(0))]),
+            (
+                "a201020304",
+                &[
+                    Header::Map(Some(2)),
+                    Header::Positive(1),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                    Header::Positive(4),
+                ],
+            ),
+            ("9fff", &[Header::Array(None), Header::Break]),
+            (
+                "9f018202039f0405ffff",
+                &[
+                    Header::Array(None),
+                    Header::Positive(1),
+                    Header::Array(Some(2)),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                    Header::Array(None),
+                    Header::Positive(4),
+                    Header::Positive(5),
+                    Header::Break,
+                    Header::Break,
+                ],
+            ),
+            (
+                "9f01820203820405ff",
+                &[
+                    Header::Array(None),
+                    Header::Positive(1),
+                    Header::Array(Some(2)),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                    Header::Array(Some(2)),
+                    Header::Positive(4),
+                    Header::Positive(5),
+                    Header::Break,
+                ],
+            ),
+            (
+                "83018202039f0405ff",
+                &[
+                    Header::Array(Some(3)),
+                    Header::Positive(1),
+                    Header::Array(Some(2)),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                    Header::Array(None),
+                    Header::Positive(4),
+                    Header::Positive(5),
+                    Header::Break,
+                ],
+            ),
+            (
+                "83019f0203ff820405",
+                &[
+                    Header::Array(Some(3)),
+                    Header::Positive(1),
+                    Header::Array(None),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                    Header::Break,
+                    Header::Array(Some(2)),
+                    Header::Positive(4),
+                    Header::Positive(5),
+                ],
+            ),
+            (
+                "9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff",
+                &[
+                    Header::Array(None),
+                    Header::Positive(1),
+                    Header::Positive(2),
+                    Header::Positive(3),
+                    Header::Positive(4),
+                    Header::Positive(5),
+                    Header::Positive(6),
+                    Header::Positive(7),
+                    Header::Positive(8),
+                    Header::Positive(9),
+                    Header::Positive(10),
+                    Header::Positive(11),
+                    Header::Positive(12),
+                    Header::Positive(13),
+                    Header::Positive(14),
+                    Header::Positive(15),
+                    Header::Positive(16),
+                    Header::Positive(17),
+                    Header::Positive(18),
+                    Header::Positive(19),
+                    Header::Positive(20),
+                    Header::Positive(21),
+                    Header::Positive(22),
+                    Header::Positive(23),
+                    Header::Positive(24),
+                    Header::Positive(25),
+                    Header::Break,
+                ],
+            ),
+        ];
+
+        for (bytes, headers) in data {
+            let bytes = hex::decode(bytes).unwrap();
+
+            // Test decoding
+            let mut decoder = Decoder::from(&bytes[..]);
+            for header in headers.iter().cloned() {
+                assert_eq!(header, decoder.pull().unwrap());
+            }
+
+            // Test encoding
+            let mut buffer = [0u8; 1024];
+            let mut writer = &mut buffer[..];
+            let mut encoder = Encoder::from(&mut writer);
+
+            for header in headers.iter().cloned() {
+                encoder.push(header).unwrap();
+            }
+
+            let len = writer.len();
+            assert_eq!(&bytes[..], &buffer[..1024 - len]);
+        }
+    }
+}
diff --git a/src/seg.rs b/src/seg.rs
new file mode 100644 (file)
index 0000000..d75ebae
--- /dev/null
@@ -0,0 +1,213 @@
+use super::*;
+
+use ciborium_io::Read;
+
+use core::marker::PhantomData;
+
+/// A parser for incoming segments
+///
+/// A parser turns the raw bytes of a segment chunk into a borrowed item. It
+/// must be constructible through `Default`, which is how a fresh parser is
+/// obtained.
+pub trait Parser: Default {
+    /// The type of item that is parsed
+    ///
+    /// The item may be unsized (for example `[u8]` or `str`) because it is
+    /// only ever handed out behind a reference.
+    type Item: ?Sized;
+
+    /// The parsing error that may occur
+    type Error;
+
+    /// The main parsing function
+    ///
+    /// This function processes the incoming bytes and returns the item,
+    /// borrowed from `bytes`.
+    ///
+    /// One important detail that **MUST NOT** be overlooked is that the
+    /// parser may save data from a previous parsing attempt. The number of
+    /// bytes saved is indicated by the `Parser::saved()` function. The saved
+    /// bytes will be copied into the beginning of the `bytes` slice before
+    /// processing, overwriting whatever is there. Therefore, two
+    /// requirements should be met.
+    ///
+    /// First, the incoming byte slice should be longer than the number of
+    /// saved bytes.
+    ///
+    /// Second, the incoming byte slice should contain new bytes only after
+    /// the saved byte prefix, i.e. starting at index `Parser::saved()`.
+    ///
+    /// If both criteria are met, this allows the parser to prepend its saved
+    /// bytes without any additional allocation.
+    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;
+
+    /// Indicates the number of saved bytes in the parser
+    ///
+    /// The default implementation returns `0`, which suits parsers that
+    /// never carry bytes over between calls.
+    fn saved(&self) -> usize {
+        0
+    }
+}
+
+/// A bytes parser
+///
+/// No actual processing is performed and the input bytes are directly
+/// returned. This implies that this parser never saves any bytes internally.
+#[derive(Default)]
+pub struct Bytes(());
+
+impl Parser for Bytes {
+    type Item = [u8];
+    type Error = core::convert::Infallible;
+
+    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
+        Ok(bytes)
+    }
+}
+
+/// A text parser
+///
+/// This parser converts the input bytes to a `str`. This parser preserves
+/// trailing invalid UTF-8 sequences in the case that chunking fell in the
+/// middle of a valid UTF-8 character.
+#[derive(Default)]
+pub struct Text {
+    stored: usize,
+    buffer: [u8; 3],
+}
+
+impl Parser for Text {
+    type Item = str;
+    type Error = core::str::Utf8Error;
+
+    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
+        // If we cannot advance, return nothing.
+        if bytes.len() <= self.stored {
+            return Ok("");
+        }
+
+        // Copy previously invalid data into place.
+        bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
+
+        Ok(match core::str::from_utf8(bytes) {
+            Ok(s) => s,
+            Err(e) => {
+                let valid_len = e.valid_up_to();
+                let invalid_len = bytes.len() - valid_len;
+
+                // If the size of the invalid UTF-8 is large enough to hold
+                // all valid UTF-8 characters, we have a syntax error.
+                if invalid_len > self.buffer.len() {
+                    return Err(e);
+                }
+
+                // Otherwise, store the invalid bytes for the next read cycle.
+                self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
+                self.stored = invalid_len;
+
+                // Decode the valid part of the string.
+                core::str::from_utf8(&bytes[..valid_len]).unwrap()
+            }
+        })
+    }
+
+    fn saved(&self) -> usize {
+        self.stored
+    }
+}
+
+/// A CBOR segment
+///
+/// This type represents a single bytes or text segment on the wire. It can be
+/// read out in parsed chunks based on the size of the input scratch buffer.
+pub struct Segment<'r, R: Read, P: Parser> {
+    reader: &'r mut Decoder<R>,
+    unread: usize,
+    offset: usize,
+    parser: P,
+}
+
+impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
+    /// Gets the number of unprocessed bytes
+    #[inline]
+    pub fn left(&self) -> usize {
+        self.unread + self.parser.saved()
+    }
+
+    /// Gets the next parsed chunk within the segment
+    ///
+    /// Returns `Ok(None)` when all chunks have been read.
+    #[inline]
+    pub fn pull<'a>(
+        &mut self,
+        buffer: &'a mut [u8],
+    ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
+        use core::cmp::min;
+
+        let prev = self.parser.saved();
+        match self.unread {
+            0 if prev == 0 => return Ok(None),
+            0 => return Err(Error::Syntax(self.offset)),
+            _ => (),
+        }
+
+        // Determine how many bytes to read.
+        let size = min(buffer.len(), prev + self.unread);
+        let full = &mut buffer[..size];
+        let next = &mut full[min(size, prev)..];
+
+        // Read additional bytes.
+        self.reader.read_exact(next)?;
+        self.unread -= next.len();
+
+        self.parser
+            .parse(full)
+            .or(Err(Error::Syntax(self.offset)))
+            .map(Some)
+    }
+}
+
+/// A sequence of CBOR segments
+///
+/// CBOR allows for bytes or text items to be segmented. This type represents
+/// the state of that segmented input stream.
+pub struct Segments<'r, R: Read, P: Parser> {
+    reader: &'r mut Decoder<R>,
+    finish: bool,
+    nested: usize,
+    parser: PhantomData<P>,
+    unwrap: fn(Header) -> Result<Option<usize>, ()>,
+}
+
+impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
+    #[inline]
+    pub(crate) fn new(
+        decoder: &'r mut Decoder<R>,
+        unwrap: fn(Header) -> Result<Option<usize>, ()>,
+    ) -> Self {
+        Self {
+            reader: decoder,
+            finish: false,
+            nested: 0,
+            parser: PhantomData,
+            unwrap,
+        }
+    }
+
+    /// Gets the next segment in the stream
+    ///
+    /// Returns `Ok(None)` at the conclusion of the stream.
+    #[inline]
+    pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
+        while !self.finish {
+            let offset = self.reader.offset();
+            match self.reader.pull()? {
+                Header::Break if self.nested == 1 => return Ok(None),
+                Header::Break if self.nested > 1 => self.nested -= 1,
+                header => match (self.unwrap)(header) {
+                    Err(..) => return Err(Error::Syntax(offset)),
+                    Ok(None) => self.nested += 1,
+                    Ok(Some(len)) => {
+                        self.finish = self.nested == 0;
+                        return Ok(Some(Segment {
+                            reader: self.reader,
+                            unread: len,
+                            offset,
+                            parser: P::default(),
+                        }));
+                    }
+                },
+            }
+        }
+
+        Ok(None)
+    }
+}