// peg-runtime 0.8.1 -- runtime support for rust-peg grammars.
//
// Provenance: imported by DongHun Kwak on Wed, 22 Mar 2023 16:12:17 +0900
// (commit 9695dd839e27920c64d34fe467c9af194ed0ce72, "Import peg-runtime 0.8.1");
// upstream git sha1 4a99950b26cc9cb7fd483ef0983c20e66f68eb2f,
// path_in_vcs "peg-runtime".
//
// Package metadata (Cargo.toml / Cargo.toml.orig):
//   name        = "peg-runtime"
//   version     = "0.8.1"
//   edition     = "2018"
//   authors     = ["Kevin Mehall"]
//   license     = "MIT"
//   repository  = "https://github.com/kevinmehall/rust-peg"
//   description = "Runtime support for rust-peg grammars. To use rust-peg, see the `peg` crate."
//   [lib] path  = "lib.rs"
//
// The import consisted of lib.rs, error.rs, slice.rs and str.rs. They are kept
// here as inline modules so that every public path (`crate::error::ParseError`,
// `crate::str::LineCol`, `crate::RuleResult`, ...) is unchanged.
//
// LICENSE
//
// Copyright (C) 2013 Kevin Mehall
//
// Permission is hereby granted, free of charge, to any
// person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the
// Software without restriction, including without
// limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice
// shall be included in all copies or substantial portions
// of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

use std::fmt::Display;

/// The result type used internally in the parser.
///
/// You'll only need this if implementing the `Parse*` traits for a custom input
/// type. The public API of a parser adapts errors to `std::result::Result`.
#[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
pub enum RuleResult<T> {
    /// Success, with final location
    Matched(usize, T),

    /// Failure (furthest failure location is not yet known)
    Failed,
}

/// A type that can be used as input to a parser.
// The explicit `'input` lifetimes are part of the published method signatures,
// so they are kept and the lint is silenced instead.
#[allow(clippy::needless_lifetimes)]
pub trait Parse {
    /// Human-readable form of a position, used in error messages.
    type PositionRepr: Display;

    /// Position at which parsing starts.
    fn start<'input>(&'input self) -> usize;

    /// Whether position `p` is at (or past) the end of the input.
    fn is_eof<'input>(&'input self, p: usize) -> bool;

    /// Convert position `p` into its displayable representation.
    fn position_repr<'input>(&'input self, p: usize) -> Self::PositionRepr;
}

/// A parser input type supporting the `[...]` syntax.
pub trait ParseElem<'input>: Parse {
    /// Type of a single atomic element of the input, for example a character or token
    type Element: Copy;

    /// Get the element at `pos`, or `Failed` if past end of input.
    fn parse_elem(&'input self, pos: usize) -> RuleResult<Self::Element>;
}

/// A parser input type supporting the `"literal"` syntax.
pub trait ParseLiteral: Parse {
    /// Attempt to match the `literal` string at `pos`, returning whether it
    /// matched or failed.
    fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()>;
}

/// A parser input type supporting the `$()` syntax.
pub trait ParseSlice<'input>: Parse {
    /// Type of a slice of the input.
    type Slice;

    /// Get a slice of input.
    fn parse_slice(&'input self, p1: usize, p2: usize) -> Self::Slice;
}

pub mod error {
    //! Parse error reporting

    use crate::{Parse, RuleResult};
    use std::collections::HashSet;
    use std::error::Error;
    use std::fmt::{self, Debug, Display};

    /// A set of literals or names that failed to match
    #[derive(PartialEq, Eq, Debug, Clone)]
    pub struct ExpectedSet {
        expected: HashSet<&'static str>,
    }

    impl ExpectedSet {
        /// Iterator of expected literals
        pub fn tokens<'a>(&'a self) -> impl Iterator<Item = &'static str> + 'a {
            self.expected.iter().copied()
        }
    }

    impl Display for ExpectedSet {
        /// Writes `<unreported>` for an empty set, the bare token for a single
        /// entry, and `one of a, b, ...` otherwise.
        fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
            let mut tokens: Vec<&'static str> = self.tokens().collect();
            match tokens.len() {
                0 => fmt.write_str("<unreported>"),
                1 => fmt.write_str(tokens[0]),
                _ => {
                    // HashSet iteration order is unspecified; sort so the
                    // message is deterministic.
                    tokens.sort_unstable();
                    write!(fmt, "one of {}", tokens.join(", "))
                }
            }
        }
    }

    /// A parse failure.
    #[derive(PartialEq, Eq, Debug, Clone)]
    pub struct ParseError<L> {
        /// The furthest position the parser reached in the input before failing.
        pub location: L,

        /// The set of literals that failed to match at that position.
        pub expected: ExpectedSet,
    }

    impl<L: Display> Display for ParseError<L> {
        fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
            write!(
                fmt,
                "error at {}: expected {}",
                self.location, self.expected
            )
        }
    }

    impl<L: Display + Debug> Error for ParseError<L> {
        // Deprecated in std, but kept so existing callers of `description()`
        // continue to get the same string.
        fn description(&self) -> &str {
            "parse error"
        }
    }

    /// Bookkeeping used while parsing to find the furthest failure and what
    /// was expected there.
    #[doc(hidden)]
    pub struct ErrorState {
        /// Furthest failure we've hit so far.
        pub max_err_pos: usize,

        /// Are we inside a lookahead/quiet block? If so, failure is disabled.
        /// Non-zero => yes, to support nested blocks.
        pub suppress_fail: usize,

        /// Are we reparsing after a failure? If so, compute and store expected set of all alternative expectations
        /// when we are at offset `max_err_pos`.
        pub reparsing_on_error: bool,

        /// The set of tokens we expected to find when we hit the failure. Updated when `reparsing_on_error`.
        pub expected: ExpectedSet,
    }

    impl ErrorState {
        /// Create a fresh state whose furthest failure is `initial_pos`.
        pub fn new(initial_pos: usize) -> Self {
            ErrorState {
                max_err_pos: initial_pos,
                suppress_fail: 0,
                reparsing_on_error: false,
                expected: ExpectedSet {
                    expected: HashSet::new(),
                },
            }
        }

        /// Set up for reparsing to record the details of the furthest failure.
        pub fn reparse_for_error(&mut self) {
            self.suppress_fail = 0;
            self.reparsing_on_error = true;
        }

        /// Record `expected` if the failure is at the furthest position.
        /// Only reached during the reparse pass; kept out of line so that
        /// `mark_failure` stays small when inlined.
        #[inline(never)]
        pub fn mark_failure_slow_path(&mut self, pos: usize, expected: &'static str) {
            if pos == self.max_err_pos {
                self.expected.expected.insert(expected);
            }
        }

        /// Flag a failure.
        ///
        /// On the first pass this only advances `max_err_pos`; on the reparse
        /// pass it collects the expected tokens at that position. Does nothing
        /// while failures are suppressed. Always returns `RuleResult::Failed`.
        #[inline(always)]
        pub fn mark_failure(&mut self, pos: usize, expected: &'static str) -> RuleResult<()> {
            if self.suppress_fail == 0 {
                if self.reparsing_on_error {
                    self.mark_failure_slow_path(pos, expected);
                } else if pos > self.max_err_pos {
                    self.max_err_pos = pos;
                }
            }
            RuleResult::Failed
        }

        /// Convert the accumulated state into a `ParseError`, using `input`
        /// to render the furthest failure position.
        pub fn into_parse_error<I: Parse + ?Sized>(self, input: &I) -> ParseError<I::PositionRepr> {
            ParseError {
                location: input.position_repr(self.max_err_pos),
                expected: self.expected,
            }
        }
    }
}

mod slice {
    use super::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult};

    impl<T> Parse for [T] {
        type PositionRepr = usize;

        fn start(&self) -> usize {
            0
        }

        fn is_eof(&self, pos: usize) -> bool {
            pos >= self.len()
        }

        fn position_repr(&self, pos: usize) -> usize {
            pos
        }
    }

    impl<'input, T: 'input + Copy> ParseElem<'input> for [T] {
        type Element = T;

        /// Returns the element at `pos`, or `Failed` for any `pos` at or
        /// beyond the end of the slice (never panics).
        fn parse_elem(&'input self, pos: usize) -> RuleResult<T> {
            match self.get(pos) {
                Some(&elem) => RuleResult::Matched(pos + 1, elem),
                None => RuleResult::Failed,
            }
        }
    }

    impl ParseLiteral for [u8] {
        /// Matches the bytes of `literal` at `pos`. Out-of-range positions
        /// fail instead of overflowing `pos + literal.len()`.
        fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> {
            let wanted = literal.as_bytes();
            match self.get(pos..) {
                // `pos + wanted.len() <= self.len()` here, so no overflow.
                Some(rest) if rest.starts_with(wanted) => {
                    RuleResult::Matched(pos + wanted.len(), ())
                }
                _ => RuleResult::Failed,
            }
        }
    }

    impl<'input, T: 'input> ParseSlice<'input> for [T] {
        type Slice = &'input [T];

        /// Returns `self[p1..p2]`.
        ///
        /// # Panics
        /// Panics if `p1 > p2` or `p2 > self.len()`.
        fn parse_slice(&'input self, p1: usize, p2: usize) -> &'input [T] {
            &self[p1..p2]
        }
    }
}

pub mod str {
    //! Utilities for `str` input

    use super::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult};
    use std::fmt;

    /// Line and column within a string
    #[derive(PartialEq, Eq, Debug, Clone, Copy)]
    pub struct LineCol {
        /// Line (1-indexed)
        pub line: usize,

        /// Column (1-indexed)
        pub column: usize,

        /// Byte offset from start of string (0-indexed)
        pub offset: usize,
    }

    impl fmt::Display for LineCol {
        /// Formats as `line:column`.
        fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
            write!(fmt, "{}:{}", self.line, self.column)
        }
    }

    impl Parse for str {
        type PositionRepr = LineCol;

        fn start(&self) -> usize {
            0
        }

        fn is_eof(&self, pos: usize) -> bool {
            pos >= self.len()
        }

        /// Computes the 1-indexed line and column of byte offset `pos`.
        /// Columns count `char`s since the last `'\n'`.
        ///
        /// # Panics
        /// Panics if `pos` is past the end of the string or not on a char
        /// boundary.
        fn position_repr(&self, pos: usize) -> LineCol {
            let before = &self[..pos];
            let line = before.bytes().filter(|&b| b == b'\n').count() + 1;
            let column = before.chars().rev().take_while(|&c| c != '\n').count() + 1;
            LineCol {
                line,
                column,
                offset: pos,
            }
        }
    }

    impl<'input> ParseElem<'input> for str {
        type Element = char;

        /// Returns the `char` starting at byte offset `pos`, or `Failed` if
        /// `pos` is at/after the end or not on a char boundary (never panics).
        fn parse_elem(&'input self, pos: usize) -> RuleResult<char> {
            match self.get(pos..).and_then(|rest| rest.chars().next()) {
                Some(c) => RuleResult::Matched(pos + c.len_utf8(), c),
                None => RuleResult::Failed,
            }
        }
    }

    impl ParseLiteral for str {
        /// Matches `literal` byte-wise at byte offset `pos`. Out-of-range
        /// positions fail instead of overflowing `pos + literal.len()`.
        fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> {
            let wanted = literal.as_bytes();
            match self.as_bytes().get(pos..) {
                // `pos + wanted.len() <= self.len()` here, so no overflow.
                Some(rest) if rest.starts_with(wanted) => {
                    RuleResult::Matched(pos + wanted.len(), ())
                }
                _ => RuleResult::Failed,
            }
        }
    }

    impl<'input> ParseSlice<'input> for str {
        type Slice = &'input str;

        /// Returns `self[p1..p2]`.
        ///
        /// # Panics
        /// Panics if the range is out of bounds or not on char boundaries.
        fn parse_slice(&'input self, p1: usize, p2: usize) -> &'input str {
            &self[p1..p2]
        }
    }
}