--- /dev/null
+use std::ffi::OsStr;
+
+pub trait OsStrExt: private::Sealed {
+ /// Converts to a string slice, or returns the [`std::str::Utf8Error`] if the content is not valid UTF-8.
+ fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
+ /// Returns `true` if the given pattern matches a sub-slice of
+ /// this string slice.
+ ///
+ /// Returns `false` if it does not.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use clap_lex::OsStrExt as _;
+ /// let bananas = std::ffi::OsStr::new("bananas");
+ ///
+ /// assert!(bananas.contains("nana"));
+ /// assert!(!bananas.contains("apples"));
+ /// ```
+ fn contains(&self, needle: &str) -> bool;
+ /// Returns the byte index of the first character of this string slice that
+ /// matches the pattern.
+ ///
+ /// Returns [`None`] if the pattern doesn't match.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use clap_lex::OsStrExt as _;
+ /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
+ ///
+ /// assert_eq!(s.find("L"), Some(0));
+ /// assert_eq!(s.find("é"), Some(14));
+ /// assert_eq!(s.find("par"), Some(17));
+ /// ```
+ ///
+ /// Not finding the pattern:
+ ///
+ /// ```rust
+ /// use clap_lex::OsStrExt as _;
+ /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
+ ///
+ /// assert_eq!(s.find("1"), None);
+ /// ```
+ fn find(&self, needle: &str) -> Option<usize>;
+ /// Returns a string slice with the prefix removed.
+ ///
+ /// If the string starts with the pattern `prefix`, returns substring after the prefix, wrapped
+ /// in `Some`.
+ ///
+ /// If the string does not start with `prefix`, returns `None`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
+ /// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
+ /// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
+ /// ```
+ fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
+ /// Returns `true` if the given pattern matches a prefix of this
+ /// string slice.
+ ///
+ /// Returns `false` if it does not.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use clap_lex::OsStrExt as _;
+ /// let bananas = std::ffi::OsStr::new("bananas");
+ ///
+ /// assert!(bananas.starts_with("bana"));
+ /// assert!(!bananas.starts_with("nana"));
+ /// ```
+ fn starts_with(&self, prefix: &str) -> bool;
+ /// An iterator over substrings of this string slice, separated by
+ /// occurrences of `needle`.
+ ///
+ /// # Examples
+ ///
+ /// Simple patterns:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
+ /// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
+ ///
+ /// let v: Vec<_> = OsStr::new("").split("X").collect();
+ /// assert_eq!(v, [OsStr::new("")]);
+ ///
+ /// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
+ /// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
+ ///
+ /// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
+ /// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
+ /// ```
+ ///
+ /// If a string contains multiple contiguous separators, you will end up
+ /// with empty strings in the output:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// let x = OsStr::new("||||a||b|c");
+ /// let d: Vec<_> = x.split("|").collect();
+ ///
+ /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
+ /// ```
+ ///
+ /// Contiguous separators are separated by the empty string.
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// let x = OsStr::new("(///)");
+ /// let d: Vec<_> = x.split("/").collect();
+ ///
+ /// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
+ /// ```
+ ///
+ /// Separators at the start or end of a string are neighbored
+ /// by empty strings.
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// let d: Vec<_> = OsStr::new("010").split("0").collect();
+ /// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
+ /// ```
+ ///
+ /// Panics when the empty string is used as the separator:
+ ///
+ /// ```should_panic
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// let f: Vec<_> = OsStr::new("rust").split("").collect();
+ /// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
+ /// ```
+ ///
+ /// Contiguous separators can lead to possibly surprising behavior
+ /// when whitespace is used as the separator. This code is correct:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// let x = OsStr::new(" a b c");
+ /// let d: Vec<_> = x.split(" ").collect();
+ ///
+ /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
+ /// ```
+ ///
+ /// It does _not_ give you:
+ ///
+ /// ```ignore
+ /// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
+ /// ```
+ ///
+ /// Use [`split_whitespace`] (a `str` method, not part of this trait) for this behavior.
+ ///
+ /// [`split_whitespace`]: str::split_whitespace
+ fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
+ /// Divide one string slice into two at an index.
+ ///
+ /// The argument, `index`, is a byte offset from the start of the string. It should be on
+ /// the boundary of a UTF-8 code point; that is the caller's responsibility, it is not verified.
+ ///
+ /// The two slices returned go from the start of the string slice to `index`,
+ /// and from `index` to the end of the string slice.
+ ///
+ /// This trait has no mutable counterpart; for plain `str`, see the [`split_at_mut`]
+ /// method.
+ ///
+ /// [`split_at_mut`]: str::split_at_mut
+ ///
+ /// # Panics
+ ///
+ /// Panics if `index` is past the end of the string slice. An `index` that is not on a UTF-8
+ /// code point boundary is not detected and does not panic.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use clap_lex::OsStrExt as _;
+ /// let s = std::ffi::OsStr::new("Per Martin-Löf");
+ ///
+ /// let (first, last) = s.split_at(3);
+ ///
+ /// assert_eq!("Per", first);
+ /// assert_eq!(" Martin-Löf", last);
+ /// ```
+ fn split_at(&self, index: usize) -> (&OsStr, &OsStr);
+ /// Splits the string on the first occurrence of the specified delimiter and
+ /// returns prefix before delimiter and suffix after delimiter.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ /// use clap_lex::OsStrExt as _;
+ /// assert_eq!(OsStr::new("cfg").split_once("="), None);
+ /// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
+ /// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
+ /// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
+ /// ```
+ fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
+}
+
+impl OsStrExt for OsStr {
+ fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
+ let bytes = to_bytes(self);
+ std::str::from_utf8(bytes) // borrows the same bytes when they are valid UTF-8
+ }
+ // `contains` is `find` with the match position discarded.
+ fn contains(&self, needle: &str) -> bool {
+ self.find(needle).is_some()
+ }
+ // Naive scan: try every start offset that still leaves room for `needle`.
+ fn find(&self, needle: &str) -> Option<usize> {
+ (0..=self.len().checked_sub(needle.len())?) // `?`: a needle longer than the haystack cannot match
+ .find(|&x| to_bytes(self)[x..].starts_with(needle.as_bytes()))
+ }
+ // `strip_prefix` and `starts_with` compare raw bytes against the UTF-8 bytes of `prefix`.
+ fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
+ to_bytes(self)
+ .strip_prefix(prefix.as_bytes())
+ .map(to_os_str)
+ }
+ fn starts_with(&self, prefix: &str) -> bool {
+ to_bytes(self).starts_with(prefix.as_bytes())
+ }
+ // Builds the lazy iterator; the actual splitting happens in `Split::next`.
+ fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
+ assert_ne!(needle, ""); // an empty needle would match at offset 0 forever, so reject it
+ Split {
+ haystack: Some(self),
+ needle,
+ }
+ }
+ // Splits the raw bytes; panics (via the slice `split_at`) when `index` exceeds the length.
+ fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
+ let (first, second) = to_bytes(self).split_at(index);
+ (to_os_str(first), to_os_str(second))
+ }
+ // Splits around the first occurrence of `needle`; the needle itself is in neither half.
+ fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
+ let start = self.find(needle)?;
+ let end = start + needle.len();
+ let haystack = to_bytes(self);
+ let first = &haystack[0..start];
+ let second = &haystack[end..];
+ Some((to_os_str(first), to_os_str(second)))
+ }
+}
+
+mod private { // not `pub`: code outside the enclosing module cannot name `Sealed`
+ pub trait Sealed {}
+ // `OsStr` is the only type given the `Sealed` marker that `OsStrExt` requires.
+ impl Sealed for std::ffi::OsStr {}
+}
+
+/// Allow access to raw bytes
+///
+/// **Note:** the bytes only make sense when compared with ASCII or `&str`
+///
+/// **Note:** This must never be serialized as there is no guarantee as to how invalid UTF-8 will be
+/// encoded, even within the same version of this crate (since it's dependent on rustc version)
+fn to_bytes(s: &OsStr) -> &[u8] {
+ // SAFETY:
+ // - Lifetimes are the same
+ // - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
+ // - The primary contract is that the encoding for invalid surrogate code points is not
+ // guaranteed which isn't a problem here
+ // NOTE(review): the layout claim rests on std's internal definition of `OsStr` — confirm per target.
+ // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
+ // but it's in limbo
+ unsafe { std::mem::transmute(s) }
+}
+
+/// Restore raw bytes as `OsStr`; the bytes are reinterpreted as-is, with no validation
+fn to_os_str(s: &[u8]) -> &OsStr {
+ // SAFETY:
+ // - Lifetimes are the same
+ // - Types are compatible (`OsStr` is a transparent wrapper for `[u8]`)
+ // - The primary contract is that the encoding for invalid surrogate code points is not
+ // guaranteed which isn't a problem here
+ // NOTE(review): presumably `s` must originate from `to_bytes`; confirm all callers uphold this.
+ // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
+ // but it's in limbo
+ unsafe { std::mem::transmute(s) }
+}
+
+pub struct Split<'s, 'n> { // iterator state produced by `OsStrExt::split`
+ haystack: Option<&'s OsStr>, // text not yet yielded; `None` once iteration has finished
+ needle: &'n str, // separator; `OsStrExt::split` asserts it is non-empty
+}
+
+impl<'s, 'n> Iterator for Split<'s, 'n> {
+ type Item = &'s OsStr;
+ // Yields the piece before the next separator, or the whole remainder as the final item.
+ fn next(&mut self) -> Option<Self::Item> {
+ let haystack = self.haystack?; // `None` here means the final piece was already yielded
+ match haystack.split_once(self.needle) {
+ Some((first, second)) => {
+ if !haystack.is_empty() {
+ debug_assert_ne!(haystack, second); // progress check: the remainder must differ from the input
+ }
+ self.haystack = Some(second);
+ Some(first)
+ }
+ None => {
+ self.haystack = None; // no separator left: this is the last item
+ Some(haystack)
+ }
+ }
+ }
+}
//!
//! ```rust
//! use std::path::PathBuf;
+//! use std::ffi::OsStr;
//!
//! type BoxedError = Box<dyn std::error::Error + Send + Sync>;
//!
//! }
//!
//! impl Color {
-//! fn parse(s: Option<&clap_lex::RawOsStr>) -> Result<Self, BoxedError> {
+//! fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> {
//! let s = s.map(|s| s.to_str().ok_or(s));
//! match s {
//! Some(Ok("always")) | Some(Ok("")) | None => {
//! return Err(format!("Unexpected flag: -{}", c).into());
//! }
//! Err(e) => {
-//! return Err(format!("Unexpected flag: -{}", e.to_str_lossy()).into());
+//! return Err(format!("Unexpected flag: -{}", e.to_string_lossy()).into());
//! }
//! }
//! }
//! } else {
-//! args.paths.push(PathBuf::from(arg.to_value_os().to_os_str().into_owned()));
+//! args.paths.push(PathBuf::from(arg.to_value_os().to_owned()));
//! }
//! }
//!
//! println!("{:?}", args);
//! ```
+mod ext;
+
use std::ffi::OsStr;
use std::ffi::OsString;
pub use std::io::SeekFrom;
-pub use os_str_bytes::RawOsStr;
-pub use os_str_bytes::RawOsString;
+pub use ext::OsStrExt;
/// Command-line arguments
#[derive(Default, Clone, Debug, PartialEq, Eq)]
/// Command-line Argument
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParsedArg<'s> {
- inner: std::borrow::Cow<'s, RawOsStr>,
- utf8: Option<&'s str>,
+ inner: &'s OsStr,
}
impl<'s> ParsedArg<'s> {
fn new(inner: &'s OsStr) -> Self {
- let utf8 = inner.to_str();
- let inner = RawOsStr::new(inner);
- Self { inner, utf8 }
+ Self { inner }
}
/// Argument is length of 0
pub fn is_empty(&self) -> bool {
- self.inner.as_ref().is_empty()
+ self.inner.is_empty()
}
/// Does the argument look like a stdio argument (`-`)
pub fn is_stdio(&self) -> bool {
- self.inner.as_ref() == "-"
+ self.inner == "-"
}
/// Does the argument look like an argument escape (`--`)
pub fn is_escape(&self) -> bool {
- self.inner.as_ref() == "--"
+ self.inner == "--"
}
/// Does the argument look like a number
}
/// Treat as a long-flag
- pub fn to_long(&self) -> Option<(Result<&str, &RawOsStr>, Option<&RawOsStr>)> {
- if let Some(raw) = self.utf8 {
- let remainder = raw.strip_prefix("--")?;
- if remainder.is_empty() {
- debug_assert!(self.is_escape());
- return None;
- }
+ pub fn to_long(&self) -> Option<(Result<&str, &OsStr>, Option<&OsStr>)> {
+ let raw = self.inner;
+ let remainder = raw.strip_prefix("--")?;
+ if remainder.is_empty() {
+ debug_assert!(self.is_escape());
+ return None;
+ }
- let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
- (p0, Some(p1))
- } else {
- (remainder, None)
- };
- let flag = Ok(flag);
- let value = value.map(RawOsStr::from_str);
- Some((flag, value))
+ let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=") {
+ (p0, Some(p1))
} else {
- let raw = self.inner.as_ref();
- let remainder = raw.strip_prefix("--")?;
- if remainder.is_empty() {
- debug_assert!(self.is_escape());
- return None;
- }
-
- let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
- (p0, Some(p1))
- } else {
- (remainder, None)
- };
- let flag = flag.to_str().ok_or(flag);
- Some((flag, value))
- }
+ (remainder, None)
+ };
+ let flag = flag.to_str().ok_or(flag);
+ Some((flag, value))
}
/// Can treat as a long-flag
pub fn is_long(&self) -> bool {
- self.inner.as_ref().starts_with("--") && !self.is_escape()
+ self.inner.starts_with("--") && !self.is_escape()
}
/// Treat as a short-flag
pub fn to_short(&self) -> Option<ShortFlags<'_>> {
- if let Some(remainder_os) = self.inner.as_ref().strip_prefix('-') {
- if remainder_os.starts_with('-') {
+ if let Some(remainder_os) = self.inner.strip_prefix("-") {
+ if remainder_os.starts_with("-") {
None
} else if remainder_os.is_empty() {
debug_assert!(self.is_stdio());
None
} else {
- let remainder = self.utf8.map(|s| &s[1..]);
- Some(ShortFlags::new(remainder_os, remainder))
+ Some(ShortFlags::new(remainder_os))
}
} else {
None
/// Can treat as a short-flag
pub fn is_short(&self) -> bool {
- self.inner.as_ref().starts_with('-')
- && !self.is_stdio()
- && !self.inner.as_ref().starts_with("--")
+ self.inner.starts_with("-") && !self.is_stdio() && !self.inner.starts_with("--")
}
/// Treat as a value
///
/// **NOTE:** May return a flag or an escape.
- pub fn to_value_os(&self) -> &RawOsStr {
- self.inner.as_ref()
+ pub fn to_value_os(&self) -> &OsStr {
+ self.inner
}
/// Treat as a value
///
/// **NOTE:** May return a flag or an escape.
- pub fn to_value(&self) -> Result<&str, &RawOsStr> {
- self.utf8.ok_or_else(|| self.inner.as_ref())
+ pub fn to_value(&self) -> Result<&str, &OsStr> {
+ self.inner.to_str().ok_or(self.inner)
}
/// Safely print an argument that may contain non-UTF8 content
///
/// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead.
pub fn display(&self) -> impl std::fmt::Display + '_ {
- self.inner.to_str_lossy()
+ self.inner.to_string_lossy()
}
}
/// Walk through short flags within a [`ParsedArg`]
#[derive(Clone, Debug)]
pub struct ShortFlags<'s> {
- inner: &'s RawOsStr,
+ inner: &'s OsStr,
utf8_prefix: std::str::CharIndices<'s>,
- invalid_suffix: Option<&'s RawOsStr>,
+ invalid_suffix: Option<&'s OsStr>,
}
impl<'s> ShortFlags<'s> {
- fn new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self {
- let (utf8_prefix, invalid_suffix) = if let Some(utf8) = utf8 {
- (utf8, None)
- } else {
- split_nonutf8_once(inner)
- };
+ fn new(inner: &'s OsStr) -> Self {
+ let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner);
let utf8_prefix = utf8_prefix.char_indices();
Self {
inner,
/// Advance the iterator, returning the next short flag on success
///
/// On error, returns the invalid-UTF8 value
- pub fn next_flag(&mut self) -> Option<Result<char, &'s RawOsStr>> {
+ pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> {
if let Some((_, flag)) = self.utf8_prefix.next() {
return Some(Ok(flag));
}
}
/// Advance the iterator, returning everything left as a value
- pub fn next_value_os(&mut self) -> Option<&'s RawOsStr> {
+ pub fn next_value_os(&mut self) -> Option<&'s OsStr> {
if let Some((index, _)) = self.utf8_prefix.next() {
self.utf8_prefix = "".char_indices();
self.invalid_suffix = None;
- return Some(&self.inner[index..]);
+ return Some(self.inner.split_at(index).1);
}
if let Some(suffix) = self.invalid_suffix {
}
impl<'s> Iterator for ShortFlags<'s> {
- type Item = Result<char, &'s RawOsStr>;
+ type Item = Result<char, &'s OsStr>;
fn next(&mut self) -> Option<Self::Item> {
self.next_flag()
}
}
-fn split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>) {
- match std::str::from_utf8(b.as_raw_bytes()) {
+fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) {
+ match b.try_str() {
Ok(s) => (s, None),
Err(err) => {
let (valid, after_valid) = b.split_at(err.valid_up_to());
- let valid = std::str::from_utf8(valid.as_raw_bytes()).unwrap();
+ let valid = valid.try_str().unwrap();
(valid, Some(after_valid))
}
}