From fcdec046d26ac55941e7b8a996583656111d6bbe Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Wed, 22 Mar 2023 15:19:10 +0900 Subject: [PATCH] Import convert_case 0.6.0 --- .cargo_vcs_info.json | 6 + .gitignore | 3 + Cargo.toml | 47 ++++ Cargo.toml.orig | 32 +++ LICENSE | 21 ++ README.md | 71 ++++++ justfile | 32 +++ src/case.rs | 397 ++++++++++++++++++++++++++++++ src/converter.rs | 393 ++++++++++++++++++++++++++++++ src/lib.rs | 661 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/pattern.rs | 354 +++++++++++++++++++++++++++ src/segmentation.rs | 459 +++++++++++++++++++++++++++++++++++ tests/string_types.rs | 41 ++++ 13 files changed, 2517 insertions(+) create mode 100644 .cargo_vcs_info.json create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 Cargo.toml.orig create mode 100644 LICENSE create mode 100644 README.md create mode 100644 justfile create mode 100644 src/case.rs create mode 100644 src/converter.rs create mode 100644 src/lib.rs create mode 100644 src/pattern.rs create mode 100644 src/segmentation.rs create mode 100644 tests/string_types.rs diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..4047d7a --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "a8702a05217664ca59cb7471df68a91dcf4b91ee" + }, + "path_in_vcs": "" +} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9ddc34c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +/ccase/test/tmp +cobertura.xml diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d068ede --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,47 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. 
+# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "convert_case" +version = "0.6.0" +authors = ["Rutrum "] +description = "Convert strings into any case" +readme = "README.md" +keywords = [ + "casing", + "case", + "string", +] +categories = ["text-processing"] +license = "MIT" +repository = "https://github.com/rutrum/convert-case" + +[profile.release] +lto = true +codegen-units = 1 +panic = "abort" + +[dependencies.rand] +version = "^0.7" +optional = true + +[dependencies.unicode-segmentation] +version = "1.9.0" + +[dev-dependencies.strum] +version = "0.18.0" + +[dev-dependencies.strum_macros] +version = "0.18.0" + +[features] +random = ["rand"] diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..a0db5bb --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,32 @@ +[package] +name = "convert_case" +version = "0.6.0" +authors = ["Rutrum "] +edition = "2018" +description = "Convert strings into any case" +license = "MIT" +keywords = [ "casing", "case", "string" ] +categories = [ "text-processing" ] +readme = "README.md" +repository = "https://github.com/rutrum/convert-case" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[workspace] +members = ["ccase"] + +[profile.release] +codegen-units = 1 +lto = true +panic = 'abort' + +[features] +random = ["rand"] + +[dependencies] +rand = { version = "^0.7", optional = true } +unicode-segmentation = "1.9.0" + +[dev-dependencies] +strum = "0.18.0" +strum_macros = "0.18.0" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..aea2ac6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 David Purdum + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 
"Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f5c62df --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +# Convert Case + +Converts to and from various cases. + +## Rust Library `convert_case` + +Convert case was written in Rust and is ready to be used inline with your rust code as a library. +```{rust} +use convert_case::{Case, Casing}; + +assert_eq!("ronnieJamesDio", "Ronnie_James_dio".to_case(Case::Camel)); +assert_eq!("io_stream", "IOStream".to_case(Case::Snake)); +assert_eq!( + "2020-04-16 My Cat Cali", + "2020-04-16_my_cat_cali".from_case(Case::Snake).to_case(Case::Title) +); +``` +You can read the API documentation on [docs.rs](https://docs.rs/convert_case/) for a list of all features and read lots of examples. + +## Command Line Utility `ccase` + +The command line utility `ccase` was made to leverage the tools in the `convert_case` library. 
+``` +$ ccase -t title super_mario_64 +Super Mario 64 + +$ ccase -f snake -t title 2020-04-16_my_cat_cali +2020-04-16 My Cat Cali + +$ ccase -t camel "convert to camel" +convertToCamel +``` + +You can read more about the `ccase` executable in the [`ccase` directory](https://github.com/rutrum/convert-case/tree/master/ccase) within this repository. + +## Links + +| | `convert_case` | `ccase` | +| --- | --- | --- | +| Repository | [github](https://github.com/rutrum/convert-case) | [github](https://github.com/rutrum/convert-case/tree/master/ccase) | +| Crate | [crates.io](https://crates.io/crates/convert_case) | [crates.io](https://crates.io/crates/ccase) | +| Documentation | [docs.rs](https://docs.rs/convert_case) | | + +## Cases + +This is a list of cases that convert\_case supports. Some cases are simply aliases of others. The "Random" and "PseudoRandom" cases are provided in the `convert_case` library with the "random" feature, and are automatically provided in the `ccase` binary. + +| Case | Example | +| ---- | ------- | +| Upper | MY VARIABLE NAME | +| Lower | my variable name | +| Title | My Variable Name | +| Toggle | mY vARIABLE nAME | +| Alternating | mY vArIaBlE nAmE | +| Camel | myVariableName | +| Pascal | MyVariableName | +| UpperCamel | MyVariableName | +| Snake | my\_variable\_name | +| UpperSnake | MY\_VARIABLE\_NAME | +| ScreamingSnake | MY\_VARIABLE\_NAME | +| Kebab | my-variable-name | +| Cobol | MY-VARIABLE-NAME | +| Train | My-Variable-Name | +| Flat | myvariablename | +| UpperFlat | MYVARIABLENAME | +| Random | MY vaRiabLe nAME | +| PseudoRandom | mY VaRiAblE nAMe | + +## License + +Licensed under [MIT License](./LICENSE). 
diff --git a/justfile b/justfile new file mode 100644 index 0000000..54c8804 --- /dev/null +++ b/justfile @@ -0,0 +1,32 @@ +test: + cargo test --all + +watch-test: + watchexec -- "reset && just test" + +build: + cargo build --all + +watch-build: + watchexec -- "reset && just build" + +coverage: + cargo tarpaulin --all-features --out Xml && pycobertura show cobertura.xml + +doc: + cargo doc --all-features + +watch-doc: + watchexec -- "just doc && cargo test --all-features --doc" + +tree: + tree -I target + +test-ccase: build-ccase + cargo test -p ccase --no-fail-fast + +build-ccase: + cargo build -p ccase + +run *OPTIONS: + cargo run -p ccase -- {{OPTIONS}} diff --git a/src/case.rs b/src/case.rs new file mode 100644 index 0000000..fdcfb1c --- /dev/null +++ b/src/case.rs @@ -0,0 +1,397 @@ +#[cfg(test)] +use strum_macros::EnumIter; + +use crate::pattern::Pattern; +use crate::Boundary; + +/// Defines the type of casing a string can be. +/// +/// ``` +/// use convert_case::{Case, Casing}; +/// +/// let super_mario_title: String = "super_mario_64".to_case(Case::Title); +/// assert_eq!("Super Mario 64", super_mario_title); +/// ``` +/// +/// A case is the pair of a [pattern](enum.Pattern.html) and a delimeter (a string). Given +/// a list of words, a pattern describes how to mutate the words and a delimeter is how the mutated +/// words are joined together. These inherantly are the properties of what makes a "multiword +/// identifier case", or simply "case". +/// +/// This crate provides the ability to convert "from" a case. This introduces a different feature +/// of cases which are the [word boundaries](Boundary) that segment the identifier into words. For example, a +/// snake case identifier `my_var_name` can be split on underscores `_` to segment into words. A +/// camel case identifier `myVarName` is split where a lowercase letter is followed by an +/// uppercase letter. 
Each case is also associated with a list of boundaries that are used when +/// converting "from" a particular case. +#[cfg_attr(test, derive(EnumIter))] +#[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)] +pub enum Case { + /// Uppercase strings are delimited by spaces and all characters are uppercase. + /// * Boundaries: [Space](`Boundary::Space`) + /// * Pattern: [Uppercase](`Pattern::Uppercase`) + /// * Delimeter: Space + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("MY VARIABLE NAME", "My variable NAME".to_case(Case::Upper)) + /// ``` + Upper, + + /// Lowercase strings are delimited by spaces and all characters are lowercase. + /// * Boundaries: [Space](`Boundary::Space`) + /// * Pattern: [Lowercase](`Pattern::Lowercase`) + /// * Delimeter: Space + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("my variable name", "My variable NAME".to_case(Case::Lower)) + /// ``` + Lower, + + /// Title case strings are delimited by spaces. Only the leading character of + /// each word is uppercase. No inferences are made about language, so words + /// like "as", "to", and "for" will still be capitalized. + /// * Boundaries: [Space](`Boundary::Space`) + /// * Pattern: [Capital](`Pattern::Capital`) + /// * Delimeter: Space + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("My Variable Name", "My variable NAME".to_case(Case::Title)) + /// ``` + Title, + + /// Toggle case strings are delimited by spaces. All characters are uppercase except + /// for the leading character of each word, which is lowercase. + /// * Boundaries: [Space](`Boundary::Space`) + /// * Pattern: [Toggle](`Pattern::Toggle`) + /// * Delimeter: Space + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("mY vARIABLE nAME", "My variable NAME".to_case(Case::Toggle)) + /// ``` + Toggle, + + /// Camel case strings are lowercase, but for every word _except the first_ the + /// first letter is capitalized. 
+ /// * Boundaries: [LowerUpper](Boundary::LowerUpper), [DigitUpper](Boundary::DigitUpper), + /// [UpperDigit](Boundary::UpperDigit), [DigitLower](Boundary::DigitLower), + /// [LowerDigit](Boundary::LowerDigit), [Acronym](Boundary::Acronym) + /// * Pattern: [Camel](`Pattern::Camel`) + /// * Delimeter: No delimeter + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("myVariableName", "My variable NAME".to_case(Case::Camel)) + /// ``` + Camel, + + /// Pascal case strings are lowercase, but for every word the + /// first letter is capitalized. + /// * Boundaries: [LowerUpper](Boundary::LowerUpper), [DigitUpper](Boundary::DigitUpper), + /// [UpperDigit](Boundary::UpperDigit), [DigitLower](Boundary::DigitLower), + /// [LowerDigit](Boundary::LowerDigit), [Acronym](Boundary::Acronym) + /// * Pattern: [Capital](`Pattern::Capital`) + /// * Delimeter: No delimeter + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("MyVariableName", "My variable NAME".to_case(Case::Pascal)) + /// ``` + Pascal, + + /// Upper camel case is an alternative name for [Pascal case](Case::Pascal). + UpperCamel, + + /// Snake case strings are delimited by underscores `_` and are all lowercase. + /// * Boundaries: [Underscore](Boundary::Underscore) + /// * Pattern: [Lowercase](Pattern::Lowercase) + /// * Delimeter: Underscore `_` + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("my_variable_name", "My variable NAME".to_case(Case::Snake)) + /// ``` + Snake, + + /// Upper snake case strings are delimited by underscores `_` and are all uppercase. + /// * Boundaries: [Underscore](Boundary::Underscore) + /// * Pattern: [Uppercase](Pattern::Uppercase) + /// * Delimeter: Underscore `_` + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("MY_VARIABLE_NAME", "My variable NAME".to_case(Case::UpperSnake)) + /// ``` + UpperSnake, + + /// Screaming snake case is an alternative name for [upper snake case](Case::UpperSnake). 
+ ScreamingSnake, + + /// Kebab case strings are delimited by hyphens `-` and are all lowercase. + /// * Boundaries: [Hyphen](Boundary::Hyphen) + /// * Pattern: [Lowercase](Pattern::Lowercase) + /// * Delimeter: Hyphen `-` + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("my-variable-name", "My variable NAME".to_case(Case::Kebab)) + /// ``` + Kebab, + + /// Cobol case strings are delimited by hyphens `-` and are all uppercase. + /// * Boundaries: [Hyphen](Boundary::Hyphen) + /// * Pattern: [Uppercase](Pattern::Uppercase) + /// * Delimeter: Hyphen `-` + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("MY-VARIABLE-NAME", "My variable NAME".to_case(Case::Cobol)) + /// ``` + Cobol, + + /// Upper kebab case is an alternative name for [Cobol case](Case::Cobol). + UpperKebab, + + /// Train case strings are delimited by hyphens `-`. All characters are lowercase + /// except for the leading character of each word. + /// * Boundaries: [Hyphen](Boundary::Hyphen) + /// * Pattern: [Capital](Pattern::Capital) + /// * Delimeter: Hyphen `-` + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("My-Variable-Name", "My variable NAME".to_case(Case::Train)) + /// ``` + Train, + + /// Flat case strings are all lowercase, with no delimiter. Note that word boundaries are lost. + /// * Boundaries: No boundaries + /// * Pattern: [Lowercase](Pattern::Lowercase) + /// * Delimeter: No delimeter + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("myvariablename", "My variable NAME".to_case(Case::Flat)) + /// ``` + Flat, + + /// Upper flat case strings are all uppercase, with no delimiter. Note that word boundaries are lost. 
+ /// * Boundaries: No boundaries + /// * Pattern: [Uppercase](Pattern::Uppercase) + /// * Delimeter: No delimeter + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("MYVARIABLENAME", "My variable NAME".to_case(Case::UpperFlat)) + /// ``` + UpperFlat, + + /// Alternating case strings are delimited by spaces. Characters alternate between uppercase + /// and lowercase. + /// * Boundaries: [Space](Boundary::Space) + /// * Pattern: [Alternating](Pattern::Alternating) + /// * Delimeter: Space + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// assert_eq!("mY vArIaBlE nAmE", "My variable NAME".to_case(Case::Alternating)); + /// ``` + Alternating, + + /// Random case strings are delimited by spaces and characters are + /// randomly upper case or lower case. This uses the `rand` crate + /// and is only available with the "random" feature. + /// * Boundaries: [Space](Boundary::Space) + /// * Pattern: [Random](Pattern::Random) + /// * Delimeter: Space + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// let new = "My variable NAME".to_case(Case::Random); + /// ``` + /// String `new` could be "My vaRIAbLE nAme" for example. + #[cfg(any(doc, feature = "random"))] + Random, + + /// Pseudo-random case strings are delimited by spaces and characters are randomly + /// upper case or lower case, but there will never more than two consecutive lower + /// case or upper case letters in a row. This uses the `rand` crate and is + /// only available with the "random" feature. + /// * Boundaries: [Space](Boundary::Space) + /// * Pattern: [PseudoRandom](Pattern::PseudoRandom) + /// * Delimeter: Space + /// + /// ``` + /// use convert_case::{Case, Casing}; + /// let new = "My variable NAME".to_case(Case::Random); + /// ``` + /// String `new` could be "mY vArIAblE NamE" for example. + #[cfg(any(doc, feature = "random"))] + PseudoRandom, +} + +impl Case { + /// Returns the delimiter used in the corresponding case. 
The following + /// table outlines which cases use which delimeter. + /// + /// | Cases | Delimeter | + /// | --- | --- | + /// | Upper, Lower, Title, Toggle, Alternating, Random, PseudoRandom | Space | + /// | Snake, UpperSnake, ScreamingSnake | Underscore `_` | + /// | Kebab, Cobol, UpperKebab, Train | Hyphen `-` | + /// | UpperFlat, Flat, Camel, UpperCamel, Pascal | Empty string, no delimeter | + pub const fn delim(&self) -> &'static str { + use Case::*; + match self { + Upper | Lower | Title | Toggle | Alternating => " ", + Snake | UpperSnake | ScreamingSnake => "_", + Kebab | Cobol | UpperKebab | Train => "-", + + #[cfg(feature = "random")] + Random | PseudoRandom => " ", + + UpperFlat | Flat | Camel | UpperCamel | Pascal => "", + } + } + + /// Returns the pattern used in the corresponding case. The following + /// table outlines which cases use which pattern. + /// + /// | Cases | Pattern | + /// | --- | --- | + /// | Upper, UpperSnake, ScreamingSnake, UpperFlat, Cobol, UpperKebab | Uppercase | + /// | Lower, Snake, Kebab, Flat | Lowercase | + /// | Title, Pascal, UpperCamel, Train | Capital | + /// | Camel | Camel | + /// | Alternating | Alternating | + /// | Random | Random | + /// | PseudoRandom | PseudoRandom | + pub const fn pattern(&self) -> Pattern { + use Case::*; + match self { + Upper | UpperSnake | ScreamingSnake | UpperFlat | Cobol | UpperKebab => { + Pattern::Uppercase + } + Lower | Snake | Kebab | Flat => Pattern::Lowercase, + Title | Pascal | UpperCamel | Train => Pattern::Capital, + Camel => Pattern::Camel, + Toggle => Pattern::Toggle, + Alternating => Pattern::Alternating, + + #[cfg(feature = "random")] + Random => Pattern::Random, + #[cfg(feature = "random")] + PseudoRandom => Pattern::PseudoRandom, + } + } + + /// Returns the boundaries used in the corresponding case. That is, where can word boundaries + /// be distinguished in a string of the given case. The table outlines which cases use which + /// set of boundaries. 
+ /// + /// | Cases | Boundaries | + /// | --- | --- | + /// | Upper, Lower, Title, Toggle, Alternating, Random, PseudoRandom | Space | + /// | Snake, UpperSnake, ScreamingSnake | Underscore `_` | + /// | Kebab, Cobol, UpperKebab, Train | Hyphen `-` | + /// | Camel, UpperCamel, Pascal | LowerUpper, LowerDigit, UpperDigit, DigitLower, DigitUpper, Acronym | + /// | UpperFlat, Flat | No boundaries | + pub fn boundaries(&self) -> Vec { + use Boundary::*; + use Case::*; + match self { + Upper | Lower | Title | Toggle | Alternating => vec![Space], + Snake | UpperSnake | ScreamingSnake => vec![Underscore], + Kebab | Cobol | UpperKebab | Train => vec![Hyphen], + + #[cfg(feature = "random")] + Random | PseudoRandom => vec![Space], + + UpperFlat | Flat => vec![], + Camel | UpperCamel | Pascal => vec![ + LowerUpper, Acronym, LowerDigit, UpperDigit, DigitLower, DigitUpper, + ], + } + } + + // Created to avoid using the EnumIter trait from strum in + // final library. A test confirms that all cases are listed here. + /// Returns a vector with all case enum variants in no particular order. + pub fn all_cases() -> Vec { + use Case::*; + vec![ + Upper, + Lower, + Title, + Toggle, + Camel, + Pascal, + UpperCamel, + Snake, + UpperSnake, + ScreamingSnake, + Kebab, + Cobol, + UpperKebab, + Train, + Flat, + UpperFlat, + Alternating, + #[cfg(feature = "random")] + Random, + #[cfg(feature = "random")] + PseudoRandom, + ] + } + + /// Returns a vector with the two "random" feature cases `Random` and `PseudoRandom`. Only + /// defined in the "random" feature. + #[cfg(feature = "random")] + pub fn random_cases() -> Vec { + use Case::*; + vec![Random, PseudoRandom] + } + + /// Returns a vector with all the cases that do not depend on randomness. This is all + /// the cases not in the "random" feature. 
+ pub fn deterministic_cases() -> Vec { + use Case::*; + vec![ + Upper, + Lower, + Title, + Toggle, + Camel, + Pascal, + UpperCamel, + Snake, + UpperSnake, + ScreamingSnake, + Kebab, + Cobol, + UpperKebab, + Train, + Flat, + UpperFlat, + Alternating, + ] + } +} + +#[cfg(test)] +mod test { + + use super::*; + use strum::IntoEnumIterator; + + #[test] + fn all_cases_in_iter() { + let all = Case::all_cases(); + for case in Case::iter() { + assert!(all.contains(&case)); + } + } +} diff --git a/src/converter.rs b/src/converter.rs new file mode 100644 index 0000000..344fdee --- /dev/null +++ b/src/converter.rs @@ -0,0 +1,393 @@ +use crate::segmentation; +use crate::Boundary; +use crate::Case; +use crate::Pattern; + +/// The parameters for performing a case conversion. +/// +/// A `Converter` stores three fields needed for case conversion. +/// 1) `boundaries`: how a string is segmented into _words_. +/// 2) `pattern`: how words are mutated, or how each character's case will change. +/// 3) `delim` or delimeter: how the mutated words are joined into the final string. +/// +/// Then calling [`convert`](Converter::convert) on a `Converter` will apply a case conversion +/// defined by those fields. The `Converter` struct is what is used underneath those functions +/// available in the `Casing` struct. +/// +/// You can use `Converter` when you need more specificity on conversion +/// than those provided in `Casing`, or if it is simply more convenient or explicit. 
+/// +/// ``` +/// use convert_case::{Boundary, Case, Casing, Converter, Pattern}; +/// +/// let s = "DialogueBox-border-shadow"; +/// +/// // Convert using Casing trait +/// assert_eq!( +/// "dialoguebox_border_shadow", +/// s.from_case(Case::Kebab).to_case(Case::Snake) +/// ); +/// +/// // Convert using similar functions on Converter +/// let conv = Converter::new() +/// .from_case(Case::Kebab) +/// .to_case(Case::Snake); +/// assert_eq!("dialoguebox_border_shadow", conv.convert(s)); +/// +/// // Convert by setting each field explicitly. +/// let conv = Converter::new() +/// .set_boundaries(&[Boundary::Hyphen]) +/// .set_pattern(Pattern::Lowercase) +/// .set_delim("_"); +/// assert_eq!("dialoguebox_border_shadow", conv.convert(s)); +/// ``` +/// +/// Or you can use `Converter` when you are trying to make a unique case +/// not provided as a variant of `Case`. +/// +/// ``` +/// use convert_case::{Boundary, Case, Casing, Converter, Pattern}; +/// +/// let dot_camel = Converter::new() +/// .set_boundaries(&[Boundary::LowerUpper, Boundary::LowerDigit]) +/// .set_pattern(Pattern::Camel) +/// .set_delim("."); +/// assert_eq!("collision.Shape.2d", dot_camel.convert("CollisionShape2D")); +/// ``` +pub struct Converter { + /// How a string is segmented into words. + pub boundaries: Vec, + + /// How each word is mutated before joining. In the case that there is no pattern, none of the + /// words will be mutated before joining and will maintain whatever case they were in the + /// original string. + pub pattern: Option, + + /// The string used to join mutated words together. + pub delim: String, +} + +impl Default for Converter { + fn default() -> Self { + Converter { + boundaries: Boundary::defaults(), + pattern: None, + delim: String::new(), + } + } +} + +impl Converter { + /// Creates a new `Converter` with default fields. This is the same as `Default::default()`. 
+ /// The `Converter` will use `Boundary::defaults()` for boundaries, no pattern, and an empty + /// string as a delimeter. + /// ``` + /// use convert_case::Converter; + /// + /// let conv = Converter::new(); + /// assert_eq!("DeathPerennialQUEST", conv.convert("Death-Perennial QUEST")) + /// ``` + pub fn new() -> Self { + Self::default() + } + + /// Converts a string. + /// ``` + /// use convert_case::{Case, Converter}; + /// + /// let conv = Converter::new() + /// .to_case(Case::Camel); + /// assert_eq!("xmlHttpRequest", conv.convert("XML_HTTP_Request")) + /// ``` + pub fn convert(&self, s: T) -> String + where + T: AsRef, + { + let words = segmentation::split(&s, &self.boundaries); + if let Some(p) = self.pattern { + let words = words.iter().map(|s| s.as_ref()).collect::>(); + p.mutate(&words).join(&self.delim) + } else { + words.join(&self.delim) + } + } + + /// Set the pattern and delimiter to those associated with the given case. + /// ``` + /// use convert_case::{Case, Converter}; + /// + /// let conv = Converter::new() + /// .to_case(Case::Pascal); + /// assert_eq!("VariableName", conv.convert("variable name")) + /// ``` + pub fn to_case(mut self, case: Case) -> Self { + self.pattern = Some(case.pattern()); + self.delim = case.delim().to_string(); + self + } + + /// Sets the boundaries to those associated with the provided case. This is used + /// by the `from_case` function in the `Casing` trait. + /// ``` + /// use convert_case::{Case, Converter}; + /// + /// let conv = Converter::new() + /// .from_case(Case::Snake) + /// .to_case(Case::Title); + /// assert_eq!("Dot Productvalue", conv.convert("dot_productValue")) + /// ``` + pub fn from_case(mut self, case: Case) -> Self { + self.boundaries = case.boundaries(); + self + } + + /// Sets the boundaries to those provided. 
+ /// ``` + /// use convert_case::{Boundary, Case, Converter}; + /// + /// let conv = Converter::new() + /// .set_boundaries(&[Boundary::Underscore, Boundary::LowerUpper]) + /// .to_case(Case::Lower); + /// assert_eq!("panic attack dream theater", conv.convert("panicAttack_dreamTheater")) + /// ``` + pub fn set_boundaries(mut self, bs: &[Boundary]) -> Self { + self.boundaries = bs.to_vec(); + self + } + + /// Adds a boundary to the list of boundaries. + /// ``` + /// use convert_case::{Boundary, Case, Converter}; + /// + /// let conv = Converter::new() + /// .from_case(Case::Title) + /// .add_boundary(Boundary::Hyphen) + /// .to_case(Case::Snake); + /// assert_eq!("my_biography_video_1", conv.convert("My Biography - Video 1")) + /// ``` + pub fn add_boundary(mut self, b: Boundary) -> Self { + self.boundaries.push(b); + self + } + + /// Adds a vector of boundaries to the list of boundaries. + /// ``` + /// use convert_case::{Boundary, Case, Converter}; + /// + /// let conv = Converter::new() + /// .from_case(Case::Kebab) + /// .to_case(Case::Title) + /// .add_boundaries(&[Boundary::Underscore, Boundary::LowerUpper]); + /// assert_eq!("2020 10 First Day", conv.convert("2020-10_firstDay")); + /// ``` + pub fn add_boundaries(mut self, bs: &[Boundary]) -> Self { + self.boundaries.extend(bs); + self + } + + /// Removes a boundary from the list of boundaries if it exists. + /// ``` + /// use convert_case::{Boundary, Case, Converter}; + /// + /// let conv = Converter::new() + /// .remove_boundary(Boundary::Acronym) + /// .to_case(Case::Kebab); + /// assert_eq!("httprequest-parser", conv.convert("HTTPRequest_parser")); + /// ``` + pub fn remove_boundary(mut self, b: Boundary) -> Self { + self.boundaries.retain(|&x| x != b); + self + } + + /// Removes all the provided boundaries from the list of boundaries if it exists. 
+ /// ``` + /// use convert_case::{Boundary, Case, Converter}; + /// + /// let conv = Converter::new() + /// .remove_boundaries(&Boundary::digits()) + /// .to_case(Case::Snake); + /// assert_eq!("c04_s03_path_finding.pdf", conv.convert("C04 S03 Path Finding.pdf")); + /// ``` + pub fn remove_boundaries(mut self, bs: &[Boundary]) -> Self { + for b in bs { + self.boundaries.retain(|&x| x != *b); + } + self + } + + /// Sets the delimeter. + /// ``` + /// use convert_case::{Case, Converter}; + /// + /// let conv = Converter::new() + /// .to_case(Case::Snake) + /// .set_delim("."); + /// assert_eq!("lower.with.dots", conv.convert("LowerWithDots")); + /// ``` + pub fn set_delim(mut self, d: T) -> Self + where + T: ToString, + { + self.delim = d.to_string(); + self + } + + /// Sets the delimeter to an empty string. + /// ``` + /// use convert_case::{Case, Converter}; + /// + /// let conv = Converter::new() + /// .to_case(Case::Snake) + /// .remove_delim(); + /// assert_eq!("nodelimshere", conv.convert("No Delims Here")); + /// ``` + pub fn remove_delim(mut self) -> Self { + self.delim = String::new(); + self + } + + /// Sets the pattern. + /// ``` + /// use convert_case::{Case, Converter, Pattern}; + /// + /// let conv = Converter::new() + /// .set_delim("_") + /// .set_pattern(Pattern::Sentence); + /// assert_eq!("Bjarne_case", conv.convert("BJARNE CASE")); + /// ``` + pub fn set_pattern(mut self, p: Pattern) -> Self { + self.pattern = Some(p); + self + } + + /// Sets the pattern field to `None`. Where there is no pattern, a character's case is never + /// mutated and will be maintained at the end of conversion. 
+ /// ``` + /// use convert_case::{Case, Converter}; + /// + /// let conv = Converter::new() + /// .from_case(Case::Title) + /// .to_case(Case::Snake) + /// .remove_pattern(); + /// assert_eq!("KoRn_Alone_I_Break", conv.convert("KoRn Alone I Break")); + /// ``` + pub fn remove_pattern(mut self) -> Self { + self.pattern = None; + self + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::Casing; + use crate::Pattern; + + #[test] + fn snake_converter_from_case() { + let conv = Converter::new().to_case(Case::Snake); + let s = String::from("my var name"); + assert_eq!(s.to_case(Case::Snake), conv.convert(s)); + } + + #[test] + fn snake_converter_from_scratch() { + let conv = Converter::new() + .set_delim("_") + .set_pattern(Pattern::Lowercase); + let s = String::from("my var name"); + assert_eq!(s.to_case(Case::Snake), conv.convert(s)); + } + + #[test] + fn custom_pattern() { + let conv = Converter::new() + .to_case(Case::Snake) + .set_pattern(Pattern::Sentence); + assert_eq!("Bjarne_case", conv.convert("bjarne case")); + } + + #[test] + fn custom_delim() { + let conv = Converter::new().set_delim(".."); + assert_eq!("oh..My", conv.convert("ohMy")); + } + + #[test] + fn no_pattern() { + let conv = Converter::new() + .from_case(Case::Title) + .to_case(Case::Kebab) + .remove_pattern(); + assert_eq!("wIErd-CASing", conv.convert("wIErd CASing")); + } + + #[test] + fn no_delim() { + let conv = Converter::new() + .from_case(Case::Title) + .to_case(Case::Kebab) + .remove_delim(); + assert_eq!("justflat", conv.convert("Just Flat")); + } + + #[test] + fn no_digit_boundaries() { + let conv = Converter::new() + .remove_boundaries(&Boundary::digits()) + .to_case(Case::Snake); + assert_eq!("test_08bound", conv.convert("Test 08Bound")); + assert_eq!("a8a_a8a", conv.convert("a8aA8A")); + } + + #[test] + fn remove_boundary() { + let conv = Converter::new() + .remove_boundary(Boundary::DigitUpper) + .to_case(Case::Snake); + assert_eq!("test_08bound", conv.convert("Test 
08Bound")); + assert_eq!("a_8_a_a_8a", conv.convert("a8aA8A")); + } + + #[test] + fn add_boundary() { + let conv = Converter::new() + .from_case(Case::Snake) + .to_case(Case::Kebab) + .add_boundary(Boundary::LowerUpper); + assert_eq!("word-word-word", conv.convert("word_wordWord")); + } + + #[test] + fn add_boundaries() { + let conv = Converter::new() + .from_case(Case::Snake) + .to_case(Case::Kebab) + .add_boundaries(&[Boundary::LowerUpper, Boundary::UpperLower]); + assert_eq!("word-word-w-ord", conv.convert("word_wordWord")); + } + + #[test] + fn reuse_after_change() { + let conv = Converter::new().from_case(Case::Snake).to_case(Case::Kebab); + assert_eq!("word-wordword", conv.convert("word_wordWord")); + + let conv = conv.add_boundary(Boundary::LowerUpper); + assert_eq!("word-word-word", conv.convert("word_wordWord")); + } + + #[test] + fn explicit_boundaries() { + let conv = Converter::new() + .set_boundaries(&[ + Boundary::DigitLower, + Boundary::DigitUpper, + Boundary::Acronym, + ]) + .to_case(Case::Snake); + assert_eq!( + "section8_lesson2_http_requests", + conv.convert("section8lesson2HTTPRequests") + ); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..1ea51ef --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,661 @@ +//! Converts to and from various cases. +//! +//! # Command Line Utility `ccase` +//! +//! This library was developed for the purposes of a command line utility for converting +//! the case of strings and filenames. You can check out +//! [`ccase` on Github](https://github.com/rutrum/convert-case/tree/master/ccase). +//! +//! # Rust Library +//! +//! Provides a [`Case`](enum.Case.html) enum which defines a variety of cases to convert into. +//! Strings have implemented the [`Casing`](trait.Casing.html) trait, which adds methods for +//! case conversion. +//! +//! You can convert strings into a case using the [`to_case`](Casing::to_case) method. +//! ``` +//! use convert_case::{Case, Casing}; +//! +//! 
assert_eq!("Ronnie James Dio", "ronnie james dio".to_case(Case::Title)); +//! assert_eq!("ronnieJamesDio", "Ronnie_James_dio".to_case(Case::Camel)); +//! assert_eq!("Ronnie-James-Dio", "RONNIE_JAMES_DIO".to_case(Case::Train)); +//! ``` +//! +//! By default, `to_case` will split along a set of default word boundaries, that is +//! * space characters ` `, +//! * underscores `_`, +//! * hyphens `-`, +//! * changes in capitalization from lowercase to uppercase `aA`, +//! * adjacent digits and letters `a1`, `1a`, `A1`, `1A`, +//! * and acronyms `AAa` (as in `HTTPRequest`). +//! +//! For more accuracy, the `from_case` method splits based on the word boundaries +//! of a particular case. For example, splitting from snake case will only use +//! underscores as word boundaries. +//! ``` +//! use convert_case::{Case, Casing}; +//! +//! assert_eq!( +//! "2020 04 16 My Cat Cali", +//! "2020-04-16_my_cat_cali".to_case(Case::Title) +//! ); +//! assert_eq!( +//! "2020-04-16 My Cat Cali", +//! "2020-04-16_my_cat_cali".from_case(Case::Snake).to_case(Case::Title) +//! ); +//! ``` +//! +//! Case conversion can detect acronyms for camel-like strings. It also ignores any leading, +//! trailing, or duplicate delimiters. +//! ``` +//! use convert_case::{Case, Casing}; +//! +//! assert_eq!("io_stream", "IOStream".to_case(Case::Snake)); +//! assert_eq!("my_json_parser", "myJSONParser".to_case(Case::Snake)); +//! +//! assert_eq!("weird_var_name", "__weird--var _name-".to_case(Case::Snake)); +//! ``` +//! +//! It also works with non-ASCII characters. However, no inference about the language itself is made. +//! For instance, the digraph `ij` in Dutch will not be capitalized, because it is represented +//! as two distinct Unicode characters. However, `æ` would be capitalized. Accuracy with Unicode +//! characters is achieved using the `unicode-segmentation` crate, the sole dependency of this crate. +//! ``` +//! use convert_case::{Case, Casing}; +//! +//! 
assert_eq!("granat-äpfel", "GranatÄpfel".to_case(Case::Kebab)); +//! assert_eq!("Перспектива 24", "ПЕРСПЕКТИВА24".to_case(Case::Title)); +//! +//! // The example from str::to_lowercase documentation +//! let odysseus = "ὈΔΥΣΣΕΎΣ"; +//! assert_eq!("ὀδυσσεύς", odysseus.to_case(Case::Lower)); +//! ``` +//! +//! By default, characters followed by digits and vice-versa are +//! considered word boundaries. In addition, any special ASCII characters (besides `_` and `-`) +//! are ignored. +//! ``` +//! use convert_case::{Case, Casing}; +//! +//! assert_eq!("e_5150", "E5150".to_case(Case::Snake)); +//! assert_eq!("10,000_days", "10,000Days".to_case(Case::Snake)); +//! assert_eq!("HELLO, WORLD!", "Hello, world!".to_case(Case::Upper)); +//! assert_eq!("One\ntwo\nthree", "ONE\nTWO\nTHREE".to_case(Case::Title)); +//! ``` +//! +//! You can also test what case a string is in. +//! ``` +//! use convert_case::{Case, Casing}; +//! +//! assert!( "css-class-name".is_case(Case::Kebab)); +//! assert!(!"css-class-name".is_case(Case::Snake)); +//! assert!(!"UPPER_CASE_VAR".is_case(Case::Snake)); +//! ``` +//! +//! # Note on Accuracy +//! +//! The `Casing` methods `from_case` and `to_case` do not fail. Conversion to a case will always +//! succeed. However, the results can still be unexpected. Failure to detect any word boundaries +//! for a particular case means the entire string will be considered a single word. +//! ``` +//! use convert_case::{Case, Casing}; +//! +//! // Mistakenly parsing using Case::Snake +//! assert_eq!("My-kebab-var", "my-kebab-var".from_case(Case::Snake).to_case(Case::Title)); +//! +//! // Converts using an unexpected method +//! assert_eq!("my_kebab_like_variable", "myKebab-like-variable".to_case(Case::Snake)); +//! ``` +//! +//! # Boundary Specificity +//! +//! It can be difficult to determine how to split a string into words. That is why this crate +//! provides the [`from_case`](Casing::from_case) functionality, but sometimes that isn't enough +//! 
to meet a specific use case. +//! +//! Take an identifier that has the word `2D`, such as `scale2D`. Using `from_case` alone will not +//! be enough to solve the problem. In this case we can further specify which boundaries to split +//! the string on. `convert_case` provides some patterns for achieving this specificity. +//! We can specify what boundaries we want to split on using the [`Boundary` enum](Boundary). +//! ``` +//! use convert_case::{Boundary, Case, Casing}; +//! +//! // Not quite what we want +//! assert_eq!( +//! "scale_2_d", +//! "scale2D" +//! .from_case(Case::Camel) +//! .to_case(Case::Snake) +//! ); +//! +//! // Remove boundary from Case::Camel +//! assert_eq!( +//! "scale_2d", +//! "scale2D" +//! .from_case(Case::Camel) +//! .without_boundaries(&[Boundary::DigitUpper, Boundary::DigitLower]) +//! .to_case(Case::Snake) +//! ); +//! +//! // Write boundaries explicitly +//! assert_eq!( +//! "scale_2d", +//! "scale2D" +//! .with_boundaries(&[Boundary::LowerDigit]) +//! .to_case(Case::Snake) +//! ); +//! ``` +//! +//! The `Casing` trait provides initial methods, but any subsequent methods that do not resolve +//! the conversion return a [`StateConverter`] struct. It offers methods similar to those of `Casing`. +//! +//! # Custom Cases +//! +//! Because `Case` is an enum, you can't create your own variant for your use case. However +//! the parameters for case conversion have been encapsulated into the [`Converter`] struct +//! which can be used for specific use cases. +//! +//! Suppose you wanted to format a word like camel case, where the first word is lower case and the +//! rest are capitalized. But you want to include a delimiter like underscore. This case isn't +//! available as a `Case` variant, but you can create it by constructing the parameters of the +//! `Converter`. +//! ``` +//! use convert_case::{Case, Casing, Converter, Pattern}; +//! +//! let conv = Converter::new() +//! .set_pattern(Pattern::Camel) +//! .set_delim("_"); +//! +//! 
assert_eq!( +//! "my_Special_Case", +//! conv.convert("My Special Case") +//! ) +//! ``` +//! Just as with the `Casing` trait, you can also manually set the boundaries strings are split +//! on. You can use any of the [`Pattern`] variants available. This even includes [`Pattern::Sentence`] +//! which isn't used in any `Case` variant. You can also set no pattern at all, which will +//! maintain the casing of each letter in the input string. You can also, of course, set any string as your +//! delimiter. +//! +//! For more details on how strings are converted, see the docs for [`Converter`]. +//! +//! # Random Feature +//! +//! To keep this library's required dependencies minimal, randomness was moved to the optional _random_ feature, +//! which requires the `rand` crate. You can enable this feature by including the +//! following in your `Cargo.toml`. +//! ```{toml} +//! [dependencies] +//! convert_case = { version = "0.6.0", features = ["random"] } +//! ``` +//! This will add two additional cases: Random and PseudoRandom. You can read about their +//! construction in the [Case enum](enum.Case.html). + +mod case; +mod converter; +mod pattern; +mod segmentation; + +pub use case::Case; +pub use converter::Converter; +pub use pattern::Pattern; +pub use segmentation::Boundary; + +/// Describes items that can be converted into a case. This trait is used +/// in conjunction with the [`StateConverter`] struct which is returned from a couple of +/// methods on `Casing`. +/// +/// Implemented for strings `&str`, `String`, and `&String`. +pub trait Casing> { + + /// Convert the string into the given case. It will reference `self` and create a new + /// `String` with the same pattern and delimiter as `case`. It will split on boundaries + /// defined at [`Boundary::defaults()`]. 
+ /// ``` + /// use convert_case::{Case, Casing}; + /// + /// assert_eq!( + /// "tetronimo-piece-border", + /// "Tetronimo piece border".to_case(Case::Kebab) + /// ); + /// ``` + fn to_case(&self, case: Case) -> String; + + /// Start the case conversion by storing the boundaries associated with the given case. + /// ``` + /// use convert_case::{Case, Casing}; + /// + /// assert_eq!( + /// "2020-08-10_dannie_birthday", + /// "2020-08-10 Dannie Birthday" + /// .from_case(Case::Title) + /// .to_case(Case::Snake) + /// ); + /// ``` + #[allow(clippy::wrong_self_convention)] + fn from_case(&self, case: Case) -> StateConverter; + + /// Creates a `StateConverter` struct initialized with the boundaries + /// provided. + /// ``` + /// use convert_case::{Boundary, Case, Casing}; + /// + /// assert_eq!( + /// "e1_m1_hangar", + /// "E1M1 Hangar" + /// .with_boundaries(&[Boundary::DigitUpper, Boundary::Space]) + /// .to_case(Case::Snake) + /// ); + /// ``` + fn with_boundaries(&self, bs: &[Boundary]) -> StateConverter; + + /// Determines if `self` is of the given case. This is done simply by applying + /// the conversion and seeing if the result is the same. 
+ /// ``` + /// use convert_case::{Case, Casing}; + /// + /// assert!( "kebab-case-string".is_case(Case::Kebab)); + /// assert!( "Train-Case-String".is_case(Case::Train)); + /// + /// assert!(!"kebab-case-string".is_case(Case::Snake)); + /// assert!(!"kebab-case-string".is_case(Case::Train)); + /// ``` + fn is_case(&self, case: Case) -> bool; +} + +impl> Casing for T +where + String: PartialEq, +{ + fn to_case(&self, case: Case) -> String { + StateConverter::new(self).to_case(case) + } + + fn with_boundaries(&self, bs: &[Boundary]) -> StateConverter { + StateConverter::new(self).with_boundaries(bs) + } + + fn from_case(&self, case: Case) -> StateConverter { + StateConverter::new_from_case(self, case) + } + + fn is_case(&self, case: Case) -> bool { + &self.to_case(case) == self + } +} + +/// Holds information about parsing before converting into a case. +/// +/// This struct is used when invoking the `from_case` and `with_boundaries` methods on +/// `Casing`. For a more fine grained approach to case conversion, consider using the [`Converter`] +/// struct. +/// ``` +/// use convert_case::{Case, Casing}; +/// +/// let title = "ninety-nine_problems".from_case(Case::Snake).to_case(Case::Title); +/// assert_eq!("Ninety-nine Problems", title); +/// ``` +pub struct StateConverter<'a, T: AsRef> { + s: &'a T, + conv: Converter, +} + +impl<'a, T: AsRef> StateConverter<'a, T> { + /// Only called by Casing function to_case() + fn new(s: &'a T) -> Self { + Self { + s, + conv: Converter::new(), + } + } + + /// Only called by Casing function from_case() + fn new_from_case(s: &'a T, case: Case) -> Self { + Self { + s, + conv: Converter::new().from_case(case), + } + } + + /// Uses the boundaries associated with `case` for word segmentation. This + /// will overwrite any boundary information initialized before. This method is + /// likely not useful, but provided anyway. 
+ /// ``` + /// use convert_case::{Case, Casing}; + /// + /// let name = "Chuck Schuldiner" + /// .from_case(Case::Snake) // from Casing trait + /// .from_case(Case::Title) // from StateConverter, overwrites previous + /// .to_case(Case::Kebab); + /// assert_eq!("chuck-schuldiner", name); + /// ``` + pub fn from_case(self, case: Case) -> Self { + Self { + conv: self.conv.from_case(case), + ..self + } + } + + /// Overwrites boundaries for word segmentation with those provided. This will overwrite + /// any boundary information initialized before. This method is likely not useful, but + /// provided anyway. + /// ``` + /// use convert_case::{Boundary, Case, Casing}; + /// + /// let song = "theHumbling river-puscifer" + /// .from_case(Case::Kebab) // from Casing trait + /// .with_boundaries(&[Boundary::Space, Boundary::LowerUpper]) // overwrites `from_case` + /// .to_case(Case::Pascal); + /// assert_eq!("TheHumblingRiver-puscifer", song); // doesn't split on hyphen `-` + /// ``` + pub fn with_boundaries(self, bs: &[Boundary]) -> Self { + Self { + s: self.s, + conv: self.conv.set_boundaries(bs), + } + } + + /// Removes any boundaries that were already initialized. This is particularly useful when a + /// case like `Case::Camel` has a lot of associated word boundaries, but you want to exclude + /// some. + /// ``` + /// use convert_case::{Boundary, Case, Casing}; + /// + /// assert_eq!( + /// "2d_transformation", + /// "2dTransformation" + /// .from_case(Case::Camel) + /// .without_boundaries(&Boundary::digits()) + /// .to_case(Case::Snake) + /// ); + /// ``` + pub fn without_boundaries(self, bs: &[Boundary]) -> Self { + Self { + s: self.s, + conv: self.conv.remove_boundaries(bs), + } + } + + /// Consumes the `StateConverter` and returns the converted string. 
+ /// ``` + /// use convert_case::{Boundary, Case, Casing}; + /// + /// assert_eq!( + /// "ice-cream social", + /// "Ice-Cream Social".from_case(Case::Title).to_case(Case::Lower) + /// ); + /// ``` + pub fn to_case(self, case: Case) -> String { + self.conv.to_case(case).convert(self.s) + } +} + +#[cfg(test)] +mod test { + use super::*; + use strum::IntoEnumIterator; + + fn possible_cases(s: &str) -> Vec { + Case::deterministic_cases() + .into_iter() + .filter(|case| s.from_case(*case).to_case(*case) == s) + .collect() + } + + #[test] + fn lossless_against_lossless() { + let examples = vec![ + (Case::Lower, "my variable 22 name"), + (Case::Upper, "MY VARIABLE 22 NAME"), + (Case::Title, "My Variable 22 Name"), + (Case::Camel, "myVariable22Name"), + (Case::Pascal, "MyVariable22Name"), + (Case::Snake, "my_variable_22_name"), + (Case::UpperSnake, "MY_VARIABLE_22_NAME"), + (Case::Kebab, "my-variable-22-name"), + (Case::Cobol, "MY-VARIABLE-22-NAME"), + (Case::Toggle, "mY vARIABLE 22 nAME"), + (Case::Train, "My-Variable-22-Name"), + (Case::Alternating, "mY vArIaBlE 22 nAmE"), + ]; + + for (case_a, str_a) in examples.iter() { + for (case_b, str_b) in examples.iter() { + assert_eq!(*str_a, str_b.from_case(*case_b).to_case(*case_a)) + } + } + } + + #[test] + fn obvious_default_parsing() { + let examples = vec![ + "SuperMario64Game", + "super-mario64-game", + "superMario64 game", + "Super Mario 64_game", + "SUPERMario 64-game", + "super_mario-64 game", + ]; + + for example in examples { + assert_eq!("super_mario_64_game", example.to_case(Case::Snake)); + } + } + + #[test] + fn multiline_strings() { + assert_eq!("One\ntwo\nthree", "one\ntwo\nthree".to_case(Case::Title)); + } + + #[test] + fn camel_case_acroynms() { + assert_eq!( + "xml_http_request", + "XMLHttpRequest".from_case(Case::Camel).to_case(Case::Snake) + ); + assert_eq!( + "xml_http_request", + "XMLHttpRequest" + .from_case(Case::UpperCamel) + .to_case(Case::Snake) + ); + assert_eq!( + "xml_http_request", + 
"XMLHttpRequest" + .from_case(Case::Pascal) + .to_case(Case::Snake) + ); + } + + #[test] + fn leading_tailing_delimeters() { + assert_eq!( + "leading_underscore", + "_leading_underscore" + .from_case(Case::Snake) + .to_case(Case::Snake) + ); + assert_eq!( + "tailing_underscore", + "tailing_underscore_" + .from_case(Case::Snake) + .to_case(Case::Snake) + ); + assert_eq!( + "leading_hyphen", + "-leading-hyphen" + .from_case(Case::Kebab) + .to_case(Case::Snake) + ); + assert_eq!( + "tailing_hyphen", + "tailing-hyphen-" + .from_case(Case::Kebab) + .to_case(Case::Snake) + ); + } + + #[test] + fn double_delimeters() { + assert_eq!( + "many_underscores", + "many___underscores" + .from_case(Case::Snake) + .to_case(Case::Snake) + ); + assert_eq!( + "many-underscores", + "many---underscores" + .from_case(Case::Kebab) + .to_case(Case::Kebab) + ); + } + + #[test] + fn early_word_boundaries() { + assert_eq!( + "a_bagel", + "aBagel".from_case(Case::Camel).to_case(Case::Snake) + ); + } + + #[test] + fn late_word_boundaries() { + assert_eq!( + "team_a", + "teamA".from_case(Case::Camel).to_case(Case::Snake) + ); + } + + #[test] + fn empty_string() { + for (case_a, case_b) in Case::iter().zip(Case::iter()) { + assert_eq!("", "".from_case(case_a).to_case(case_b)); + } + } + + #[test] + fn owned_string() { + assert_eq!( + "test_variable", + String::from("TestVariable").to_case(Case::Snake) + ) + } + + #[test] + fn default_all_boundaries() { + assert_eq!( + "abc_abc_abc_abc_abc_abc", + "ABC-abc_abcAbc ABCAbc".to_case(Case::Snake) + ); + } + + #[test] + fn alternating_ignore_symbols() { + assert_eq!("tHaT's", "that's".to_case(Case::Alternating)); + } + + #[test] + fn string_is_snake() { + assert!("im_snake_case".is_case(Case::Snake)); + assert!(!"im_NOTsnake_case".is_case(Case::Snake)); + } + + #[test] + fn string_is_kebab() { + assert!("im-kebab-case".is_case(Case::Kebab)); + assert!(!"im_not_kebab".is_case(Case::Kebab)); + } + + #[test] + fn remove_boundaries() { + assert_eq!( + 
"m02_s05_binary_trees.pdf", + "M02S05BinaryTrees.pdf" + .from_case(Case::Pascal) + .without_boundaries(&[Boundary::UpperDigit]) + .to_case(Case::Snake) + ); + } + + #[test] + fn with_boundaries() { + assert_eq!( + "my-dumb-file-name", + "my_dumbFileName" + .with_boundaries(&[Boundary::Underscore, Boundary::LowerUpper]) + .to_case(Case::Kebab) + ); + } + + #[cfg(feature = "random")] + #[test] + fn random_case_boundaries() { + for random_case in Case::random_cases() { + assert_eq!( + "split_by_spaces", + "Split By Spaces" + .from_case(random_case) + .to_case(Case::Snake) + ); + } + } + + #[test] + fn multiple_from_case() { + assert_eq!( + "longtime_nosee", + "LongTime NoSee" + .from_case(Case::Camel) + .from_case(Case::Title) + .to_case(Case::Snake), + ) + } + + use std::collections::HashSet; + use std::iter::FromIterator; + + #[test] + fn detect_many_cases() { + let lower_cases_vec = possible_cases(&"asef"); + let lower_cases_set = HashSet::from_iter(lower_cases_vec.into_iter()); + let mut actual = HashSet::new(); + actual.insert(Case::Lower); + actual.insert(Case::Camel); + actual.insert(Case::Snake); + actual.insert(Case::Kebab); + actual.insert(Case::Flat); + assert_eq!(lower_cases_set, actual); + + let lower_cases_vec = possible_cases(&"asefCase"); + let lower_cases_set = HashSet::from_iter(lower_cases_vec.into_iter()); + let mut actual = HashSet::new(); + actual.insert(Case::Camel); + assert_eq!(lower_cases_set, actual); + } + + #[test] + fn detect_each_case() { + let s = "My String Identifier".to_string(); + for case in Case::deterministic_cases() { + let new_s = s.from_case(case).to_case(case); + let possible = possible_cases(&new_s); + println!("{} {:?} {:?}", new_s, case, possible); + assert!(possible.iter().any(|c| c == &case)); + } + } + + // From issue https://github.com/rutrum/convert-case/issues/8 + #[test] + fn accent_mark() { + let s = "música moderna".to_string(); + assert_eq!("MúsicaModerna", s.to_case(Case::Pascal)); + } + + // From issue 
https://github.com/rutrum/convert-case/issues/4 + #[test] + fn russian() { + let s = "ПЕРСПЕКТИВА24".to_string(); + let _n = s.to_case(Case::Title); + } +} diff --git a/src/pattern.rs b/src/pattern.rs new file mode 100644 index 0000000..87d013f --- /dev/null +++ b/src/pattern.rs @@ -0,0 +1,354 @@ +use std::iter; + +#[cfg(feature = "random")] +use rand::prelude::*; + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +enum WordCase { + Lower, + Upper, + Capital, + Toggle, +} + +impl WordCase { + fn mutate(&self, word: &str) -> String { + use WordCase::*; + match self { + Lower => word.to_lowercase(), + Upper => word.to_uppercase(), + Capital => { + let mut chars = word.chars(); + if let Some(c) = chars.next() { + c.to_uppercase() + .chain(chars.as_str().to_lowercase().chars()) + .collect() + } else { + String::new() + } + } + Toggle => { + let mut chars = word.chars(); + if let Some(c) = chars.next() { + c.to_lowercase() + .chain(chars.as_str().to_uppercase().chars()) + .collect() + } else { + String::new() + } + } + } + } +} + +/// A pattern is how a set of words is mutated before joining with +/// a delimeter. +/// +/// The `Random` and `PseudoRandom` patterns are used for their respective cases +/// and are only available in the "random" feature. +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum Pattern { + /// Lowercase patterns make all words lowercase. + /// ``` + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["case", "conversion", "library"], + /// Pattern::Lowercase.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// ``` + Lowercase, + + /// Uppercase patterns make all words uppercase. + /// ``` + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["CASE", "CONVERSION", "LIBRARY"], + /// Pattern::Uppercase.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// ``` + Uppercase, + + /// Capital patterns makes the first letter of each word uppercase + /// and the remaining letters of each word lowercase. 
+ /// ``` + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["Case", "Conversion", "Library"], + /// Pattern::Capital.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// ``` + Capital, + + /// Sentence patterns make the first word capitalized and the + /// remaining lowercase. + /// ``` + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["Case", "conversion", "library"], + /// Pattern::Sentence.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// ``` + Sentence, + + /// Camel patterns make the first word lowercase and the remaining + /// capitalized. + /// ``` + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["case", "Conversion", "Library"], + /// Pattern::Camel.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// ``` + Camel, + + /// Alternating patterns make each letter of each word alternate + /// between lowercase and uppercase. They alternate across words, + /// which means the last letter of one word and the first letter of the + /// next will not be the same letter casing. + /// ``` + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["cAsE", "cOnVeRsIoN", "lIbRaRy"], + /// Pattern::Alternating.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// assert_eq!( + /// vec!["aNoThEr", "ExAmPlE"], + /// Pattern::Alternating.mutate(&["Another", "Example"]), + /// ); + /// ``` + Alternating, + + /// Toggle patterns have the first letter of each word lowercase + /// and the remaining letters of each word uppercase. + /// ``` + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["cASE", "cONVERSION", "lIBRARY"], + /// Pattern::Toggle.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// ``` + Toggle, + + /// Random patterns will lowercase or uppercase each letter + /// uniformly randomly. This uses the `rand` crate and is only available with the "random" + /// feature. This example will not pass the assertion due to randomness, but it is used as an + /// example of what output is possible. 
+ /// ```should_panic + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["Case", "coNVeRSiOn", "lIBraRY"], + /// Pattern::Random.mutate(&["Case", "CONVERSION", "library"]) + /// ); + /// ``` + #[cfg(feature = "random")] + #[cfg(any(doc, feature = "random"))] + Random, + + /// PseudoRandom patterns are random-like patterns. Instead of randomizing + /// each letter individually, it mutates each pair of characters + /// as either (Lowercase, Uppercase) or (Uppercase, Lowercase). This generates + /// more "random looking" words. A consequence of this algorithm for randomization + /// is that there will never be three consecutive letters that are all lowercase + /// or all uppercase. This uses the `rand` crate and is only available with the "random" + /// feature. This example will not pass the assertion due to randomness, but it is used as an + /// example of what output is possible. + /// ```should_panic + /// use convert_case::Pattern; + /// assert_eq!( + /// vec!["cAsE", "cONveRSioN", "lIBrAry"], + /// Pattern::PseudoRandom.mutate(&["Case", "CONVERSION", "library"]), + /// ); + /// ``` + #[cfg(any(doc, feature = "random"))] + PseudoRandom, +} + +impl Pattern { + /// Generates a vector of new `String`s in the right pattern given + /// the input strings. 
+ /// ``` + /// use convert_case::Pattern; + /// + /// assert_eq!( + /// vec!["crack", "the", "skye"], + /// Pattern::Lowercase.mutate(&vec!["CRACK", "the", "Skye"]), + /// ) + /// ``` + pub fn mutate(&self, words: &[&str]) -> Vec { + use Pattern::*; + match self { + Lowercase => words + .iter() + .map(|word| WordCase::Lower.mutate(word)) + .collect(), + Uppercase => words + .iter() + .map(|word| WordCase::Upper.mutate(word)) + .collect(), + Capital => words + .iter() + .map(|word| WordCase::Capital.mutate(word)) + .collect(), + Toggle => words + .iter() + .map(|word| WordCase::Toggle.mutate(word)) + .collect(), + Sentence => { + let word_cases = + iter::once(WordCase::Capital).chain(iter::once(WordCase::Lower).cycle()); + words + .iter() + .zip(word_cases) + .map(|(word, word_case)| word_case.mutate(word)) + .collect() + } + Camel => { + let word_cases = + iter::once(WordCase::Lower).chain(iter::once(WordCase::Capital).cycle()); + words + .iter() + .zip(word_cases) + .map(|(word, word_case)| word_case.mutate(word)) + .collect() + } + Alternating => alternating(words), + #[cfg(feature = "random")] + Random => randomize(words), + #[cfg(feature = "random")] + PseudoRandom => pseudo_randomize(words), + } + } +} + +fn alternating(words: &[&str]) -> Vec { + let mut upper = false; + words + .iter() + .map(|word| { + word.chars() + .map(|letter| { + if letter.is_uppercase() || letter.is_lowercase() { + if upper { + upper = false; + letter.to_uppercase().to_string() + } else { + upper = true; + letter.to_lowercase().to_string() + } + } else { + letter.to_string() + } + }) + .collect() + }) + .collect() +} + +/// Randomly picks whether to be upper case or lower case +#[cfg(feature = "random")] +fn randomize(words: &[&str]) -> Vec { + let mut rng = rand::thread_rng(); + words + .iter() + .map(|word| { + word.chars() + .map(|letter| { + if rng.gen::() > 0.5 { + letter.to_uppercase().to_string() + } else { + letter.to_lowercase().to_string() + } + }) + .collect() + }) + 
.collect() +} + +/// Randomly selects patterns: [upper, lower] or [lower, upper] +/// for a more random feeling pattern. +#[cfg(feature = "random")] +fn pseudo_randomize(words: &[&str]) -> Vec { + let mut rng = rand::thread_rng(); + + // Keeps track of when to alternate + let mut alt: Option = None; + words + .iter() + .map(|word| { + word.chars() + .map(|letter| { + match alt { + // No existing pattern, start one + None => { + if rng.gen::() > 0.5 { + alt = Some(false); // Make the next char lower + letter.to_uppercase().to_string() + } else { + alt = Some(true); // Make the next char upper + letter.to_lowercase().to_string() + } + } + // Existing pattern, do what it says + Some(upper) => { + alt = None; + if upper { + letter.to_uppercase().to_string() + } else { + letter.to_lowercase().to_string() + } + } + } + }) + .collect() + }) + .collect() +} + +#[cfg(test)] +mod test { + use super::*; + + #[cfg(feature = "random")] + #[test] + fn pseudo_no_triples() { + let words = vec!["abcdefg", "hijklmnop", "qrstuv", "wxyz"]; + for _ in 0..5 { + let new = pseudo_randomize(&words).join(""); + let mut iter = new + .chars() + .zip(new.chars().skip(1)) + .zip(new.chars().skip(2)); + assert!(!iter + .clone() + .any(|((a, b), c)| a.is_lowercase() && b.is_lowercase() && c.is_lowercase())); + assert!( + !iter.any(|((a, b), c)| a.is_uppercase() && b.is_uppercase() && c.is_uppercase()) + ); + } + } + + #[cfg(feature = "random")] + #[test] + fn randoms_are_random() { + let words = vec!["abcdefg", "hijklmnop", "qrstuv", "wxyz"]; + + for _ in 0..5 { + let transformed = pseudo_randomize(&words); + assert_ne!(words, transformed); + let transformed = randomize(&words); + assert_ne!(words, transformed); + } + } + + #[test] + fn mutate_empty_strings() { + for wcase in [ + WordCase::Lower, + WordCase::Upper, + WordCase::Capital, + WordCase::Toggle, + ] { + assert_eq!(String::new(), wcase.mutate(&String::new())) + } + } +} diff --git a/src/segmentation.rs b/src/segmentation.rs new file mode 
100644 index 0000000..99ab435 --- /dev/null +++ b/src/segmentation.rs @@ -0,0 +1,459 @@ +#[cfg(test)] +use strum_macros::EnumIter; + +use unicode_segmentation::{UnicodeSegmentation}; //, GraphemeCursor}; + +/// A boundary defines how a string is split into words. Some boundaries, `Hyphen`, `Underscore`, +/// and `Space`, consume the character they split on, whereas the other boundaries +/// do not. +/// +/// The struct offers methods that return `Vec`s containing useful groups of boundaries. It also +/// contains the [`list_from`](Boundary::list_from) method which will generate a list of boundaries +/// based on a string slice. +/// +/// Note that all boundaries are distinct and do not share functionality. That is, there is no +/// such DigitLetter variant, because that would be equivalent to the current `DigitUpper` and +/// `DigitLower` variants. For common functionality, consider using +/// some provided functions that return a list of boundaries. +/// ``` +/// use convert_case::{Boundary, Case, Casing, Converter}; +/// +/// assert_eq!( +/// "transformations_in_3d", +/// "TransformationsIn3D" +/// .from_case(Case::Camel) +/// .without_boundaries(&Boundary::digit_letter()) +/// .to_case(Case::Snake) +/// ); +/// +/// let conv = Converter::new() +/// .set_boundaries(&Boundary::list_from("aA ")) +/// .to_case(Case::Title); +/// assert_eq!("7empest By Tool", conv.convert("7empest byTool")); +/// ``` +#[cfg_attr(test, derive(EnumIter))] +#[derive(Clone, Copy, Eq, PartialEq, Debug)] +pub enum Boundary { + /// Splits on `-`, consuming the character on segmentation. + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::Hyphen], + /// Boundary::list_from("-") + /// ); + /// ``` + Hyphen, + + /// Splits on `_`, consuming the character on segmentation. 
+ /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::Underscore], + /// Boundary::list_from("_") + /// ); + /// ``` + Underscore, + + /// Splits on space, consuming the character on segmentation. + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::Space], + /// Boundary::list_from(" ") + /// ); + /// ``` + Space, + + /// Splits where an uppercase letter is followed by a lowercase letter. This is seldom used, + /// and is not included in the [defaults](Boundary::defaults). + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::UpperLower], + /// Boundary::list_from("Aa") + /// ); + /// ``` + UpperLower, + + /// Splits where a lowercase letter is followed by an uppercase letter. + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::LowerUpper], + /// Boundary::list_from("aA") + /// ); + /// ``` + LowerUpper, + + /// Splits where digit is followed by an uppercase letter. + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::DigitUpper], + /// Boundary::list_from("1A") + /// ); + /// ``` + DigitUpper, + + /// Splits where an uppercase letter is followed by a digit. + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::UpperDigit], + /// Boundary::list_from("A1") + /// ); + /// ``` + UpperDigit, + + /// Splits where digit is followed by a lowercase letter. + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::DigitLower], + /// Boundary::list_from("1a") + /// ); + /// ``` + DigitLower, + + /// Splits where a lowercase letter is followed by a digit. + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::LowerDigit], + /// Boundary::list_from("a1") + /// ); + /// ``` + LowerDigit, + + /// Acronyms are identified by two uppercase letters followed by a lowercase letter. + /// The word boundary is between the two uppercase letters. 
For example, "HTTPRequest" + /// would have an acronym boundary identified at "PRe" and split into "HTTP" and "Request". + /// ``` + /// use convert_case::Boundary; + /// assert_eq!( + /// vec![Boundary::Acronym], + /// Boundary::list_from("AAa") + /// ); + /// ``` + Acronym, +} + +impl Boundary { + /// Returns a list of all boundaries that are identified within the given string. + /// This can be used as a shorthand for writing out all the boundaries in a list directly. This will not + /// identify boundary `UpperLower` if it is also used as part of `Acronym`. + /// + /// If you want to be very explicit and not overlap boundaries, it is recommended to use a colon + /// character. + /// ``` + /// use convert_case::Boundary; + /// use Boundary::*; + /// assert_eq!( + /// vec![Hyphen, Space, LowerUpper, UpperDigit, DigitLower], + /// Boundary::list_from("aA8a -") + /// ); + /// assert_eq!( + /// vec![Underscore, LowerUpper, DigitUpper, Acronym], + /// Boundary::list_from("bD:0B:_:AAa") + /// ); + /// ``` + pub fn list_from(s: &str) -> Vec { + Boundary::all().iter().filter(|boundary| { + let left_iter = s.graphemes(true); + let mid_iter = s.graphemes(true).skip(1); + let right_iter = s.graphemes(true).skip(2); + + let mut one_iter = left_iter.clone(); + + // Also capture when the previous pair was both uppercase, so we don't + // match the UpperLower boundary in the case of Acronym + let two_iter = left_iter.clone().zip(mid_iter.clone()); + let mut two_iter_and_upper = two_iter.clone() + .zip(std::iter::once(false).chain( + two_iter.map(|(a, b)| grapheme_is_uppercase(a) && grapheme_is_uppercase(b)) + )); + + let mut three_iter = left_iter.zip(mid_iter).zip(right_iter); + + one_iter.any(|a| boundary.detect_one(a)) + || two_iter_and_upper.any(|((a, b), is_acro)| boundary.detect_two(a, b) && !is_acro) + || three_iter.any(|((a, b), c)| boundary.detect_three(a, b, c)) + }).copied().collect() + } + + /// The default list of boundaries used when `Casing::to_case` is called directly + /// and 
in a `Converter` generated from `Converter::new()`. This includes
+    /// all the boundaries except the `UpperLower` boundary.
+    /// ```
+    /// use convert_case::Boundary;
+    /// use Boundary::*;
+    /// assert_eq!(
+    ///     vec![
+    ///         Underscore, Hyphen, Space, LowerUpper, UpperDigit,
+    ///         DigitUpper, DigitLower, LowerDigit, Acronym,
+    ///     ],
+    ///     Boundary::defaults()
+    /// );
+    /// ```
+    pub fn defaults() -> Vec<Boundary> {
+        use Boundary::*;
+        vec![
+            Underscore, Hyphen, Space, LowerUpper, UpperDigit, DigitUpper, DigitLower, LowerDigit,
+            Acronym,
+        ]
+    }
+
+    /// Returns the boundaries that split around single characters: `Hyphen`,
+    /// `Underscore`, and `Space`.
+    /// ```
+    /// use convert_case::Boundary;
+    /// use Boundary::*;
+    /// assert_eq!(
+    ///     vec![Hyphen, Underscore, Space],
+    ///     Boundary::delims()
+    /// );
+    /// ```
+    pub fn delims() -> Vec<Boundary> {
+        use Boundary::*;
+        vec![Hyphen, Underscore, Space]
+    }
+
+    /// Returns the boundaries that involve digits: `DigitUpper`, `DigitLower`, `UpperDigit`, and
+    /// `LowerDigit`.
+    /// ```
+    /// use convert_case::Boundary;
+    /// use Boundary::*;
+    /// assert_eq!(
+    ///     vec![DigitUpper, UpperDigit, DigitLower, LowerDigit],
+    ///     Boundary::digits()
+    /// );
+    /// ```
+    pub fn digits() -> Vec<Boundary> {
+        use Boundary::*;
+        vec![DigitUpper, UpperDigit, DigitLower, LowerDigit]
+    }
+
+    /// Returns the boundaries that are letters followed by digits: `UpperDigit` and `LowerDigit`.
+    /// ```
+    /// use convert_case::Boundary;
+    /// use Boundary::*;
+    /// assert_eq!(
+    ///     vec![UpperDigit, LowerDigit],
+    ///     Boundary::letter_digit()
+    /// );
+    /// ```
+    pub fn letter_digit() -> Vec<Boundary> {
+        use Boundary::*;
+        vec![UpperDigit, LowerDigit]
+    }
+
+    /// Returns the boundaries that are digits followed by letters: `DigitUpper` and
+    /// `DigitLower`.
+    /// ```
+    /// use convert_case::Boundary;
+    /// use Boundary::*;
+    /// assert_eq!(
+    ///     vec![DigitUpper, DigitLower],
+    ///     Boundary::digit_letter()
+    /// );
+    /// ```
+    pub fn digit_letter() -> Vec<Boundary> {
+        use Boundary::*;
+        vec![DigitUpper, DigitLower]
+    }
+
+    /// Returns all boundaries.  Note that this includes the `UpperLower` variant which
+    /// might be unhelpful.  Please look at [`Boundary::defaults`].
+    /// ```
+    /// use convert_case::Boundary;
+    /// use Boundary::*;
+    /// assert_eq!(
+    ///     vec![
+    ///         Hyphen, Underscore, Space, LowerUpper, UpperLower, DigitUpper,
+    ///         UpperDigit, DigitLower, LowerDigit, Acronym,
+    ///     ],
+    ///     Boundary::all()
+    /// );
+    /// ```
+    pub fn all() -> Vec<Boundary> {
+        use Boundary::*;
+        vec![
+            Hyphen, Underscore, Space, LowerUpper, UpperLower, DigitUpper, UpperDigit,
+            DigitLower, LowerDigit, Acronym,
+        ]
+    }
+
+    /// Returns true when the single grapheme `c` is the delimiter this boundary splits on
+    /// (`-`, `_` or a space).  Always false for the non-delimiter boundaries.
+    fn detect_one(&self, c: &str) -> bool {
+        use Boundary::*;
+        match self {
+            Hyphen => c == "-",
+            Underscore => c == "_",
+            Space => c == " ",
+            _ => false,
+        }
+    }
+
+    /// Returns true when the adjacent graphemes `c` then `d` form this boundary.
+    /// Only the case/digit transition boundaries can match here.
+    fn detect_two(&self, c: &str, d: &str) -> bool {
+        use Boundary::*;
+        match self {
+            UpperLower => grapheme_is_uppercase(c) && grapheme_is_lowercase(d),
+            LowerUpper => grapheme_is_lowercase(c) && grapheme_is_uppercase(d),
+            DigitUpper => grapheme_is_digit(c) && grapheme_is_uppercase(d),
+            UpperDigit => grapheme_is_uppercase(c) && grapheme_is_digit(d),
+            DigitLower => grapheme_is_digit(c) && grapheme_is_lowercase(d),
+            LowerDigit => grapheme_is_lowercase(c) && grapheme_is_digit(d),
+            _ => false,
+        }
+    }
+
+    /// Returns true when the graphemes `c`, `d`, `e` are upper, upper, lower and this
+    /// boundary is `Acronym`.  Always false for every other boundary.
+    fn detect_three(&self, c: &str, d: &str, e: &str) -> bool {
+        use Boundary::*;
+        if let Acronym = self {
+            grapheme_is_uppercase(c)
+                && grapheme_is_uppercase(d)
+                && grapheme_is_lowercase(e)
+        } else {
+            false
+        }
+    }
+}
+
+/// True when every `char` of the grapheme is an ASCII digit.
+fn grapheme_is_digit(c: &str) -> bool {
+    c.chars().all(|c| c.is_ascii_digit())
+}
+
+/// True when the grapheme has distinct upper and lower case forms and is already
+/// equal to its uppercase form.
+fn grapheme_is_uppercase(c: &str) -> bool {
+    c.to_uppercase() != c.to_lowercase() && c == c.to_uppercase()
+}
+
+/// True when the grapheme has distinct upper and lower case forms and is already
+/// equal to its lowercase form.
+fn grapheme_is_lowercase(c: &str) -> bool {
+    c.to_uppercase() != c.to_lowercase() && c == c.to_lowercase()
+}
+
+/// Splits `s` into words wherever one of `boundaries` is detected.
+///
+/// Delimiter boundaries (`Hyphen`, `Underscore`, `Space`) consume the grapheme they
+/// match.  All other boundaries split between two graphemes and keep both.  Empty
+/// words, such as those produced by consecutive delimiters, are dropped.
+pub fn split<T>(s: T, boundaries: &[Boundary]) -> Vec<String>
+where
+    T: AsRef<str>,
+{
+    use std::iter::once;
+
+    let s = s.as_ref();
+
+    // Each grapheme gets one split marker:
+    // None: no split
+    // Some(false): split before this grapheme, keeping it
+    // Some(true): split here, consuming this grapheme
+
+    let left_iter = s.graphemes(true);
+    let mid_iter = s.graphemes(true).skip(1);
+    let right_iter = s.graphemes(true).skip(2);
+
+    let singles = left_iter.clone();
+    let doubles = left_iter.clone().zip(mid_iter.clone());
+    let triples = left_iter.zip(mid_iter).zip(right_iter);
+
+    let singles = singles
+        .map(|c| boundaries.iter().any(|b| b.detect_one(c)))
+        .map(|split| if split { Some(true) } else { None });
+    let doubles = doubles
+        .map(|(c, d)| boundaries.iter().any(|b| b.detect_two(c, d)))
+        .map(|split| if split { Some(false) } else { None });
+    let triples = triples
+        .map(|((c, d), e)| boundaries.iter().any(|b| b.detect_three(c, d, e)))
+        .map(|split| if split { Some(false) } else { None });
+
+    // A pair or triple marks the split in front of its *second* grapheme, so both
+    // streams are shifted right by one.  The triple stream is one item shorter than
+    // the pair stream; the trailing `None` pads it so the zip still reaches the
+    // final grapheme.
+    let split_points = singles
+        .zip(once(None).chain(doubles))
+        .zip(once(None).chain(triples).chain(once(None)))
+        .map(|((s, d), t)| s.or(d).or(t));
+
+    let mut words = Vec::new();
+    let mut word = String::new();
+    for (c, split) in s.graphemes(true).zip(split_points) {
+        match split {
+            // no split here
+            None => word.push_str(c),
+            // split here, consume letter
+            Some(true) => words.push(std::mem::take(&mut word)),
+            // split here, keep letter
+            Some(false) => {
+                words.push(std::mem::take(&mut word));
+                word.push_str(c);
+            }
+        }
+    }
+    words.push(word);
+
+    words.into_iter().filter(|s| !s.is_empty()).collect()
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use strum::IntoEnumIterator;
+
+    #[test]
+    fn all_boundaries_in_iter() {
+        let all = Boundary::all();
+        for boundary in Boundary::iter() {
+            assert!(all.contains(&boundary));
+        }
+    }
+
+    #[test]
+    fn split_on_delims() {
+        assert_eq!(
+            vec!["my", "word", "list", "separated", "by", "delims"],
+            split("my_word-list separated-by_delims", &Boundary::delims())
+        )
+    }
+
+    #[test]
+    fn boundaries_found_in_string() {
+        use Boundary::*;
+        assert_eq!(
+            vec![UpperLower],
+            Boundary::list_from(".Aaaa")
+        );
+        assert_eq!(
+            vec![LowerUpper, UpperLower, LowerDigit],
+            Boundary::list_from("a8.Aa.aA")
+        );
+        assert_eq!(
+            Boundary::digits(),
+            Boundary::list_from("b1B1b")
+        );
+        assert_eq!(
+            vec![Hyphen, Underscore, Space, Acronym],
+            Boundary::list_from("AAa -_")
+        );
+    }
+}
diff --git a/tests/string_types.rs b/tests/string_types.rs
new file mode 100644
index 0000000..5e49815
--- /dev/null
+++ b/tests/string_types.rs
@@ -0,0 +1,41 @@
+use convert_case::{Case, Casing};
+
+// use std::ffi::{OsString};
+
+#[test]
+fn string_type() {
+    let s: String = String::from("rust_programming_language");
+    assert_eq!(
+        "RustProgrammingLanguage",
+        s.to_case(Case::Pascal),
+    );
+}
+
+#[test]
+fn str_type() {
+    let s: &str = "rust_programming_language";
+    assert_eq!(
+        "RustProgrammingLanguage",
+        s.to_case(Case::Pascal),
+    );
+}
+
+#[test]
+fn string_ref_type() {
+    let s: String = String::from("rust_programming_language");
+    assert_eq!(
+        "RustProgrammingLanguage",
+        (&s).to_case(Case::Pascal),
+    );
+}
+
+/*
+#[test]
+fn os_string_type() {
+    let s: OsString = OsString::from("rust_programming_language");
+    assert_eq!(
+        "RustProgrammingLanguage",
+        s.to_case(Case::Pascal),
+    );
+}
+*/
-- 
2.7.4