Import strsim 0.10.0 upstream upstream/0.10.0
authorDongHun Kwak <dh0128.kwak@samsung.com>
Tue, 4 Apr 2023 06:05:34 +0000 (15:05 +0900)
committerDongHun Kwak <dh0128.kwak@samsung.com>
Tue, 4 Apr 2023 06:05:34 +0000 (15:05 +0900)
14 files changed:
.cargo_vcs_info.json [new file with mode: 0644]
.editorconfig [new file with mode: 0644]
.gitattributes [new file with mode: 0644]
.github/FUNDING.yml [new file with mode: 0644]
.github/workflows/ci.yml [new file with mode: 0644]
.gitignore [new file with mode: 0644]
CHANGELOG.md [new file with mode: 0644]
Cargo.toml [new file with mode: 0644]
Cargo.toml.orig [new file with mode: 0644]
LICENSE [new file with mode: 0644]
README.md [new file with mode: 0644]
benches/benches.rs [new file with mode: 0644]
src/lib.rs [new file with mode: 0644]
tests/lib.rs [new file with mode: 0644]

diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644 (file)
index 0000000..7d7b408
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "git": {
+    "sha1": "72784f07f22cab2c93512a6eebca272467b1789e"
+  }
+}
diff --git a/.editorconfig b/.editorconfig
new file mode 100644 (file)
index 0000000..ee63547
--- /dev/null
@@ -0,0 +1,13 @@
+# editorconfig.org
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+indent_brace_style = K&R
+indent_size = 4
+indent_style = space
+insert_final_newline = true
+max_line_length = 80
+trim_trailing_whitespace = true
+
diff --git a/.gitattributes b/.gitattributes
new file mode 100644 (file)
index 0000000..80cd9fe
--- /dev/null
@@ -0,0 +1,3 @@
+# Override core.autocrlf settings to keep line endings consistent (LF)
+* text=auto
+
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644 (file)
index 0000000..9ef00ab
--- /dev/null
@@ -0,0 +1,2 @@
+ko_fi: dannyguo
+custom: ['https://www.paypal.me/DannyGuo', 'https://www.buymeacoffee.com/dannyguo']
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644 (file)
index 0000000..ee3a66a
--- /dev/null
@@ -0,0 +1,24 @@
+name: CI
+
+on: [push, pull_request]
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: ["macos-latest", "ubuntu-latest", "windows-latest"]
+        rust: ["beta", "stable", "1.31.0"]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v1
+      - name: Install Rust
+        uses: actions-rs/toolchain@v1
+        with:
+            toolchain: ${{ matrix.rust }}
+            override: true
+      - name: Build the code
+        run: cargo build --verbose
+      - name: Test the code
+        run: cargo test --verbose
diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..41ddad1
--- /dev/null
@@ -0,0 +1,5 @@
+/target
+/Cargo.lock
+*.swp
+*.swo
+.DS_Store
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644 (file)
index 0000000..5b029ff
--- /dev/null
@@ -0,0 +1,203 @@
+# Change Log
+
+This project attempts to adhere to [Semantic Versioning](http://semver.org).
+
+## [Unreleased]
+
+## [0.10.0] - (2020-01-31)
+
+### Added
+
+- Sørensen-Dice implementation (thanks [@robjtede](https://github.com/robjtede))
+
+## [0.9.3] - (2019-12-12)
+
+### Fixed
+
+- Fix Jaro and Jaro-Winkler when the arguments have lengths of 1 and are equal.
+  Previously, the functions would erroneously return 0 instead of 1. Thanks to
+  [@vvrably](https://github.com/vvrably) for pointing out the issue.
+
+## [0.9.2] - (2019-05-09)
+
+### Changed
+
+- Revert back to the standard library hashmap because it will use hashbrown very
+  soon
+- Remove ndarray in favor of using a single vector to represent the 2d grid in
+  Damerau-Levenshtein
+
+## [0.9.1] - (2019-04-08)
+
+### Changed
+
+- Faster Damerau-Levenshtein implementation (thanks [@lovasoa](https://github.com/lovasoa))
+
+## [0.9.0] - (2019-04-06)
+
+### Added
+
+- Generic distance functions (thanks [@lovasoa](https://github.com/lovasoa))
+
+## [0.8.0] - (2018-08-19)
+
+### Added
+
+- Normalized versions of Levenshtein and Damerau-Levenshtein (thanks [@gentoid](https://github.com/gentoid))
+
+## [0.7.0] - (2018-01-17)
+
+### Changed
+
+- Faster Levenshtein implementation (thanks [@wdv4758h](https://github.com/wdv4758h))
+
+### Removed
+
+- Remove the "against_vec" functions. They are one-liners now, so they don't
+  seem to add enough value to justify making the API larger. I didn't find
+  anybody using them when I skimmed through a GitHub search. If you do use them,
+  you can change the calls to something like:
+```rust
+let distances = strings.iter().map(|a| jaro(target, a)).collect();
+```
+
+## [0.6.0] - (2016-12-26)
+
+### Added
+
+- Add optimal string alignment distance
+
+### Fixed
+
+- Fix Damerau-Levenshtein implementation (previous implementation was actually
+  optimal string alignment; see this [Damerau-Levenshtein explanation])
+
+## [0.5.2] - (2016-11-21)
+
+### Changed
+
+- Remove Cargo generated documentation in favor of a [docs.rs] link
+
+## [0.5.1] - (2016-08-23)
+
+### Added
+
+- Add Cargo generated documentation
+
+### Fixed
+
+- Fix panic when Jaro or Jaro-Winkler are given strings both with a length of
+  one
+
+## [0.5.0] - (2016-08-11)
+
+### Changed
+
+- Make Hamming faster (thanks @IBUzPE9) when the two strings have the same
+  length but slower when they have different lengths
+
+## [0.4.1] - (2016-04-18)
+
+### Added
+
+- Add Vagrant setup for development
+- Add AppVeyor configuration for Windows CI
+
+### Fixed
+
+- Fix metrics when given strings with multibyte characters (thanks @WanzenBug)
+
+## [0.4.0] - (2015-06-10)
+
+### Added
+
+- For each metric, add a function that takes a vector of strings and returns a
+vector of results (thanks @ovarene)
+
+## [0.3.0] - (2015-04-30)
+
+### Changed
+
+- Remove usage of unstable Rust features
+
+## [0.2.5] - (2015-04-24)
+
+### Fixed
+
+- Remove unnecessary `Float` import from doc tests
+
+## [0.2.4] - (2015-04-15)
+
+### Fixed
+
+- Remove unused `core` feature flag
+
+## [0.2.3] - (2015-04-01)
+
+### Fixed
+
+- Remove now unnecessary `Float` import
+
+## [0.2.2] - (2015-03-29)
+
+### Fixed
+
+- Remove usage of `char_at` (marked as unstable)
+
+## [0.2.1] - (2015-02-20)
+
+### Fixed
+
+- Update bit vector import to match Rust update
+
+## [0.2.0] - (2015-02-19)
+
+### Added
+
+- Implement Damerau-Levenshtein
+- Add tests in docs
+
+## [0.1.1] - (2015-02-10)
+
+### Added
+
+- Configure Travis for CI
+- Add rustdoc comments
+
+### Fixed
+
+- Limit Jaro-Winkler return value to a maximum of 1.0
+- Fix float comparisons in tests
+
+## [0.1.0] - (2015-02-09)
+
+### Added
+
+- Implement Hamming, Jaro, Jaro-Winkler, and Levenshtein
+
+[Unreleased]: https://github.com/dguo/strsim-rs/compare/0.10.0...HEAD
+[0.10.0]: https://github.com/dguo/strsim-rs/compare/0.9.3...0.10.0
+[0.9.3]: https://github.com/dguo/strsim-rs/compare/0.9.2...0.9.3
+[0.9.2]: https://github.com/dguo/strsim-rs/compare/0.9.1...0.9.2
+[0.9.1]: https://github.com/dguo/strsim-rs/compare/0.9.0...0.9.1
+[0.9.0]: https://github.com/dguo/strsim-rs/compare/0.8.0...0.9.0
+[0.8.0]: https://github.com/dguo/strsim-rs/compare/0.7.0...0.8.0
+[0.7.0]: https://github.com/dguo/strsim-rs/compare/0.6.0...0.7.0
+[0.6.0]: https://github.com/dguo/strsim-rs/compare/0.5.2...0.6.0
+[0.5.2]: https://github.com/dguo/strsim-rs/compare/0.5.1...0.5.2
+[0.5.1]: https://github.com/dguo/strsim-rs/compare/0.5.0...0.5.1
+[0.5.0]: https://github.com/dguo/strsim-rs/compare/0.4.1...0.5.0
+[0.4.1]: https://github.com/dguo/strsim-rs/compare/0.4.0...0.4.1
+[0.4.0]: https://github.com/dguo/strsim-rs/compare/0.3.0...0.4.0
+[0.3.0]: https://github.com/dguo/strsim-rs/compare/0.2.5...0.3.0
+[0.2.5]: https://github.com/dguo/strsim-rs/compare/0.2.4...0.2.5
+[0.2.4]: https://github.com/dguo/strsim-rs/compare/0.2.3...0.2.4
+[0.2.3]: https://github.com/dguo/strsim-rs/compare/0.2.2...0.2.3
+[0.2.2]: https://github.com/dguo/strsim-rs/compare/0.2.1...0.2.2
+[0.2.1]: https://github.com/dguo/strsim-rs/compare/0.2.0...0.2.1
+[0.2.0]: https://github.com/dguo/strsim-rs/compare/0.1.1...0.2.0
+[0.1.1]: https://github.com/dguo/strsim-rs/compare/0.1.0...0.1.1
+[0.1.0]: https://github.com/dguo/strsim-rs/compare/fabad4...0.1.0
+[docs.rs]: https://docs.rs/strsim/
+[Damerau-Levenshtein explanation]:
+http://scarcitycomputing.blogspot.com/2013/04/damerau-levenshtein-edit-distance.html
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644 (file)
index 0000000..aca741a
--- /dev/null
@@ -0,0 +1,24 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "strsim"
+version = "0.10.0"
+authors = ["Danny Guo <danny@dannyguo.com>"]
+exclude = ["/.github", "/dev"]
+description = "Implementations of string similarity metrics. Includes Hamming, Levenshtein,\nOSA, Damerau-Levenshtein, Jaro, Jaro-Winkler, and Sørensen-Dice.\n"
+homepage = "https://github.com/dguo/strsim-rs"
+documentation = "https://docs.rs/strsim/"
+readme = "README.md"
+keywords = ["string", "similarity", "Hamming", "Levenshtein", "Jaro"]
+license = "MIT"
+repository = "https://github.com/dguo/strsim-rs"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644 (file)
index 0000000..426210c
--- /dev/null
@@ -0,0 +1,15 @@
+[package]
+name = "strsim"
+version = "0.10.0"
+authors = ["Danny Guo <danny@dannyguo.com>"]
+description = """
+Implementations of string similarity metrics. Includes Hamming, Levenshtein,
+OSA, Damerau-Levenshtein, Jaro, Jaro-Winkler, and Sørensen-Dice.
+"""
+license = "MIT"
+readme = "README.md"
+keywords = ["string", "similarity", "Hamming", "Levenshtein", "Jaro"]
+homepage = "https://github.com/dguo/strsim-rs"
+repository = "https://github.com/dguo/strsim-rs"
+documentation = "https://docs.rs/strsim/"
+exclude = ["/.github", "/dev"]
diff --git a/LICENSE b/LICENSE
new file mode 100644 (file)
index 0000000..8d1fbe1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Danny Guo
+Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
+Copyright (c) 2018 Akash Kurdekar
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..d8c9780
--- /dev/null
+++ b/README.md
@@ -0,0 +1,102 @@
+# strsim-rs
+
+[![Crates.io](https://img.shields.io/crates/v/strsim.svg)](https://crates.io/crates/strsim)
+[![Crates.io](https://img.shields.io/crates/l/strsim.svg?maxAge=2592000)](https://github.com/dguo/strsim-rs/blob/master/LICENSE)
+[![CI status](https://github.com/dguo/strsim-rs/workflows/CI/badge.svg)](https://github.com/dguo/strsim-rs/actions?query=branch%3Amaster)
+[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
+
+[Rust](https://www.rust-lang.org) implementations of [string similarity metrics]:
+  - [Hamming]
+  - [Levenshtein] - distance & normalized
+  - [Optimal string alignment]
+  - [Damerau-Levenshtein] - distance & normalized
+  - [Jaro and Jaro-Winkler] - this implementation of Jaro-Winkler does not limit the common prefix length
+  - [Sørensen-Dice]
+
+The normalized versions return values between `0.0` and `1.0`, where `1.0` means
+an exact match.
+
+There are also generic versions of the functions for non-string inputs.
+
+## Installation
+
+`strsim` is available on [crates.io](https://crates.io/crates/strsim). Add it to
+your `Cargo.toml`:
+```toml
+[dependencies]
+strsim = "0.10.0"
+```
+
+## Usage
+
+Go to [Docs.rs](https://docs.rs/strsim/) for the full documentation. You can
+also clone the repo, and run `$ cargo doc --open`.
+
+### Examples
+
+```rust
+extern crate strsim;
+
+use strsim::{hamming, levenshtein, normalized_levenshtein, osa_distance,
+             damerau_levenshtein, normalized_damerau_levenshtein, jaro,
+             jaro_winkler, sorensen_dice};
+
+fn main() {
+    match hamming("hamming", "hammers") {
+        Ok(distance) => assert_eq!(3, distance),
+        Err(why) => panic!("{:?}", why)
+    }
+
+    assert_eq!(levenshtein("kitten", "sitting"), 3);
+
+    assert!((normalized_levenshtein("kitten", "sitting") - 0.571).abs() < 0.001);
+
+    assert_eq!(osa_distance("ac", "cba"), 3);
+
+    assert_eq!(damerau_levenshtein("ac", "cba"), 2);
+
+    assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.272).abs() <
+            0.001);
+
+    assert!((jaro("Friedrich Nietzsche", "Jean-Paul Sartre") - 0.392).abs() <
+            0.001);
+
+    assert!((jaro_winkler("cheeseburger", "cheese fries") - 0.911).abs() <
+            0.001);
+
+    assert_eq!(sorensen_dice("web applications", "applications of the web"),
+        0.7878787878787878);
+}
+```
+
+Using the generic versions of the functions:
+
+```rust
+extern crate strsim;
+
+use strsim::generic_levenshtein;
+
+fn main() {
+    assert_eq!(2, generic_levenshtein(&[1, 2, 3], &[0, 2, 5]));
+}
+```
+
+## Contributing
+
+If you don't want to install Rust itself, you can run `$ ./dev` for a
+development CLI if you have [Docker] installed.
+
+Benchmarks require a Nightly toolchain. Run `$ cargo +nightly bench`.
+
+## License
+
+[MIT](https://github.com/dguo/strsim-rs/blob/master/LICENSE)
+
+[string similarity metrics]:http://en.wikipedia.org/wiki/String_metric
+[Damerau-Levenshtein]:http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
+[Jaro and Jaro-Winkler]:http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+[Levenshtein]:http://en.wikipedia.org/wiki/Levenshtein_distance
+[Hamming]:http://en.wikipedia.org/wiki/Hamming_distance
+[Optimal string alignment]:https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
+[Sørensen-Dice]:http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
+[Docker]:https://docs.docker.com/engine/installation/
diff --git a/benches/benches.rs b/benches/benches.rs
new file mode 100644 (file)
index 0000000..b7ba829
--- /dev/null
@@ -0,0 +1,100 @@
+//! Benchmarks for strsim.
+
+#![feature(test)]
+
+extern crate strsim;
+
+mod benches {
+    use self::test::Bencher;
+    use super::*;
+
+    extern crate test;
+
+    #[bench]
+    fn bench_hamming(bencher: &mut Bencher) {
+        let a = "ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGG";
+        let b = "CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGC";
+        bencher.iter(|| {
+            strsim::hamming(a, b).unwrap();
+        })
+    }
+
+    #[bench]
+    fn bench_jaro(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::jaro(&a, &b);
+        })
+    }
+
+    #[bench]
+    fn bench_jaro_winkler(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::jaro_winkler(&a, &b);
+        })
+    }
+
+    #[bench]
+    fn bench_levenshtein(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::levenshtein(&a, &b);
+        })
+    }
+
+    #[bench]
+    fn bench_levenshtein_on_u8(bencher: &mut Bencher) {
+        bencher.iter(|| {
+            strsim::generic_levenshtein(&vec![0u8; 30], &vec![7u8; 31]);
+        })
+    }
+
+    #[bench]
+    fn bench_normalized_levenshtein(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::normalized_levenshtein(&a, &b);
+        })
+    }
+
+    #[bench]
+    fn bench_osa_distance(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::osa_distance(&a, &b);
+        })
+    }
+
+    #[bench]
+    fn bench_damerau_levenshtein(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::damerau_levenshtein(&a, &b);
+        })
+    }
+
+    #[bench]
+    fn bench_normalized_damerau_levenshtein(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::normalized_damerau_levenshtein(&a, &b);
+        })
+    }
+
+    #[bench]
+    fn bench_sorensen_dice(bencher: &mut Bencher) {
+        let a = "Philosopher Friedrich Nietzsche";
+        let b = "Philosopher Jean-Paul Sartre";
+        bencher.iter(|| {
+            strsim::sorensen_dice(&a, &b);
+        })
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644 (file)
index 0000000..480a642
--- /dev/null
@@ -0,0 +1,1005 @@
+//! This library implements string similarity metrics.
+
+#![forbid(unsafe_code)]
+
+use std::char;
+use std::cmp::{max, min};
+use std::collections::HashMap;
+use std::error::Error;
+use std::fmt::{self, Display, Formatter};
+use std::hash::Hash;
+use std::str::Chars;
+
+#[derive(Debug, PartialEq)]
+pub enum StrSimError {
+    DifferentLengthArgs,
+}
+
+impl Display for StrSimError {
+    fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
+        let text = match self {
+            StrSimError::DifferentLengthArgs => "Differing length arguments provided",
+        };
+
+        write!(fmt, "{}", text)
+    }
+}
+
+impl Error for StrSimError {}
+
+pub type HammingResult = Result<usize, StrSimError>;
+
+/// Calculates the number of positions in the two sequences where the elements
+/// differ. Returns an error if the sequences have different lengths.
+pub fn generic_hamming<Iter1, Iter2, Elem1, Elem2>(a: Iter1, b: Iter2) -> HammingResult
+    where Iter1: IntoIterator<Item=Elem1>,
+          Iter2: IntoIterator<Item=Elem2>,
+          Elem1: PartialEq<Elem2> {
+    let (mut ita, mut itb) = (a.into_iter(), b.into_iter());
+    let mut count = 0;
+    loop {
+        match (ita.next(), itb.next()){
+            (Some(x), Some(y)) => if x != y { count += 1 },
+            (None, None) => return Ok(count),
+            _ => return Err(StrSimError::DifferentLengthArgs),
+        }
+    }
+}
+
+/// Calculates the number of positions in the two strings where the characters
+/// differ. Returns an error if the strings have different lengths.
+///
+/// ```
+/// use strsim::{hamming, StrSimError::DifferentLengthArgs};
+///
+/// assert_eq!(Ok(3), hamming("hamming", "hammers"));
+///
+/// assert_eq!(Err(DifferentLengthArgs), hamming("hamming", "ham"));
+/// ```
+pub fn hamming(a: &str, b: &str) -> HammingResult {
+    generic_hamming(a.chars(), b.chars())
+}
+
+/// Calculates the Jaro similarity between two sequences. The returned value
+/// is between 0.0 and 1.0 (higher value means more similar).
+pub fn generic_jaro<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
+    where &'a Iter1: IntoIterator<Item=Elem1>,
+          &'b Iter2: IntoIterator<Item=Elem2>,
+          Elem1: PartialEq<Elem2> {
+    let a_len = a.into_iter().count();
+    let b_len = b.into_iter().count();
+
+    // The check for lengths of one here is to prevent integer overflow when
+    // calculating the search range.
+    if a_len == 0 && b_len == 0 {
+        return 1.0;
+    } else if a_len == 0 || b_len == 0 {
+        return 0.0;
+    } else if a_len == 1 && b_len == 1 {
+        return if a.into_iter().eq(b.into_iter()) { 1.0} else { 0.0 };
+    }
+
+    let search_range = (max(a_len, b_len) / 2) - 1;
+
+    let mut b_consumed = Vec::with_capacity(b_len);
+    for _ in 0..b_len {
+        b_consumed.push(false);
+    }
+    let mut matches = 0.0;
+
+    let mut transpositions = 0.0;
+    let mut b_match_index = 0;
+
+    for (i, a_elem) in a.into_iter().enumerate() {
+        let min_bound =
+            // prevent integer wrapping
+            if i > search_range {
+                max(0, i - search_range)
+            } else {
+                0
+            };
+
+        let max_bound = min(b_len - 1, i + search_range);
+
+        if min_bound > max_bound {
+            continue;
+        }
+
+        for (j, b_elem) in b.into_iter().enumerate() {
+            if min_bound <= j && j <= max_bound && a_elem == b_elem &&
+                !b_consumed[j] {
+                b_consumed[j] = true;
+                matches += 1.0;
+
+                if j < b_match_index {
+                    transpositions += 1.0;
+                }
+                b_match_index = j;
+
+                break;
+            }
+        }
+    }
+
+    if matches == 0.0 {
+        0.0
+    } else {
+        (1.0 / 3.0) * ((matches / a_len as f64) +
+            (matches / b_len as f64) +
+            ((matches - transpositions) / matches))
+    }
+}
+
+struct StringWrapper<'a>(&'a str);
+
+impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> {
+    type Item = char;
+    type IntoIter = Chars<'b>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.chars()
+    }
+}
+
+/// Calculates the Jaro similarity between two strings. The returned value
+/// is between 0.0 and 1.0 (higher value means more similar).
+///
+/// ```
+/// use strsim::jaro;
+///
+/// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
+///         0.001);
+/// ```
+pub fn jaro(a: &str, b: &str) -> f64 {
+    generic_jaro(&StringWrapper(a), &StringWrapper(b))
+}
+
+/// Like Jaro but gives a boost to sequences that have a common prefix.
+pub fn generic_jaro_winkler<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
+    where &'a Iter1: IntoIterator<Item=Elem1>,
+          &'b Iter2: IntoIterator<Item=Elem2>,
+          Elem1: PartialEq<Elem2> {
+    let jaro_distance = generic_jaro(a, b);
+
+    // Don't limit the length of the common prefix
+    let prefix_length = a.into_iter()
+        .zip(b.into_iter())
+        .take_while(|&(ref a_elem, ref b_elem)| a_elem == b_elem)
+        .count();
+
+    let jaro_winkler_distance =
+        jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance));
+
+    if jaro_winkler_distance <= 1.0 {
+        jaro_winkler_distance
+    } else {
+        1.0
+    }
+}
+
+/// Like Jaro but gives a boost to strings that have a common prefix.
+///
+/// ```
+/// use strsim::jaro_winkler;
+///
+/// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
+///         0.001);
+/// ```
+pub fn jaro_winkler(a: &str, b: &str) -> f64 {
+    generic_jaro_winkler(&StringWrapper(a), &StringWrapper(b))
+}
+
+/// Calculates the minimum number of insertions, deletions, and substitutions
+/// required to change one sequence into the other.
+///
+/// ```
+/// use strsim::generic_levenshtein;
+///
+/// assert_eq!(3, generic_levenshtein(&[1,2,3], &[1,2,3,4,5,6]));
+/// ```
+pub fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> usize
+    where &'a Iter1: IntoIterator<Item=Elem1>,
+          &'b Iter2: IntoIterator<Item=Elem2>,
+          Elem1: PartialEq<Elem2> {
+    let b_len = b.into_iter().count();
+
+    if a.into_iter().next().is_none() { return b_len; }
+
+    let mut cache: Vec<usize> = (1..b_len+1).collect();
+
+    let mut result = 0;
+
+    for (i, a_elem) in a.into_iter().enumerate() {
+        result = i + 1;
+        let mut distance_b = i;
+
+        for (j, b_elem) in b.into_iter().enumerate() {
+            let cost = if a_elem == b_elem { 0usize } else { 1usize };
+            let distance_a = distance_b + cost;
+            distance_b = cache[j];
+            result = min(result + 1, min(distance_a, distance_b + 1));
+            cache[j] = result;
+        }
+    }
+
+    result
+}
+
+/// Calculates the minimum number of insertions, deletions, and substitutions
+/// required to change one string into the other.
+///
+/// ```
+/// use strsim::levenshtein;
+///
+/// assert_eq!(3, levenshtein("kitten", "sitting"));
+/// ```
+pub fn levenshtein(a: &str, b: &str) -> usize {
+    generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
+}
+
+/// Calculates a normalized score of the Levenshtein algorithm between 0.0 and
+/// 1.0 (inclusive), where 1.0 means the strings are the same.
+///
+/// ```
+/// use strsim::normalized_levenshtein;
+///
+/// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
+/// assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
+/// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
+/// assert!(normalized_levenshtein("first", "").abs() < 0.00001);
+/// assert!((normalized_levenshtein("string", "string") - 1.0).abs() < 0.00001);
+/// ```
+pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
+    if a.is_empty() && b.is_empty() {
+        return 1.0;
+    }
+    1.0 - (levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
+}
+
+/// Like Levenshtein but allows for adjacent transpositions. Each substring can
+/// only be edited once.
+///
+/// ```
+/// use strsim::osa_distance;
+///
+/// assert_eq!(3, osa_distance("ab", "bca"));
+/// ```
+pub fn osa_distance(a: &str, b: &str) -> usize {
+    let a_len = a.chars().count();
+    let b_len = b.chars().count();
+    if a == b { return 0; }
+    else if a_len == 0 { return b_len; }
+    else if b_len == 0 { return a_len; }
+
+    let mut prev_two_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
+    let mut prev_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
+    let mut curr_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
+
+    let mut prev_a_char = char::MAX;
+    let mut prev_b_char = char::MAX;
+
+    for i in 0..(b_len + 1) {
+        prev_two_distances.push(i);
+        prev_distances.push(i);
+        curr_distances.push(0);
+    }
+
+    for (i, a_char) in a.chars().enumerate() {
+        curr_distances[0] = i + 1;
+
+        for (j, b_char) in b.chars().enumerate() {
+            let cost = if a_char == b_char { 0 } else { 1 };
+            curr_distances[j + 1] = min(curr_distances[j] + 1,
+                                        min(prev_distances[j + 1] + 1,
+                                            prev_distances[j] + cost));
+            if i > 0 && j > 0 && a_char != b_char &&
+                a_char == prev_b_char && b_char == prev_a_char {
+                curr_distances[j + 1] = min(curr_distances[j + 1],
+                                            prev_two_distances[j - 1] + 1);
+            }
+
+            prev_b_char = b_char;
+        }
+
+        prev_two_distances.clone_from(&prev_distances);
+        prev_distances.clone_from(&curr_distances);
+        prev_a_char = a_char;
+    }
+
+    curr_distances[b_len]
+
+}
+
+/* Returns the final index for a value in a single vector that represents a fixed
+   2d grid */
+fn flat_index(i: usize, j: usize, width: usize) -> usize {
+    j * width + i
+}
+
+/// Like optimal string alignment, but substrings can be edited an unlimited
+/// number of times, and the triangle inequality holds.
+///
+/// ```
+/// use strsim::generic_damerau_levenshtein;
+///
+/// assert_eq!(2, generic_damerau_levenshtein(&[1,2], &[2,3,1]));
+/// ```
+pub fn generic_damerau_levenshtein<Elem>(a_elems: &[Elem], b_elems: &[Elem]) -> usize
+    where Elem: Eq + Hash + Clone {
+    let a_len = a_elems.len();
+    let b_len = b_elems.len();
+
+    if a_len == 0 { return b_len; }
+    if b_len == 0 { return a_len; }
+
+    let width = a_len + 2;
+    let mut distances = vec![0; (a_len + 2) * (b_len + 2)];
+    let max_distance = a_len + b_len;
+    distances[0] = max_distance;
+
+    for i in 0..(a_len + 1) {
+        distances[flat_index(i + 1, 0, width)] = max_distance;
+        distances[flat_index(i + 1, 1, width)] = i;
+    }
+
+    for j in 0..(b_len + 1) {
+        distances[flat_index(0, j + 1, width)] = max_distance;
+        distances[flat_index(1, j + 1, width)] = j;
+    }
+
+    let mut elems: HashMap<Elem, usize> = HashMap::with_capacity(64);
+
+    for i in 1..(a_len + 1) {
+        let mut db = 0;
+
+        for j in 1..(b_len + 1) {
+            let k = match elems.get(&b_elems[j - 1]) {
+                Some(&value) => value,
+                None => 0
+            };
+
+            let insertion_cost = distances[flat_index(i, j + 1, width)] + 1;
+            let deletion_cost = distances[flat_index(i + 1, j, width)] + 1;
+            let transposition_cost = distances[flat_index(k, db, width)] +
+                (i - k - 1) + 1 + (j - db - 1);
+
+            let mut substitution_cost = distances[flat_index(i, j, width)] + 1;
+            if a_elems[i - 1] == b_elems[j - 1] {
+                db = j;
+                substitution_cost -= 1;
+            }
+
+            distances[flat_index(i + 1, j + 1, width)] = min(substitution_cost,
+                min(insertion_cost, min(deletion_cost, transposition_cost)));
+        }
+
+        elems.insert(a_elems[i - 1].clone(), i);
+    }
+
+    distances[flat_index(a_len + 1, b_len + 1, width)]
+}
+
+/// Like optimal string alignment, but substrings can be edited an unlimited
+/// number of times, and the triangle inequality holds.
+///
+/// ```
+/// use strsim::damerau_levenshtein;
+///
+/// assert_eq!(2, damerau_levenshtein("ab", "bca"));
+/// ```
+pub fn damerau_levenshtein(a: &str, b: &str) -> usize {
+    let (x, y): (Vec<_>, Vec<_>) = (a.chars().collect(), b.chars().collect());
+    generic_damerau_levenshtein(x.as_slice(), y.as_slice())
+}
+
+/// Calculates a normalized score of the Damerau–Levenshtein algorithm between
+/// 0.0 and 1.0 (inclusive), where 1.0 means the strings are the same.
+///
+/// ```
+/// use strsim::normalized_damerau_levenshtein;
+///
+/// assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
+/// assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
+/// assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
+/// assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
+/// assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
+/// ```
+pub fn normalized_damerau_levenshtein(a: &str, b: &str) -> f64 {
+    if a.is_empty() && b.is_empty() {
+        return 1.0;
+    }
+    1.0 - (damerau_levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
+}
+
+/// Returns an Iterator of char tuples.
+fn bigrams(s: &str) -> impl Iterator<Item=(char, char)> + '_ {
+    s.chars().zip(s.chars().skip(1))
+}
+
+
+/// Calculates a Sørensen-Dice similarity distance using bigrams.
+/// See http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient.
+///
+/// ```
+/// use strsim::sorensen_dice;
+///
+/// assert_eq!(1.0, sorensen_dice("", ""));
+/// assert_eq!(0.0, sorensen_dice("", "a"));
+/// assert_eq!(0.0, sorensen_dice("french", "quebec"));
+/// assert_eq!(1.0, sorensen_dice("ferris", "ferris"));
+/// assert_eq!(1.0, sorensen_dice("ferris", "ferris"));
+/// assert_eq!(0.8888888888888888, sorensen_dice("feris", "ferris"));
+/// ```
+pub fn sorensen_dice(a: &str, b: &str) -> f64 {
+    // implementation guided by
+    // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/index.js#L6
+
+    let a: String = a.chars().filter(|&x| !char::is_whitespace(x)).collect();
+    let b: String = b.chars().filter(|&x| !char::is_whitespace(x)).collect();
+
+    if a.len() == 0 && b.len() == 0 {
+        return 1.0;
+    }
+
+    if a.len() == 0 || b.len() == 0 {
+        return 0.0;
+    }
+
+    if a == b {
+        return 1.0;
+    }
+
+    if a.len() == 1 && b.len() == 1 {
+        return 0.0;
+    }
+
+    if a.len() < 2 || b.len() < 2 {
+        return 0.0;
+    }
+
+    let mut a_bigrams: HashMap<(char, char), usize> = HashMap::new();
+
+    for bigram in bigrams(&a) {
+        *a_bigrams.entry(bigram).or_insert(0) += 1;
+    }
+
+    let mut intersection_size = 0;
+
+    for bigram in bigrams(&b) {
+        a_bigrams.entry(bigram).and_modify(|bi| {
+            if *bi > 0 {
+                *bi -= 1;
+                intersection_size += 1;
+            }
+        });
+    }
+
+    (2 * intersection_size) as f64 / (a.len() + b.len() - 2) as f64
+}
+
+
#[cfg(test)]
// Unit tests for every public metric in this crate plus the private
// `bigrams` helper. Fuzzy metrics are checked against a small absolute
// tolerance rather than for exact floating-point equality.
mod tests {
    use super::*;

    // --- bigrams helper ---

    #[test]
    fn bigrams_iterator() {
        let mut bi = bigrams("abcde");

        assert_eq!(Some(('a', 'b')), bi.next());
        assert_eq!(Some(('b', 'c')), bi.next());
        assert_eq!(Some(('c', 'd')), bi.next());
        assert_eq!(Some(('d', 'e')), bi.next());
        assert_eq!(None, bi.next());
    }

    // --- hamming ---

    // Convenience wrapper: `hamming` returns a Result, so compare against
    // the expected Ok value once here instead of in every test.
    fn assert_hamming_dist(dist: usize, str1: &str, str2: &str) {
        assert_eq!(Ok(dist), hamming(str1, str2));
    }

    #[test]
    fn hamming_empty() {
        assert_hamming_dist(0, "", "")
    }

    #[test]
    fn hamming_same() {
        assert_hamming_dist(0, "hamming", "hamming")
    }

    #[test]
    fn hamming_numbers() {
        // The generic form accepts any equal-length iterables, not just strs.
        assert_eq!(Ok(1), generic_hamming(&[1, 2, 4], &[1, 2, 3]));
    }

    #[test]
    fn hamming_diff() {
        assert_hamming_dist(3, "hamming", "hammers")
    }

    #[test]
    fn hamming_diff_multibyte() {
        // Distance is measured in chars, not bytes: two substituted chars.
        assert_hamming_dist(2, "hamming", "h香mmüng");
    }

    #[test]
    fn hamming_unequal_length() {
        // Hamming distance is only defined for equal-length sequences.
        assert_eq!(
            Err(StrSimError::DifferentLengthArgs),
            generic_hamming("ham".chars(), "hamming".chars())
        );
    }

    #[test]
    fn hamming_names() {
        assert_hamming_dist(14, "Friedrich Nietzs", "Jean-Paul Sartre")
    }

    // --- jaro ---

    #[test]
    fn jaro_both_empty() {
        assert_eq!(1.0, jaro("", ""));
    }

    #[test]
    fn jaro_first_empty() {
        assert_eq!(0.0, jaro("", "jaro"));
    }

    #[test]
    fn jaro_second_empty() {
        assert_eq!(0.0, jaro("distance", ""));
    }

    #[test]
    fn jaro_same() {
        assert_eq!(1.0, jaro("jaro", "jaro"));
    }

    #[test]
    fn jaro_multibyte() {
        // NOTE(review): these assertions lack `.abs()`, so they only bound
        // the score from below (any value > 0.817 passes) — likely intended
        // to mirror jaro_diff_short's `.abs()` form.
        assert!((0.818 - jaro("testabctest", "testöঙ香test")) < 0.001);
        assert!((0.818 - jaro("testöঙ香test", "testabctest")) < 0.001);
    }

    #[test]
    fn jaro_diff_short() {
        assert!((0.767 - jaro("dixon", "dicksonx")).abs() < 0.001);
    }

    #[test]
    fn jaro_diff_one_character() {
        assert_eq!(0.0, jaro("a", "b"));
    }

    #[test]
    fn jaro_same_one_character() {
        assert_eq!(1.0, jaro("a", "a"));
    }

    #[test]
    fn generic_jaro_diff() {
        // The generic form works over arbitrary element slices.
        assert_eq!(0.0, generic_jaro(&[1, 2], &[3, 4]));
    }

    #[test]
    fn jaro_diff_one_and_two() {
        assert!((0.83 - jaro("a", "ab")).abs() < 0.01);
    }

    #[test]
    fn jaro_diff_two_and_one() {
        assert!((0.83 - jaro("ab", "a")).abs() < 0.01);
    }

    #[test]
    fn jaro_diff_no_transposition() {
        assert!((0.822 - jaro("dwayne", "duane")).abs() < 0.001);
    }

    #[test]
    fn jaro_diff_with_transposition() {
        assert!((0.944 - jaro("martha", "marhta")).abs() < 0.001);
    }

    #[test]
    fn jaro_names() {
        assert!((0.392 - jaro("Friedrich Nietzsche",
                              "Jean-Paul Sartre")).abs() < 0.001);
    }

    // --- jaro_winkler ---

    #[test]
    fn jaro_winkler_both_empty() {
        assert_eq!(1.0, jaro_winkler("", ""));
    }

    #[test]
    fn jaro_winkler_first_empty() {
        assert_eq!(0.0, jaro_winkler("", "jaro-winkler"));
    }

    #[test]
    fn jaro_winkler_second_empty() {
        assert_eq!(0.0, jaro_winkler("distance", ""));
    }

    #[test]
    fn jaro_winkler_same() {
        assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler"));
    }

    #[test]
    fn jaro_winkler_multibyte() {
        assert!((0.89 - jaro_winkler("testabctest", "testöঙ香test")).abs() <
            0.001);
        assert!((0.89 - jaro_winkler("testöঙ香test", "testabctest")).abs() <
            0.001);
    }

    #[test]
    fn jaro_winkler_diff_short() {
        // Symmetric: both argument orders give the same score.
        assert!((0.813 - jaro_winkler("dixon", "dicksonx")).abs() < 0.001);
        assert!((0.813 - jaro_winkler("dicksonx", "dixon")).abs() < 0.001);
    }

    #[test]
    fn jaro_winkler_diff_one_character() {
        assert_eq!(0.0, jaro_winkler("a", "b"));
    }

    #[test]
    fn jaro_winkler_same_one_character() {
        assert_eq!(1.0, jaro_winkler("a", "a"));
    }

    #[test]
    fn jaro_winkler_diff_no_transposition() {
        assert!((0.840 - jaro_winkler("dwayne", "duane")).abs() < 0.001);
    }

    #[test]
    fn jaro_winkler_diff_with_transposition() {
        assert!((0.961 - jaro_winkler("martha", "marhta")).abs() < 0.001);
    }

    #[test]
    fn jaro_winkler_names() {
        assert!((0.562 - jaro_winkler("Friedrich Nietzsche",
                                      "Fran-Paul Sartre")).abs() < 0.001);
    }

    #[test]
    fn jaro_winkler_long_prefix() {
        // A shared prefix boosts the plain Jaro score.
        assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
            0.001);
    }

    #[test]
    fn jaro_winkler_more_names() {
        assert!((0.868 - jaro_winkler("Thorkel", "Thorgier")).abs() < 0.001);
    }

    #[test]
    fn jaro_winkler_length_of_one() {
        assert!((0.738 - jaro_winkler("Dinsdale", "D")).abs() < 0.001);
    }

    #[test]
    fn jaro_winkler_very_long_prefix() {
        assert!((1.0 - jaro_winkler("thequickbrownfoxjumpedoverx",
                                    "thequickbrownfoxjumpedovery")).abs() <
            0.001);
    }

    // --- levenshtein ---

    #[test]
    fn levenshtein_empty() {
        assert_eq!(0, levenshtein("", ""));
    }

    #[test]
    fn levenshtein_same() {
        assert_eq!(0, levenshtein("levenshtein", "levenshtein"));
    }

    #[test]
    fn levenshtein_diff_short() {
        assert_eq!(3, levenshtein("kitten", "sitting"));
    }

    #[test]
    fn levenshtein_diff_with_space() {
        assert_eq!(5, levenshtein("hello, world", "bye, world"));
    }

    #[test]
    fn levenshtein_diff_multibyte() {
        // Char-based: three substitutions regardless of byte widths.
        assert_eq!(3, levenshtein("öঙ香", "abc"));
        assert_eq!(3, levenshtein("abc", "öঙ香"));
    }

    #[test]
    fn levenshtein_diff_longer() {
        let a = "The quick brown fox jumped over the angry dog.";
        let b = "Lorem ipsum dolor sit amet, dicta latine an eam.";
        assert_eq!(37, levenshtein(a, b));
    }

    #[test]
    fn levenshtein_first_empty() {
        assert_eq!(7, levenshtein("", "sitting"));
    }

    #[test]
    fn levenshtein_second_empty() {
        assert_eq!(6, levenshtein("kitten", ""));
    }

    // --- normalized_levenshtein ---

    #[test]
    fn normalized_levenshtein_diff_short() {
        assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
    }

    #[test]
    fn normalized_levenshtein_for_empty_strings() {
        assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
    }

    #[test]
    fn normalized_levenshtein_first_empty() {
        assert!(normalized_levenshtein("", "second").abs() < 0.00001);
    }

    #[test]
    fn normalized_levenshtein_second_empty() {
        assert!(normalized_levenshtein("first", "").abs() < 0.00001);
    }

    #[test]
    fn normalized_levenshtein_identical_strings() {
        assert!((normalized_levenshtein("identical", "identical") - 1.0).abs() < 0.00001);
    }

    // --- osa_distance (optimal string alignment) ---

    #[test]
    fn osa_distance_empty() {
        assert_eq!(0, osa_distance("", ""));
    }

    #[test]
    fn osa_distance_same() {
        assert_eq!(0, osa_distance("damerau", "damerau"));
    }

    #[test]
    fn osa_distance_first_empty() {
        assert_eq!(7, osa_distance("", "damerau"));
    }

    #[test]
    fn osa_distance_second_empty() {
        assert_eq!(7, osa_distance("damerau", ""));
    }

    #[test]
    fn osa_distance_diff() {
        assert_eq!(3, osa_distance("ca", "abc"));
    }

    #[test]
    fn osa_distance_diff_short() {
        assert_eq!(3, osa_distance("damerau", "aderua"));
    }

    #[test]
    fn osa_distance_diff_reversed() {
        assert_eq!(3, osa_distance("aderua", "damerau"));
    }

    #[test]
    fn osa_distance_diff_multibyte() {
        assert_eq!(3, osa_distance("öঙ香", "abc"));
        assert_eq!(3, osa_distance("abc", "öঙ香"));
    }

    #[test]
    fn osa_distance_diff_unequal_length() {
        assert_eq!(6, osa_distance("damerau", "aderuaxyz"));
    }

    #[test]
    fn osa_distance_diff_unequal_length_reversed() {
        assert_eq!(6, osa_distance("aderuaxyz", "damerau"));
    }

    #[test]
    fn osa_distance_diff_comedians() {
        assert_eq!(5, osa_distance("Stewart", "Colbert"));
    }

    #[test]
    fn osa_distance_many_transpositions() {
        assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk"));
    }

    #[test]
    fn osa_distance_diff_longer() {
        let a = "The quick brown fox jumped over the angry dog.";
        let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
        assert_eq!(36, osa_distance(a, b));
    }

    #[test]
    fn osa_distance_beginning_transposition() {
        assert_eq!(1, osa_distance("foobar", "ofobar"));
    }

    #[test]
    fn osa_distance_end_transposition() {
        assert_eq!(1, osa_distance("specter", "spectre"));
    }

    #[test]
    fn osa_distance_restricted_edit() {
        // OSA restricts each substring to one edit: this pair costs 4 here
        // but only 3 under unrestricted Damerau-Levenshtein (see
        // damerau_levenshtein_unrestricted_edit below).
        assert_eq!(4, osa_distance("a cat", "an abct"));
    }

    // --- damerau_levenshtein ---

    #[test]
    fn damerau_levenshtein_empty() {
        assert_eq!(0, damerau_levenshtein("", ""));
    }

    #[test]
    fn damerau_levenshtein_same() {
        assert_eq!(0, damerau_levenshtein("damerau", "damerau"));
    }

    #[test]
    fn damerau_levenshtein_first_empty() {
        assert_eq!(7, damerau_levenshtein("", "damerau"));
    }

    #[test]
    fn damerau_levenshtein_second_empty() {
        assert_eq!(7, damerau_levenshtein("damerau", ""));
    }

    #[test]
    fn damerau_levenshtein_diff() {
        assert_eq!(2, damerau_levenshtein("ca", "abc"));
    }

    #[test]
    fn damerau_levenshtein_diff_short() {
        assert_eq!(3, damerau_levenshtein("damerau", "aderua"));
    }

    #[test]
    fn damerau_levenshtein_diff_reversed() {
        assert_eq!(3, damerau_levenshtein("aderua", "damerau"));
    }

    #[test]
    fn damerau_levenshtein_diff_multibyte() {
        assert_eq!(3, damerau_levenshtein("öঙ香", "abc"));
        assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
    }

    #[test]
    fn damerau_levenshtein_diff_unequal_length() {
        assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz"));
    }

    #[test]
    fn damerau_levenshtein_diff_unequal_length_reversed() {
        assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau"));
    }

    #[test]
    fn damerau_levenshtein_diff_comedians() {
        assert_eq!(5, damerau_levenshtein("Stewart", "Colbert"));
    }

    #[test]
    fn damerau_levenshtein_many_transpositions() {
        assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk"));
    }

    #[test]
    fn damerau_levenshtein_diff_longer() {
        let a = "The quick brown fox jumped over the angry dog.";
        let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
        assert_eq!(36, damerau_levenshtein(a, b));
    }

    #[test]
    fn damerau_levenshtein_beginning_transposition() {
        assert_eq!(1, damerau_levenshtein("foobar", "ofobar"));
    }

    #[test]
    fn damerau_levenshtein_end_transposition() {
        assert_eq!(1, damerau_levenshtein("specter", "spectre"));
    }

    #[test]
    fn damerau_levenshtein_unrestricted_edit() {
        // Unrestricted variant beats OSA's 4 for the same pair.
        assert_eq!(3, damerau_levenshtein("a cat", "an abct"));
    }

    // --- normalized_damerau_levenshtein ---

    #[test]
    fn normalized_damerau_levenshtein_diff_short() {
        assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
    }

    #[test]
    fn normalized_damerau_levenshtein_for_empty_strings() {
        assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
    }

    #[test]
    fn normalized_damerau_levenshtein_first_empty() {
        assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
    }

    #[test]
    fn normalized_damerau_levenshtein_second_empty() {
        assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
    }

    #[test]
    fn normalized_damerau_levenshtein_identical_strings() {
        assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
    }

    // --- sorensen_dice ---

    #[test]
    fn sorensen_dice_all() {
        // test cases taken from
        // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/spec/index.spec.js#L11

        assert_eq!(1.0, sorensen_dice("a", "a"));
        assert_eq!(0.0, sorensen_dice("a", "b"));
        assert_eq!(1.0, sorensen_dice("", ""));
        assert_eq!(0.0, sorensen_dice("a", ""));
        assert_eq!(0.0, sorensen_dice("", "a"));
        // Whitespace is stripped before bigrams are formed.
        assert_eq!(1.0, sorensen_dice("apple event", "apple    event"));
        assert_eq!(0.9090909090909091, sorensen_dice("iphone", "iphone x"));
        assert_eq!(0.0, sorensen_dice("french", "quebec"));
        assert_eq!(1.0, sorensen_dice("france", "france"));
        // Comparison is case-sensitive.
        assert_eq!(0.2, sorensen_dice("fRaNce", "france"));
        assert_eq!(0.8, sorensen_dice("healed", "sealed"));
        assert_eq!(
            0.7878787878787878,
            sorensen_dice("web applications", "applications of the web")
        );
        assert_eq!(
            0.92,
            sorensen_dice(
                "this will have a typo somewhere",
                "this will huve a typo somewhere"
            )
        );
        assert_eq!(
            0.6060606060606061,
            sorensen_dice(
                "Olive-green table for sale, in extremely good condition.",
                "For sale: table in very good  condition, olive green in colour."
            )
        );
        assert_eq!(
            0.2558139534883721,
            sorensen_dice(
                "Olive-green table for sale, in extremely good condition.",
                "For sale: green Subaru Impreza, 210,000 miles"
            )
        );
        assert_eq!(
            0.1411764705882353,
            sorensen_dice(
                "Olive-green table for sale, in extremely good condition.",
                "Wanted: mountain bike with at least 21 gears."
            )
        );
        assert_eq!(
            0.7741935483870968,
            sorensen_dice("this has one extra word", "this has one word")
        );
    }
}
diff --git a/tests/lib.rs b/tests/lib.rs
new file mode 100644 (file)
index 0000000..ad7af51
--- /dev/null
@@ -0,0 +1,49 @@
+extern crate strsim;
+
+use strsim::{hamming, levenshtein, normalized_levenshtein, osa_distance,damerau_levenshtein,
+             normalized_damerau_levenshtein, jaro, jaro_winkler};
+
#[test]
fn hamming_works() {
    // Unwrap the Ok distance; panic with the error's Debug form otherwise.
    let distance = hamming("hamming", "hammers").unwrap_or_else(|why| panic!("{:?}", why));
    assert_eq!(3, distance);
}
+
#[test]
fn levenshtein_works() {
    // "kitten" -> "sitting": two substitutions plus one insertion.
    let distance = levenshtein("kitten", "sitting");
    assert_eq!(distance, 3);
}
+
#[test]
fn normalized_levenshtein_works() {
    let score = normalized_levenshtein("kitten", "sitting");
    assert!((score - 0.57142).abs() < 0.00001);
}
+
#[test]
fn osa_distance_works() {
    let distance = osa_distance("ac", "cba");
    assert_eq!(distance, 3);
}
+
#[test]
fn damerau_levenshtein_works() {
    // Unrestricted Damerau-Levenshtein scores this pair lower than OSA.
    let distance = damerau_levenshtein("ac", "cba");
    assert_eq!(distance, 2);
}
+
#[test]
fn normalized_damerau_levenshtein_works() {
    let score = normalized_damerau_levenshtein("levenshtein", "löwenbräu");
    assert!((score - 0.27272).abs() < 0.00001);
}
+
#[test]
fn jaro_works() {
    let similarity = jaro("Friedrich Nietzsche", "Jean-Paul Sartre");
    assert!((0.392 - similarity).abs() < 0.001);
}
+
#[test]
fn jaro_winkler_works() {
    // Shared "cheese" prefix lifts the score above plain Jaro.
    let similarity = jaro_winkler("cheeseburger", "cheese fries");
    assert!((0.911 - similarity).abs() < 0.001);
}