--- /dev/null
+{
+ "git": {
+ "sha1": "94b632aeac099031c373599313d5b5f0acbbaec0"
+ }
+}
--- /dev/null
+/target/
+Cargo.lock
--- /dev/null
+# This file is a template, and might need editing before it works on your project.
+# Official language image. Look for the different tagged releases at:
+# https://hub.docker.com/r/library/rust/tags/
+image: "rust:latest"
+
+# Optional: Pick zero or more services to be used on all builds.
+# Only needed when using a docker container to run your tests in.
+# Check out: http://docs.gitlab.com/ce/ci/docker/using_docker_images.html#what-is-a-service
+# services:
+# - mysql:latest
+# - redis:latest
+# - postgres:latest
+
+# Optional: Install a C compiler, cmake and git into the container.
+# You will often need this when you (or any of your dependencies) depends on C code.
+# before_script:
+# - apt-get update -yqq
+# - apt-get install -yqq --no-install-recommends build-essential
+
+# Use cargo to test the project
+test:cargo:
+ script:
+ - rustc --version && cargo --version # Print version info for debugging
+ - cargo test --all --verbose --all-features
--- /dev/null
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "seahash"
+version = "4.1.0"
+authors = ["ticki <ticki@users.noreply.github.com>", "Tom Almeida <tom@tommoa.me>"]
+exclude = ["target", "Cargo.lock"]
+description = "A blazingly fast, portable hash function with proven statistical guarantees."
+documentation = "https://docs.rs/seahash"
+keywords = ["hash", "hashing", "checksum", "checksumming", "portable"]
+license = "MIT"
+repository = "https://gitlab.redox-os.org/redox-os/seahash"
+
+[[bench]]
+name = "bench"
+harness = false
+[dev-dependencies.criterion]
+version = "0.3"
+
+[dev-dependencies.quickcheck]
+version = "0.9.2"
+
+[features]
+default = []
+use_std = []
--- /dev/null
+[package]
+name = "seahash"
+version = "4.1.0"
+authors = ["ticki <ticki@users.noreply.github.com>", "Tom Almeida <tom@tommoa.me>"]
+description = "A blazingly fast, portable hash function with proven statistical guarantees."
+repository = "https://gitlab.redox-os.org/redox-os/seahash"
+documentation = "https://docs.rs/seahash"
+license = "MIT"
+keywords = ["hash", "hashing", "checksum", "checksumming", "portable"]
+exclude = ["target", "Cargo.lock"]
+
+[dev-dependencies]
+quickcheck = "0.9.2"
+criterion = "0.3"
+
+[features]
+default = []
+use_std = []
+
+[[bench]]
+name = "bench"
+harness = false
--- /dev/null
+<div align="center"><img alt="Logo" width="200" src="./logo.png" /></div>
+===================
+
+SeaHash: A blazingly fast hash function.
+
+SeaHash is a hash function with performance better than (around 3-20% improvement) xxHash and
+MetroHash. Furthermore, SeaHash has mathematically provable statistical guarantees.
+
+In action:
+
+[![The hash function in action.](http://ticki.github.io/img/seahash_construction_diagram.svg)](http://ticki.github.io/img/seahash_construction_diagram.svg)
--- /dev/null
+extern crate core;
+extern crate criterion;
+extern crate seahash;
+
+use core::hash::Hasher;
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+
+fn describe_benches(c: &mut Criterion) {
+ // shared buffers for all tests
+ let buf = vec![15; 16 * 1024];
+
+ // shared/n and buffer/n are executed for these sizes
+ let sizes = [64, 1024, 4096, 16 * 1024];
+
+ let mut group = c.benchmark_group("buffer");
+
+ for size in &sizes {
+ group.throughput(Throughput::Bytes(*size as u64));
+
+ group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
+ b.iter(|| {
+ black_box(seahash::hash(&buf[..size]));
+ })
+ });
+ }
+
+ group.finish();
+
+ let mut group = c.benchmark_group("stream");
+
+ for size in &sizes {
+ group.throughput(Throughput::Bytes(*size as u64));
+
+ group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
+ b.iter_with_setup(
+ || seahash::SeaHasher::default(),
+ |mut h: seahash::SeaHasher| {
+ // use chunks of 32 bytes to simulate some looping on a single hasher value
+ for _ in 0..size / 32 {
+ h.write(&buf[..32]);
+ }
+ // this will mostly be an empty slice, but that is a possible Hasher api usage
+ h.write(&buf[..(size % 32)]);
+ black_box(h.finish())
+ },
+ )
+ });
+ }
+
+ group.finish();
+
+ // gigabyte group times are comparable with earlier benchmark values based on
+ // d52d115a223a0e81d1600bd8a5e73cb4b24a38c0
+ let mut group = c.benchmark_group("gigabyte");
+ group.throughput(Throughput::Bytes((1024 * 1024 * 1024) as u64));
+
+ group.bench_function(BenchmarkId::from_parameter("buffer"), |b| {
+ b.iter(|| {
+ let mut buf = [15; 4096];
+ let mut total = 0;
+ for _ in 0..250_000 {
+ total ^= seahash::hash(&buf);
+ buf[0] = buf[0].wrapping_add(1);
+ }
+ black_box(total)
+ })
+ });
+
+ group.bench_function(BenchmarkId::from_parameter("stream"), |b| {
+ b.iter(|| {
+ let mut buf = [15; 4096];
+ let mut h = seahash::SeaHasher::default();
+ for _ in 0..250_000 {
+ h.write(&buf);
+ buf[0] = buf[0].wrapping_add(1);
+ }
+ black_box(h.finish())
+ })
+ });
+
+ group.finish();
+}
+
+criterion_group!(benches, describe_benches);
+criterion_main!(benches);
--- /dev/null
+//! A highly optimized version of SeaHash.
+
+use std::slice;
+
+use helper;
+
/// A SeaHash state.
///
/// Holds the four independent 64-bit components of the state vector, plus a counter of the
/// bytes written so far, which `finalize` XORs in as length padding.
#[derive(Clone)]
pub struct State {
    /// The `a` component of the state vector.
    a: u64,
    /// The `b` component of the state vector.
    b: u64,
    /// The `c` component of the state vector.
    c: u64,
    /// The `d` component of the state vector.
    d: u64,
    /// The number of written bytes.
    written: u64,
}
+
+impl State {
+ /// Create a new state vector with some initial values.
+ pub fn new(a: u64, b: u64, c: u64, d: u64) -> State {
+ State {
+ a: a,
+ b: b,
+ c: c,
+ d: d,
+ written: 0,
+ }
+ }
+
+ /// Hash a buffer with some seed.
+ pub fn hash(buf: &[u8], (mut a, mut b, mut c, mut d): (u64, u64, u64, u64)) -> State {
+ unsafe {
+ // We use 4 different registers to store seperate hash states, because this allows us
+ // to update them seperately, and consequently exploiting ILP to update the states in
+ // parallel.
+
+ // The pointer to the current bytes.
+ let mut ptr = buf.as_ptr();
+ // The end of the "main segment", i.e. the biggest buffer s.t. the length is divisible
+ // by 32.
+ let end_ptr = buf.as_ptr().offset(buf.len() as isize & !0x1F);
+
+ while end_ptr > ptr {
+ // Modern CPUs allow the pointer arithmetic to be done in place, hence not
+ // introducing tmpvars.
+ a ^= helper::read_u64(ptr);
+ b ^= helper::read_u64(ptr.offset(8));
+ c ^= helper::read_u64(ptr.offset(16));
+ d ^= helper::read_u64(ptr.offset(24));
+
+ // Increment the pointer.
+ ptr = ptr.offset(32);
+
+ // Diffuse the updated registers. We hope that each of these are executed in
+ // parallel.
+ a = helper::diffuse(a);
+ b = helper::diffuse(b);
+ c = helper::diffuse(c);
+ d = helper::diffuse(d);
+ }
+
+ // Calculate the number of excessive bytes. These are bytes that could not be handled
+ // in the loop above.
+ let mut excessive = buf.len() as usize + buf.as_ptr() as usize - end_ptr as usize;
+ // Handle the excessive bytes.
+ match excessive {
+ 0 => {}
+ 1..=7 => {
+ // 1 or more excessive.
+
+ // Write the last excessive bytes (<8 bytes).
+ a ^= helper::read_int(slice::from_raw_parts(ptr as *const u8, excessive));
+
+ // Diffuse.
+ a = helper::diffuse(a);
+ }
+ 8 => {
+ // 8 bytes excessive.
+
+ // Mix in the partial block.
+ a ^= helper::read_u64(ptr);
+
+ // Diffuse.
+ a = helper::diffuse(a);
+ }
+ 9..=15 => {
+ // More than 8 bytes excessive.
+
+ // Mix in the partial block.
+ a ^= helper::read_u64(ptr);
+
+ // Write the last excessive bytes (<8 bytes).
+ excessive = excessive - 8;
+ b ^= helper::read_int(slice::from_raw_parts(ptr.offset(8), excessive));
+
+ // Diffuse.
+ a = helper::diffuse(a);
+ b = helper::diffuse(b);
+ }
+ 16 => {
+ // 16 bytes excessive.
+
+ // Mix in the partial block.
+ a = helper::diffuse(a ^ helper::read_u64(ptr));
+ b = helper::diffuse(b ^ helper::read_u64(ptr.offset(8)));
+ }
+ 17..=23 => {
+ // 16 bytes or more excessive.
+
+ // Mix in the partial block.
+ a ^= helper::read_u64(ptr);
+ b ^= helper::read_u64(ptr.offset(8));
+
+ // Write the last excessive bytes (<8 bytes).
+ excessive = excessive - 16;
+ c ^= helper::read_int(slice::from_raw_parts(ptr.offset(16), excessive));
+
+ // Diffuse.
+ a = helper::diffuse(a);
+ b = helper::diffuse(b);
+ c = helper::diffuse(c);
+ }
+ 24 => {
+ // 24 bytes excessive.
+
+ // Mix in the partial block.
+ a ^= helper::read_u64(ptr);
+ b ^= helper::read_u64(ptr.offset(8));
+ c ^= helper::read_u64(ptr.offset(16));
+
+ // Diffuse.
+ a = helper::diffuse(a);
+ b = helper::diffuse(b);
+ c = helper::diffuse(c);
+ }
+ _ => {
+ // More than 24 bytes excessive.
+
+ // Mix in the partial block.
+ a ^= helper::read_u64(ptr);
+ b ^= helper::read_u64(ptr.offset(8));
+ c ^= helper::read_u64(ptr.offset(16));
+
+ // Write the last excessive bytes (<8 bytes).
+ excessive = excessive - 24;
+ d ^= helper::read_int(slice::from_raw_parts(ptr.offset(24), excessive));
+
+ // Diffuse.
+ a = helper::diffuse(a);
+ b = helper::diffuse(b);
+ c = helper::diffuse(c);
+ d = helper::diffuse(d);
+ }
+ }
+ }
+
+ State {
+ a: a,
+ b: b,
+ c: c,
+ d: d,
+ written: buf.len() as u64,
+ }
+ }
+
+ /// Write another 64-bit integer into the state.
+ pub fn push(&mut self, x: u64) {
+ // Mix `x` into `a`.
+ let a = helper::diffuse(self.a ^ x);
+
+ // Rotate around.
+ // _______________________
+ // | v
+ // a <---- b <---- c <---- d
+ self.a = self.b;
+ self.b = self.c;
+ self.c = self.d;
+ self.d = a;
+
+ // Increase the written bytes counter.
+ self.written += 8;
+ }
+
+ /// Remove the most recently written 64-bit integer from the state.
+ ///
+ /// Given the value of the most recently written u64 `last`, remove it from the state.
+ pub fn pop(&mut self, last: u64) {
+ // Un-mix `last` from `d`. Removes the recently written data.
+ let d = helper::undiffuse(self.d) ^ last;
+
+ // Rotate back.
+ // _______________________
+ // v |
+ // a ----> b ----> c ----> d
+ self.d = self.c;
+ self.c = self.b;
+ self.b = self.a;
+ self.a = d;
+
+ // Decrese the written bytes counter.
+ self.written -= 8;
+ }
+
+ /// Finalize the state.
+ #[inline]
+ pub fn finalize(self) -> u64 {
+ let State {
+ written,
+ mut a,
+ b,
+ mut c,
+ d,
+ } = self;
+
+ // XOR the states together. Even though XOR is commutative, it doesn't matter, because the
+ // state vector's initial components are mutually distinct, and thus swapping even and odd
+ // chunks will affect the result, because it is sensitive to the initial condition.
+ a ^= b;
+ c ^= d;
+ a ^= c;
+ // XOR the number of written bytes in order to make the excessive bytes zero-sensitive
+ // (without this, two excessive zeros would be equivalent to three excessive zeros). This
+ // is know as length padding.
+ a ^= written;
+
+ // We diffuse to make the excessive bytes discrete (i.e. small changes shouldn't give small
+ // changes in the output).
+ helper::diffuse(a)
+ }
+}
+
+/// Hash some buffer.
+///
+/// This is a highly optimized implementation of SeaHash. It implements numerous techniques to
+/// improve performance:
+///
+/// - Register allocation: This makes a great deal out of making sure everything fits into
+/// registers such that minimal memory accesses are needed. This works quite successfully on most
+/// CPUs, and the only time it reads from memory is when it fetches the data of the buffer.
+/// - Bulk reads: Like most other good hash functions, we read 8 bytes a time. This obviously
+/// improves performance a lot
+/// - Independent updates: We make sure very few statements next to each other depends on the
+/// other. This means that almost always the CPU will be able to run the instructions in parallel.
+/// - Loop unrolling: The hot loop is unrolled such that very little branches (one every 32 bytes)
+/// are needed.
+///
+/// and more.
+///
+/// The seed of this hash function is prechosen.
+pub fn hash(buf: &[u8]) -> u64 {
+ hash_seeded(
+ buf,
+ 0x16f11fe89b0d677c,
+ 0xb480a793d8e6c86c,
+ 0x6fe2e5aaf078ebc9,
+ 0x14f994a4c5259381,
+ )
+}
+
+/// Hash some buffer according to a chosen seed.
+///
+/// The keys are expected to be chosen from a uniform distribution. The keys should be mutually
+/// distinct to avoid issues with collisions if the lanes are permuted.
+///
+/// This is not secure, as [the key can be extracted with a bit of computational
+/// work](https://github.com/ticki/tfs/issues/5), as such, it is recommended to have a fallback
+/// hash function (adaptive hashing) in the case of hash flooding. It can be considered unbroken if
+/// the output is not known (i.e. no malicious party has access to the raw values of the keys, only
+/// a permutation thereof).), however I absolutely do not recommend using it for this. If you want
+/// to be strict, this should only be used as a layer of obfuscation, such that the fallback (e.g.
+/// SipHash) is harder to trigger.
+///
+/// In the future, I might strengthen the security if possible while having backward compatibility
+/// with the default initialization vector.
+pub fn hash_seeded(buf: &[u8], a: u64, b: u64, c: u64, d: u64) -> u64 {
+ State::hash(buf, (a, b, c, d)).finalize()
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    use reference;

    /// Assert that the optimized implementation agrees with the reference implementation for
    /// `a`, across several seeds (including the degenerate all-zero and all-one seeds).
    fn hash_match(a: &[u8]) {
        assert_eq!(hash(a), reference::hash(a));
        assert_eq!(
            hash_seeded(a, 1, 1, 1, 1),
            reference::hash_seeded(a, 1, 1, 1, 1)
        );
        assert_eq!(
            hash_seeded(a, 500, 2873, 2389, 9283),
            reference::hash_seeded(a, 500, 2873, 2389, 9283)
        );
        assert_eq!(
            hash_seeded(a, 238945723984, 872894734, 239478243, 28937498234),
            reference::hash_seeded(a, 238945723984, 872894734, 239478243, 28937498234)
        );
        assert_eq!(
            hash_seeded(a, !0, !0, !0, !0),
            reference::hash_seeded(a, !0, !0, !0, !0)
        );
        assert_eq!(
            hash_seeded(a, 0, 0, 0, 0),
            reference::hash_seeded(a, 0, 0, 0, 0)
        );
    }

    #[test]
    #[cfg_attr(miri, ignore)] // very slow to run on miri
    fn zero() {
        // Cover every length from 0 to 4095 so all excess-byte branches get exercised.
        let arr = [0; 4096];
        for n in 0..4096 {
            hash_match(&arr[0..n]);
        }
    }

    #[test]
    fn seq() {
        let mut buf = [0; 4096];
        for i in 0..4096 {
            buf[i] = i as u8;
        }
        hash_match(&buf);
    }

    #[test]
    fn position_dependent() {
        // Two buffers whose bytes pairwise differ only in the lowest bit must not collide.
        let mut buf1 = [0; 4098];
        for i in 0..4098 {
            buf1[i] = i as u8;
        }
        let mut buf2 = [0; 4098];
        for i in 0..4098 {
            buf2[i] = i as u8 ^ 1;
        }

        assert!(hash(&buf1) != hash(&buf2));
    }

    #[test]
    fn shakespeare() {
        hash_match(b"to be or not to be");
        hash_match(b"love is a wonderful terrible thing");
    }

    #[test]
    fn zero_sensitive() {
        // Inserting or appending zero bytes must change the hash (length padding at work).
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 0, 2, 3, 4]));
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 0, 0, 2, 3, 4]));
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[1, 2, 3, 4, 0]));
        assert_ne!(hash(&[1, 2, 3, 4]), hash(&[0, 1, 2, 3, 4]));
        assert_ne!(hash(&[0, 0, 0]), hash(&[0, 0, 0, 0, 0]));
    }

    #[test]
    fn not_equal() {
        assert_ne!(hash(b"to be or not to be "), hash(b"to be or not to be"));
        assert_ne!(hash(b"jkjke"), hash(b"jkjk"));
        assert_ne!(hash(b"ijkjke"), hash(b"ijkjk"));
        assert_ne!(hash(b"iijkjke"), hash(b"iijkjk"));
        assert_ne!(hash(b"iiijkjke"), hash(b"iiijkjk"));
        assert_ne!(hash(b"iiiijkjke"), hash(b"iiiijkjk"));
        assert_ne!(hash(b"iiiiijkjke"), hash(b"iiiiijkjk"));
        assert_ne!(hash(b"iiiiiijkjke"), hash(b"iiiiiijkjk"));
        assert_ne!(hash(b"iiiiiiijkjke"), hash(b"iiiiiiijkjk"));
        assert_ne!(hash(b"iiiiiiiijkjke"), hash(b"iiiiiiiijkjk"));
        assert_ne!(hash(b"ab"), hash(b"bb"));
    }

    #[test]
    fn push() {
        // Pushing two u64s must equal hashing their 16-byte little-endian concatenation.
        let mut state = State::new(1, 2, 3, 4);
        state.push(!0);
        state.push(0);

        assert_eq!(
            hash_seeded(
                &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0],
                1,
                2,
                3,
                4
            ),
            state.finalize()
        );
    }

    #[test]
    fn pop() {
        // Popping the most recent word must undo the corresponding push.
        let mut state = State::new(1, 2, 3, 4);
        state.push(!0);
        state.push(0);
        state.pop(0);

        assert_eq!(
            hash_seeded(
                &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
                1,
                2,
                3,
                4
            ),
            state.finalize()
        );
    }
}
--- /dev/null
+//! Helper functions.
+
/// Read a buffer smaller than 8 bytes into an integer in little-endian.
///
/// This assumes that `buf.len() < 8`. If this is not satisfied, the behavior is unspecified
/// (currently, 0 is returned).
#[inline(always)]
pub fn read_int(buf: &[u8]) -> u64 {
    let len = buf.len();
    // Keep the base pointer in a local so it is register allocated. It will likely make no
    // difference anyway, though.
    let ptr = buf.as_ptr();

    // Lengths of 8 or more are outside the contract; bail out like the original match arm did.
    if len >= 8 {
        return 0;
    }

    unsafe {
        // Decompose the length into its bits: at most one u32, one u16, and one u8 read are
        // issued, in total spanning the buffer. This minimizes the number of reads.
        let mut x = 0u64;
        let mut off = 0isize;

        if len & 4 != 0 {
            x = (ptr as *const u32).read_unaligned().to_le() as u64;
            off = 4;
        }
        if len & 2 != 0 {
            x |= ((ptr.offset(off) as *const u16).read_unaligned().to_le() as u64) << (off * 8);
            off += 2;
        }
        if len & 1 != 0 {
            x |= (*ptr.offset(off) as u64) << (off * 8);
        }

        x
    }
}
+
/// Read a little-endian 64-bit integer from some buffer.
///
/// The caller must ensure that `ptr` points at 8 readable bytes.
#[inline(always)]
pub unsafe fn read_u64(ptr: *const u8) -> u64 {
    #[cfg(target_pointer_width = "32")]
    {
        // We cannot be sure about the memory layout of a potentially emulated 64-bit integer,
        // so we assemble the value from two 32-bit halves. If possible, the compiler should
        // fuse these into proper instructions.
        let lo = (ptr as *const u32).read_unaligned().to_le() as u64;
        let hi = (ptr.offset(4) as *const u32).read_unaligned().to_le() as u64;

        lo | (hi << 32)
    }

    #[cfg(target_pointer_width = "64")]
    {
        // A single unaligned load, byte-swapped on big-endian targets.
        (ptr as *const u64).read_unaligned().to_le()
    }
}
+
/// The diffusion function.
///
/// This is a bijective function emitting chaotic behavior. Such functions are used as building
/// blocks for hash functions.
pub const fn diffuse(mut x: u64) -> u64 {
    // Derived from the PCG RNG's round; thanks to @Veedrac for proposing this. The shift is
    // dynamic, chosen by the input's high bits, so changing those flips the lower bits, which
    // the multiplication then scatters upwards.
    const MULTIPLIER: u64 = 0x6eed0e9da4d94a4f;

    x = x.wrapping_mul(MULTIPLIER);
    x ^= (x >> 32) >> (x >> 60);
    x.wrapping_mul(MULTIPLIER)
}
+
/// Reverse the `diffuse` function.
pub const fn undiffuse(mut x: u64) -> u64 {
    // The modular multiplicative inverse (mod 2^64) of the multiplier used in `diffuse`. The
    // xor-shift step is self-inverse, since the bits that pick the shift amount are untouched
    // by the XOR.
    const INVERSE: u64 = 0x2f72b4215a3d8caf;

    x = x.wrapping_mul(INVERSE);
    x ^= (x >> 32) >> (x >> 60);
    x.wrapping_mul(INVERSE)
}
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Check a single diffuse/undiffuse pair in both directions.
    fn diffuse_test(x: u64, y: u64) {
        assert_eq!(diffuse(x), y);
        assert_eq!(undiffuse(y), x);
        assert_eq!(undiffuse(diffuse(x)), x);
    }

    #[test]
    fn read_int_() {
        assert_eq!(read_int(&[2, 3]), 770);
        assert_eq!(read_int(&[3, 2]), 515);
        assert_eq!(read_int(&[3, 2, 5]), 328195);
    }

    #[test]
    fn read_u64_() {
        let one = [1, 0, 0, 0, 0, 0, 0, 0];
        let two_five_eight = [2, 1, 0, 0, 0, 0, 0, 0];
        unsafe {
            assert_eq!(read_u64(one.as_ptr()), 1);
            assert_eq!(read_u64(two_five_eight.as_ptr()), 258);
        }
    }

    #[test]
    fn diffuse_test_vectors() {
        // Known (input, output) pairs of the diffusion function.
        let vectors: [(u64, u64); 6] = [
            (94203824938, 17289265692384716055),
            (0xDEADBEEF, 12110756357096144265),
            (0, 0),
            (1, 15197155197312260123),
            (2, 1571904453004118546),
            (3, 16467633989910088880),
        ];

        for &(x, y) in &vectors {
            diffuse_test(x, y);
        }
    }
}
--- /dev/null
+use crate::SeaHasher;
+use std::hash::Hasher;
+use std::io;
+
+impl io::Write for SeaHasher {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ Hasher::write(self, buf);
+ Ok(buf.len())
+ }
+ fn flush(&mut self) -> io::Result<()> {
+ Ok(())
+ }
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hash_write_trait() {
        // Eight 0xFF bytes followed by eight zero bytes.
        let data: &[u8] = &[
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00,
        ];

        // `io::copy` consumes its reader mutably, so hand it a copy of the slice reference.
        let mut rdr = data;
        let mut hasher = SeaHasher::new();
        let _ = io::copy(&mut rdr, &mut hasher).unwrap();

        // Streaming through `io::Write` must agree with the one-shot buffer hash.
        assert_eq!(crate::hash(data), hasher.finish());
    }
}
--- /dev/null
+//! SeaHash: A blazingly fast, portable hash function with proven statistical guarantees.
+//!
+//! SeaHash is a hash function with performance better than (around 3-20% improvement) xxHash and
+//! MetroHash. Furthermore, SeaHash has mathematically provable statistical guarantees.
+//!
+//! SeaHash is a portable hash function, meaning that the output is not dependent on the hosting
+//! architecture, and makes no assumptions on endianness or the alike. This stable layout allows it
+//! to be used for on-disk/permanent storage (e.g. checksums).
+//!
+//! # Design, advantages, and features
+//!
+//! - **High quality**: It beats most other general purpose hash functions because it provides full
+//! avalanche in between state updates.
+//! - **Performance**: SeaHash beats every high-quality (grading 10/10 in smhasher) hash function
+//! that I know of.
+//! - **Provable quality guarantees**: Contrary to most other non-cryptographic hash function,
+//! SeaHash can be proved to satisfy the avalanche criterion as well as BIC.
+//! - **Parallelizable**: Consists of multiple, independent states to take advantage of ILP and/or
+//! software threads.
+//! - **Bulk reads**: Reads 8 or 4 bytes a time.
+//! - **Stable and portable**: Does not depend on the target architecture, and produces a stable
+//! value, which is only changed in major version bumps.
+//! - **Keyed**: Designed to not leak the seed/key. Note that it has not gone through
+//! cryptoanalysis yet, so the keyed version shouldn't be relied on when security is needed.
+//! - **Hardware accelerateable**: SeaHash is designed such that ASICs can implement it with really
+//! high performance.
+//!
+//! # A word of warning!
+//!
+//! This is **not** a cryptographic function, and it certainly should not be used as one. If you
+//! want a good cryptographic hash function, you should use SHA-3 (Keccak) or BLAKE2.
+//!
+//! It is not secure, nor does it aim to be. It aims to have high quality pseudorandom output and
+//! few collisions, as well as being fast.
+//!
+//! # Benchmark
+//!
+//! On normal hardware, it is expected to run with a rate around 5.9-6.7 GB/S on a 2.5 GHz CPU.
+//! Further improvement can be seen when hashing very big buffers in parallel.
+//!
+//! | Function | Quality | Cycles per byte (lower is better) | Author
+//! |-------------|---------------|-----------------------------------|-------------------
+//! | **SeaHash** | **Excellent** | **0.24** | **Ticki**
+//! | xxHash | Excellent | 0.31 | Collet
+//! | MetroHash | Excellent | 0.35 | Rogers
+//! | Murmur | Excellent | 0.64 | Appleby
+//! | Rabin | Medium | 1.51 | Rabin
+//! | CityHash | Excellent | 1.62 | Pike, Alakuijala
+//! | LoseLose | Terrible | 2.01 | Kernighan, Ritchie
+//! | FNV | Poor | 3.12 | Fowler, Noll, Vo
+//! | SipHash | Pseudorandom | 3.21 | Aumasson, Bernstein
+//! | CRC | Good | 3.91 | Peterson
+//! | DJB2 | Poor | 4.13 | Bernstein
+//!
+//! ## Ideal architecture
+//!
+//! SeaHash is designed and optimized for the most common architecture in use:
+//!
+//! - Little-endian
+//! - 64-bit
+//! - 64 or more bytes cache lines
+//! - 4 or more instruction pipelines
+//! - 4 or more 64-bit registers
+//!
+//! Anything that does not hold the above requirements will perform worse by up to 30-40%. Note that
+//! this means it is still faster than CityHash (~1 GB/S), MurMurHash (~2.6 GB/S), FNV (~0.5 GB/S),
+//! etc.
+//!
+//! # Achieving the performance
+//!
+//! Like any good general-purpose hash function, SeaHash reads 8 bytes at once effectively reducing
+//! the running time by an order of ~5.
+//!
+//! Secondly, SeaHash achieves the performance by heavily exploiting Instruction-Level Parallelism.
+//! In particular, it fetches 4 integers in every round and independently diffuses them. This
+//! yields four different states, which are finally combined.
+//!
+//! # Statistical guarantees
+//!
+//! SeaHash comes with certain proven guarantees about the statistical properties of the output:
+//!
+//! 1. Pick some _n_-byte sequence, _s_. The number of _n_-byte sequence colliding with _s_ is
+//! independent of the choice of _s_ (all equivalence class have equal size).
+//! 2. If you flip any bit in the input, the probability for any bit in the output to be flipped is
+//! 0.5.
+//! 3. The hash value of a sequence of uniformly distributed bytes is itself uniformly distributed.
+//!
+//! The first guarantee can be derived through deduction, by proving that the diffusion function is
+//! bijective (reverse the XORs and find the congruence inverses to the primes).
+//!
+//! The second guarantee requires more complex calculations: Construct a matrix of probabilities
+//! and set one to certain (1), then apply transformations through the respective operations. The
+//! proof is a bit long, but relatively simple.
+//!
+//! The third guarantee requires proving that the hash value is a tree, such that:
+//! - Leafs represents the input values.
+//! - Single-child nodes reduce to the diffusion of the child.
+//! - Multiple-child nodes reduce to the sum of the children.
+//!
+//! Then simply show that each of these reductions transform uniformly distributed variables to
+//! uniformly distributed variables.
+//!
+//! # Inner workings
+//!
+//! In technical terms, SeaHash follows an alternating 4-state length-padded Merkle–Damgård
+//! construction with an XOR-diffuse compression function (click to enlarge):
+//!
+//! [![A diagram.](http://ticki.github.io/img/seahash_construction_diagram.svg)]
+//! (http://ticki.github.io/img/seahash_construction_diagram.svg)
+//!
+//! It starts with 4 initial states, then it alternates between them (increment, wrap on 4) and
+//! does XOR with the respective block. When a state has been visited the diffusion function (f) is
+//! applied. The very last block is padded with zeros.
+//!
+//! After all the blocks have been gone over, all the states are XOR'd to the number of bytes
+//! written. The sum is then passed through the diffusion function, which produces the final hash
+//! value.
+//!
+//! The diffusion function is drawn below.
+//!
+//! ```notest
+//! x ← px
+//! x ← x ⊕ ((x ≫ 32) ≫ (x ≫ 60))
+//! x ← px
+//! ```
+//!
+//! The advantage of having four completely segregated (note that there is no mix round, so they're
+//! entirely independent) states is that fast parallelism is possible. For example, if I were to
+//! hash 1 TB, I can spawn up four threads which can run independently without _any_
+//! intercommunication or synchronization before the last round.
+//!
+//! If the diffusion function (f) was cryptographically secure, it would pass cryptoanalysis
+//! trivially. This might seem irrelevant, as it clearly isn't cryptographically secure, but it
+//! tells us something about the inner semantics. In particular, any diffusion function with
+//! sufficient statistical quality will make up a good hash function in this construction.
+//!
+//! Read [the blog post](http://ticki.github.io/blog/seahash-explained/) for more details.
+//!
+//! # ASIC version
+//!
+//! SeaHash is specifically designed such that it can be efficiently implemented in the form of
+//! ASIC while only using very few transistors.
+//!
+//! # Specification
+//!
+//! See the [`reference`](./reference) module.
+//!
+//! # Credits
+//!
+//! Aside from myself (@ticki), there are a couple of other people who have helped create this.
+//! Joshua Landau suggested using the [PCG family of diffusions](http://www.pcg-random.org/),
+//! created by Melissa E. O'Neill. Sokolov Yura spotted multiple bugs in SeaHash.
+
+#![warn(missing_docs)]
+#![cfg_attr(all(not(test), not(feature = "use_std")), no_std)]
+#[cfg(all(not(test), not(feature = "use_std")))]
+extern crate core as std;
+
+pub use buffer::{hash, hash_seeded, State};
+pub use stream::SeaHasher;
+
+mod buffer;
+mod helper;
+pub mod reference;
+mod stream;
+
+#[cfg(feature = "use_std")]
+mod impl_std;
--- /dev/null
+//! A slow, but clear reference implementation of SeaHash.
+//!
+//! # Specification
+//!
+//! The input buffer is padded with null bytes until the length is divisible by 8.
+//!
+//! We start out with state
+//!
+//! ```notest
+//! a = 0x16f11fe89b0d677c
+//! b = 0xb480a793d8e6c86c
+//! c = 0x6fe2e5aaf078ebc9
+//! d = 0x14f994a4c5259381
+//! ```
+//!
+//! If a seed is given, each of the initial state component are modularly multiplied by the seed.
+//!
+//! From the stream, we read one 64-bit block (in little-endian) at a time. This number, `n`,
+//! determines the new state by:
+//!
+//! ```notest
+//! a' = b
+//! b' = c
+//! c' = d
+//! d' = g(a ⊕ n)
+//! ```
+//!
+//! `g(x)` is defined as `g(x) = j(h(j(x)))` with `h(x) = (x ≫ 32) ≫ (x ≫ 60)` and `j(x) ≡ px (mod
+//! 2^64)` with `p = 0x6eed0e9da4d94a4f` (the same multiplier `helper::diffuse` uses).
+//!
+//! Let the final state be `(x, y, z, w)`. Then the final result is given by `H = g(x ⊕ y ⊕ z ⊕ w ⊕
+//! l)` where `l` is the number of bytes in the original buffer.
+
+use helper;
+
/// Read an integer in little-endian.
///
/// The buffer may be shorter than 8 bytes; missing high bytes are implicitly zero (this is the
/// null padding described in the module documentation).
fn read_int(int: &[u8]) -> u64 {
    debug_assert!(
        int.len() <= 8,
        "The buffer length of the integer must be less than or equal to \
        the one of an u64."
    );

    // Walk the bytes from most to least significant, shifting each previous byte up by 8 bits
    // and OR-ing the next one into the low byte.
    int.iter()
        .rev()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
}
+
/// A hash state.
///
/// The four substates are updated round-robin as 64-bit blocks are written, and are XOR'd
/// together (with length padding) to produce the final hash.
struct State {
    /// The `a` substate.
    a: u64,
    /// The `b` substate.
    b: u64,
    /// The `c` substate.
    c: u64,
    /// The `d` substate.
    d: u64,
}
+
+impl State {
+ /// Write a 64-bit integer to the state.
+ fn write_u64(&mut self, x: u64) {
+ let mut a = self.a;
+
+ // Mix `x` into `a`.
+ a = helper::diffuse(a ^ x);
+
+ // Rotate around.
+ // _______________________
+ // | v
+ // a <---- b <---- c <---- d
+ self.a = self.b;
+ self.b = self.c;
+ self.c = self.d;
+ self.d = a;
+ }
+
+ /// Calculate the final hash.
+ fn finish(self, total: usize) -> u64 {
+ // Even though XORing is commutative, it doesn't matter, because the state vector's initial
+ // components are mutually distinct, and thus swapping even and odd chunks will affect the
+ // result, because it is sensitive to the initial condition. To add discreteness, we
+ // diffuse.
+ helper::diffuse(
+ self.a ^ self.b ^ self.c ^ self.d
+ // We XOR in the number of written bytes to make it zero-sensitive when excessive bytes
+ // are written (0u32.0u8 ≠ 0u16.0u8).
+ ^ total as u64,
+ )
+ }
+
+ /// Create a new state with some initial values (seed).
+ fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> State {
+ State {
+ // These values are randomly generated.
+ a: k1,
+ b: k2,
+ c: k3,
+ d: k4,
+ }
+ }
+}
+
+/// A reference implementation of SeaHash.
+///
+/// This is bloody slow when compared to the optimized version. This is because SeaHash was
+/// specifically designed to take all sorts of hardware and software hacks into account to achieve
+/// maximal performance, but this makes code significantly less readable. As such, this version has
+/// only one goal: to make the algorithm readable and understandable.
+pub fn hash(buf: &[u8]) -> u64 {
+ hash_seeded(
+ buf,
+ 0x16f11fe89b0d677c,
+ 0xb480a793d8e6c86c,
+ 0x6fe2e5aaf078ebc9,
+ 0x14f994a4c5259381,
+ )
+}
+
+/// The seeded version of the reference implementation.
+pub fn hash_seeded(buf: &[u8], k1: u64, k2: u64, k3: u64, k4: u64) -> u64 {
+ // Initialize the state.
+ let mut state = State::with_seeds(k1, k2, k3, k4);
+
+ // Partition the rounded down buffer into chunks of 8 bytes, and iterate over them. The last
+ // block might not be 8 bytes long.
+ for int in buf.chunks(8) {
+ // Read the chunk into an integer and write into the state.
+ state.write_u64(read_int(int));
+ }
+
+ // Finish the hash state and return the final value.
+ state.finish(buf.len())
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    // Known-answer test: pins the hash of a fixed input so any accidental
    // change to the reference algorithm is caught.
    #[test]
    fn shakespear() {
        assert_eq!(hash(b"to be or not to be"), 1988685042348123509);
    }
}
--- /dev/null
+use std::hash::Hasher;
+use std::slice;
+
+use helper;
+
/// The streaming version of the algorithm.
#[derive(Clone, Copy)]
pub struct SeaHasher {
    /// The state of the hasher.
    state: (u64, u64, u64, u64),
    /// The number of bytes we have written in total
    /// (pending tail bytes are not counted until they are pushed).
    written: u64,
    /// Our tail: up to 7 buffered input bytes that do not yet fill a whole
    /// 64-bit word, packed little-endian into the low bytes.
    tail: u64,
    /// The number of bytes in the tail
    ntail: usize,
}
+
+impl Default for SeaHasher {
+ fn default() -> SeaHasher {
+ SeaHasher::with_seeds(
+ 0x16f11fe89b0d677c,
+ 0xb480a793d8e6c86c,
+ 0x6fe2e5aaf078ebc9,
+ 0x14f994a4c5259381,
+ )
+ }
+}
+
+impl SeaHasher {
+ /// Create a new `SeaHasher` with default state.
+ pub fn new() -> SeaHasher {
+ SeaHasher::default()
+ }
+
+ /// Construct a new `SeaHasher` given some seed.
+ ///
+ /// For maximum quality, these seeds should be chosen at random.
+ pub fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> SeaHasher {
+ SeaHasher {
+ state: (k1, k2, k3, k4),
+ written: 0,
+ tail: 0,
+ ntail: 0,
+ }
+ }
+
+ #[inline(always)]
+ fn push(&mut self, x: u64) {
+ let a = helper::diffuse(self.state.0 ^ x);
+ self.state.0 = self.state.1;
+ self.state.1 = self.state.2;
+ self.state.2 = self.state.3;
+ self.state.3 = a;
+ self.written += 8;
+ }
+
+ #[inline(always)]
+ fn push_bytes(&mut self, bytes: &[u8]) {
+ // The start of the bytes that aren't in the tail
+ let copied = core::cmp::min(8 - self.ntail, bytes.len());
+ unsafe {
+ let mut this = self.tail.to_le_bytes();
+ let mut ptr = bytes.as_ptr();
+ ptr.copy_to_nonoverlapping(this.as_mut_ptr().add(self.ntail), copied);
+ // It will be at most 8
+ if copied + self.ntail != 8 {
+ self.ntail += copied;
+ self.tail = u64::from_le_bytes(this);
+ } else {
+ self.push(u64::from_le_bytes(this));
+ self.ntail = 0;
+ self.tail = 0;
+
+ // We've done the existing tail, now just do the rest in chunks of 4 x u64.
+ ptr = ptr.offset(copied as isize);
+ let end_ptr = ptr.offset((bytes.len() - copied) as isize & !0x1F);
+ while end_ptr > ptr {
+ self.state.0 = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
+ self.state.1 = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
+ self.state.2 = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
+ self.state.3 = helper::diffuse(self.state.3 ^ helper::read_u64(ptr.offset(24)));
+
+ ptr = ptr.offset(32);
+ self.written += 32;
+ }
+ let mut excessive = bytes.len() + bytes.as_ptr() as usize - ptr as usize;
+ match excessive {
+ 0 => {
+ // input was a multiple of 4 x u64 bytes long; no new tail bytes.
+ }
+ 1..=7 => {
+ self.tail =
+ helper::read_int(slice::from_raw_parts(ptr as *const u8, excessive));
+ self.ntail = excessive;
+ // self.written does not need to be updated as we only gathered self.tail
+ // bytes after larger chunks.
+ }
+ 8 => {
+ self.push(helper::read_u64(ptr));
+ // self.written is updated by self.push
+ }
+ 9..=15 => {
+ self.push(helper::read_u64(ptr));
+ excessive -= 8;
+ self.tail =
+ helper::read_int(slice::from_raw_parts(ptr.offset(8), excessive));
+ self.ntail = excessive;
+ // self.written is updated by self.push
+ }
+ 16 => {
+ let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
+ let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
+ // rotate
+ self.state.0 = self.state.2;
+ self.state.1 = self.state.3;
+ self.state.2 = a;
+ self.state.3 = b;
+ self.written += 16;
+ }
+ 17..=23 => {
+ let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
+ let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
+ // rotate
+ self.state.0 = self.state.2;
+ self.state.1 = self.state.3;
+ self.state.2 = a;
+ self.state.3 = b;
+ excessive -= 16;
+ self.tail =
+ helper::read_int(slice::from_raw_parts(ptr.offset(16), excessive));
+ self.ntail = excessive;
+ self.written += 16;
+ }
+ 24 => {
+ let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
+ let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
+ let c = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
+ self.state.0 = self.state.3;
+ self.state.1 = a;
+ self.state.2 = b;
+ self.state.3 = c;
+ self.written += 24;
+ }
+ _ => {
+ let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
+ let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
+ let c = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
+ self.state.0 = self.state.3;
+ self.state.1 = a;
+ self.state.2 = b;
+ self.state.3 = c;
+ excessive -= 24;
+ self.tail =
+ helper::read_int(slice::from_raw_parts(ptr.offset(24), excessive));
+ self.ntail = excessive;
+ self.written += 24;
+ }
+ }
+ }
+ }
+ }
+}
+
+impl Hasher for SeaHasher {
+ fn finish(&self) -> u64 {
+ let a = if self.ntail > 0 {
+ let tail = helper::read_int(&self.tail.to_le_bytes()[..self.ntail]);
+ helper::diffuse(self.state.0 ^ tail)
+ } else {
+ self.state.0
+ };
+ helper::diffuse(
+ a ^ self.state.1 ^ self.state.2 ^ self.state.3 ^ self.written + self.ntail as u64,
+ )
+ }
+
+ fn write(&mut self, bytes: &[u8]) {
+ self.push_bytes(bytes)
+ }
+
+ fn write_u64(&mut self, n: u64) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_u8(&mut self, n: u8) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_u16(&mut self, n: u16) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_u32(&mut self, n: u32) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_usize(&mut self, n: usize) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_i64(&mut self, n: i64) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_i8(&mut self, n: i8) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_i16(&mut self, n: i16) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_i32(&mut self, n: i32) {
+ self.write(&n.to_le_bytes())
+ }
+
+ fn write_isize(&mut self, n: isize) {
+ self.write(&n.to_le_bytes())
+ }
+}
+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash_seeded;
    use std::hash::Hasher;

    // The streaming hasher must produce the same value no matter how the
    // input is split across `write`/`write_uXX` calls.
    #[test]
    fn chunked_equiv() {
        let test_buf: &[u8] = &[
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00,
        ];

        // One single write.
        let mut stream_hasher1 = SeaHasher::default();
        Hasher::write(&mut stream_hasher1, test_buf);

        // Split on a word boundary.
        let mut stream_hasher2 = SeaHasher::default();
        Hasher::write(&mut stream_hasher2, &test_buf[..8]);
        Hasher::write(&mut stream_hasher2, &test_buf[8..]);

        // Split mid-word, exercising the tail buffer.
        let mut stream_hasher3 = SeaHasher::default();
        Hasher::write(&mut stream_hasher3, &test_buf[..3]);
        Hasher::write(&mut stream_hasher3, &test_buf[3..]);

        // Same bytes via the typed little-endian writers.
        let mut stream_hasher4 = SeaHasher::default();
        Hasher::write_u16(&mut stream_hasher4, 0xffff);
        Hasher::write_u16(&mut stream_hasher4, 0xffff);
        Hasher::write_u32(&mut stream_hasher4, 0xffffffff);
        Hasher::write_u64(&mut stream_hasher4, 0);

        assert_eq!(stream_hasher1.finish(), stream_hasher2.finish());
        assert_eq!(stream_hasher1.finish(), stream_hasher3.finish());
        assert_eq!(stream_hasher1.finish(), stream_hasher4.finish());
    }

    // The streaming hasher must agree with the buffered (optimized)
    // implementation for the same seeds and input.
    #[test]
    fn match_optimized() {
        let test_buf: &[u8] = &[
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00,
        ];

        let mut sea_hasher = SeaHasher::with_seeds(
            0xe7b0c93ca8525013,
            0x011d02b854ae8182,
            0x7bcc5cf9c39cec76,
            0xfa336285d102d083,
        );
        sea_hasher.write(test_buf);
        let stream_hash = sea_hasher.finish();

        let buffer_hash = hash_seeded(
            test_buf,
            0xe7b0c93ca8525013,
            0x011d02b854ae8182,
            0x7bcc5cf9c39cec76,
            0xfa336285d102d083,
        );

        assert_eq!(buffer_hash, stream_hash)
    }
}
--- /dev/null
+extern crate seahash;
+use seahash::SeaHasher as H;
+
+use std::hash::Hasher;
+
#[test]
fn hash_chunking_vs_not() {
    // originally from https://gitlab.redox-os.org/redox-os/seahash/issues/5
    let c1: &[u8] = b"This hashing algorithm was extracted from the Rustc compiler.";
    let c2: &[u8] =
        b" This is the same hashing algoirthm used for some internal operations in FireFox.";
    let c3: &[u8] = b" The strength of this algorithm is in hashing 8 bytes at a time on 64-bit platforms, where the FNV algorithm works on one byte at a time.";

    // Stream the three pieces through separate writes.
    let mut chunked = H::default();
    for &chunk in &[c1, c2, c3] {
        chunked.write(chunk);
    }
    let hash1 = chunked.finish();

    // Stream the concatenation through a single write.
    let c4 = [c1, c2, c3].concat();
    let mut whole = H::default();
    whole.write(&c4);
    let hash2 = whole.finish();

    // Compare against both the reference and the buffered implementations.
    let reference = seahash::reference::hash(&c4);
    let buffer = seahash::hash(&c4);

    println!("hash1: {:016x}", hash1);
    println!("hash2: {:016x}", hash2);
    println!("ref : {:016x}", reference);
    println!("buf : {:016x}", buffer);

    assert_eq!(hash1, hash2);
    assert_eq!(hash1, reference);
    assert_eq!(hash1, buffer);
    assert_eq!(hash1, 0xa06e72e1b06144a0);
}
+
#[test]
fn test_different_chunk_sizes() {
    // Build the input from three concatenated pieces.
    let v = {
        let c1: &[u8] = b"This hashing algorithm was extracted from the Rustc compiler.";
        let c2: &[u8] =
            b" This is the same hashing algoirthm used for some internal operations in FireFox.";
        let c3: &[u8] = b" The strength of this algorithm is in hashing 8 bytes at a time on 64-bit platforms, where the FNV algorithm works on one byte at a time.";

        [c1, c2, c3].concat()
    };

    // The expected value: everything hashed in one write.
    let mut h1 = H::default();
    h1.write(&v);
    let h1 = h1.finish();

    // Splitting the input at every possible chunk length must not change
    // the resulting hash.
    for chunk_len in 1..v.len() {
        let mut h2 = H::default();
        v.chunks(chunk_len).for_each(|w| h2.write(w));
        let h2 = h2.finish();

        assert_eq!(h1, h2, "failed with chunk_len={}", chunk_len);
    }
}
--- /dev/null
+extern crate seahash;
+
+#[macro_use]
+extern crate quickcheck;
+use quickcheck::TestResult;
+
+use seahash::hash;
+use seahash::reference::hash as reference;
+use seahash::SeaHasher;
+use std::hash::Hasher;
+use std::num::{NonZeroU8, NonZeroUsize};
+
// Property test: for arbitrary input and arbitrary chunking, the streaming
// hasher must agree with itself, with the buffered implementation, and with
// the reference implementation.
quickcheck! {
    #[cfg_attr(miri, ignore)] // very slow to run on miri
    fn chunked_matches_buffered(xs: Vec<u8>, chunk_size: NonZeroUsize, times: NonZeroU8, additional: u8) -> TestResult {
        // Cap the generated input size to keep the test fast.
        let target_size = xs.len() * times.get() as usize + additional as usize;
        if xs.is_empty() || target_size > 10_000_000 {
            TestResult::discard()
        } else {
            let xs = xs.into_iter()
                .cycle()
                // the vecs produced by quickcheck are perhaps a bit small by default.
                // additional should add some noise to avoid only getting nice even lengths.
                .take(target_size)
                .collect::<Vec<_>>();

            // write all at once
            let mut h0 = SeaHasher::default();
            h0.write(&xs);
            let h0 = h0.finish();

            // write in chunks
            let mut h1 = SeaHasher::default();
            for chunk in xs.chunks(chunk_size.get()) {
                h1.write(chunk);
            }
            let h1 = h1.finish();

            // compare all, including to buffered and reference
            let outcome = h0 == h1
                && h0 == hash(&xs)
                && h0 == reference(&xs);

            TestResult::from_bool(outcome)
        }
    }
}