From d20f2d335366bf272aa6c2414909e1dd07404361 Mon Sep 17 00:00:00 2001 From: Roy7Kim Date: Wed, 22 Mar 2023 15:38:40 +0900 Subject: [PATCH 1/1] Import simd-adler32 0.3.5 --- .cargo_vcs_info.json | 6 + .github/workflows/build.yaml | 173 ++++++++++++++++++ .github/workflows/fuzz.yaml | 59 +++++++ .github/workflows/publish.yaml | 35 ++++ .gitignore | 10 ++ .rustfmt.toml | 2 + CHANGELOG.md | 12 ++ Cargo.toml | 92 ++++++++++ Cargo.toml.orig | 64 +++++++ LICENSE.md | 21 +++ README.md | 131 ++++++++++++++ src/hash.rs | 156 +++++++++++++++++ src/imp/avx2.rs | 214 +++++++++++++++++++++++ src/imp/avx512.rs | 242 +++++++++++++++++++++++++ src/imp/mod.rs | 23 +++ src/imp/neon.rs | 241 +++++++++++++++++++++++++ src/imp/scalar.rs | 66 +++++++ src/imp/sse2.rs | 233 +++++++++++++++++++++++++ src/imp/ssse3.rs | 219 +++++++++++++++++++++++ src/imp/wasm.rs | 217 +++++++++++++++++++++++ src/lib.rs | 310 +++++++++++++++++++++++++++++++++ 21 files changed, 2526 insertions(+) create mode 100644 .cargo_vcs_info.json create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/fuzz.yaml create mode 100644 .github/workflows/publish.yaml create mode 100644 .gitignore create mode 100644 .rustfmt.toml create mode 100644 CHANGELOG.md create mode 100644 Cargo.toml create mode 100644 Cargo.toml.orig create mode 100644 LICENSE.md create mode 100644 README.md create mode 100644 src/hash.rs create mode 100644 src/imp/avx2.rs create mode 100644 src/imp/avx512.rs create mode 100644 src/imp/mod.rs create mode 100644 src/imp/neon.rs create mode 100644 src/imp/scalar.rs create mode 100644 src/imp/sse2.rs create mode 100644 src/imp/ssse3.rs create mode 100644 src/imp/wasm.rs create mode 100644 src/lib.rs diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..7d6b3c9 --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "fdb8d90ec24851e8b421eb9f155e064052f110a9" + }, + "path_in_vcs": "" +} \ No newline at end of file 
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..b1f713f --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,173 @@ +name: build + +on: + push: + branches-ignore: [main] + pull_request: + branches: [main] + +jobs: + clippy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + components: clippy + override: true + - uses: actions-rs/clippy-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + args: --all-features + + test-doc: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: nightly + override: true + - uses: Swatinem/rust-cache@v1 + - run: cargo install cargo-deadlinks + - name: doc + env: + RUSTDOCFLAGS: --cfg doc_cfg + run: cargo deadlinks --ignore-fragments -- --all + + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + toolchain: stable + - os: macos-latest + target: x86_64-apple-darwin + toolchain: stable + # TODO: also aarch64 / M1 + - os: windows-latest + target: x86_64-pc-windows-gnu + toolchain: stable + - os: windows-latest + target: x86_64-pc-windows-msvc + toolchain: beta + # Test both windows-gnu and windows-msvc; use beta rust on one + - os: ubuntu-latest + deps: sudo apt-get update ; sudo apt install gcc-multilib + target: i686-unknown-linux-gnu + toolchain: nightly + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + toolchain: nightly + variant: minimal_versions + + steps: + - uses: actions/checkout@v2 + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + target: ${{ matrix.target }} + toolchain: ${{ matrix.toolchain }} + override: true + - uses: Swatinem/rust-cache@v1 + - run: ${{ matrix.deps }} + - name: Maybe minimal versions + if: ${{ matrix.variant == 
'minimal_versions' }} + run: cargo generate-lockfile -Z minimal-versions + + - name: Test + run: | + cargo test --lib --target ${{ matrix.target }} ${{ matrix.toolchain == 'nightly' && '--features=nightly' || '' }} + + env: + # Enables address sanitization if supported target and nightly. + RUSTFLAGS: | + ${{ + ( + matrix.toolchain == 'nightly' && ( + matrix.target == 'aarch64-apple-darwin' || + matrix.target == 'aarch64-fuchsia' || + matrix.target == 'aarch64-unknown-linux-gnu' || + matrix.target == 'x86_64-apple-darwin' || + matrix.target == 'x86_64-fuchsia' || + matrix.target == 'x86_64-unknown-freebsd' || + matrix.target == 'x86_64-unknown-linux-gnu' + ) + ) + && '-Z sanitizer=address' + || '' + }} + + test-cross: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: mips-unknown-linux-gnu + toolchain: stable + - os: ubuntu-latest + target: arm-unknown-linux-gnueabi + toolchain: stable + - os: ubuntu-latest + target: aarch64-unknown-linux-gnu + toolchain: stable + - os: ubuntu-latest + target: arm-unknown-linux-gnueabi + toolchain: nightly + - os: ubuntu-latest + target: aarch64-unknown-linux-gnu + toolchain: nightly + + steps: + - uses: actions/checkout@v2 + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + target: ${{ matrix.target }} + toolchain: ${{ matrix.toolchain }} + override: true + + - uses: Swatinem/rust-cache@v1 + - name: Install cross + run: cargo install cross || true + - name: Test + run: | + cross test --no-fail-fast --target ${{ matrix.target }} ${{ matrix.toolchain == 'nightly' && '--features=nightly' || '' }} + + test-msrv: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + target: x86_64-unknown-linux-gnu + toolchain: 1.36.0 # MSRV + - uses: Swatinem/rust-cache@v1 + - name: Test + run: cargo test --target=x86_64-unknown-linux-gnu --no-default-features --features=std 
+ + test-no-std: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: nightly + target: thumbv6m-none-eabi + override: true + - uses: Swatinem/rust-cache@v1 + - name: Build top-level only + run: cargo build --target=thumbv6m-none-eabi --no-default-features diff --git a/.github/workflows/fuzz.yaml b/.github/workflows/fuzz.yaml new file mode 100644 index 0000000..39b1451 --- /dev/null +++ b/.github/workflows/fuzz.yaml @@ -0,0 +1,59 @@ +name: fuzz + +on: + schedule: + # Run every 24h + - cron: "0 0 * * *" + +defaults: + run: + working-directory: fuzz + +jobs: + fuzz: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + target: [avx2, sse2, ssse3] + + steps: + - uses: actions/checkout@v2 + - name: setup + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: nightly + override: true + + - uses: Swatinem/rust-cache@v1 + - run: cargo install cargo-fuzz + + - name: Download corpus artifact + run: | + wget https://nightly.link/mcountryman/simd-adler32/workflows/fuzz.yaml/main/corpus.zip + mkdir -p corpus + unzip corpus.zip -d corpus + + # keep going if artifact doesn't exist + continue-on-error: true + + - name: Run fuzz test (30m) + run: | + cargo fuzz run ${{ matrix.target }} -- -max_total_time=1800 + + - name: Archive artifacts + uses: actions/upload-artifact@v2 + if: failure() + with: + name: artifacts + path: fuzz/artifacts + + # big decision. do we store corpus from failed jobs? for now we don't.. 
+ - uses: actions/upload-artifact@v2 + with: + name: corpus + path: | + fuzz/corpus/ + fuzz/corpus/${{ matrix.target }} diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml new file mode 100644 index 0000000..b14a571 --- /dev/null +++ b/.github/workflows/publish.yaml @@ -0,0 +1,35 @@ +name: publish + +on: + push: + branches: [main] + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Configure git user + run: | + git config user.name "GitHub Actions" + git config user.email noreply@github.com + + - name: Install cargo-release + uses: actions-rs/install@v0.1 + with: + crate: cargo-release + version: latest + + - name: Publish + run: | + cargo login ${{ secrets.CRATES_IO_TOKEN }} + cargo release --no-confirm --no-push -x + + - name: Push changes + uses: ad-m/github-push-action@master + with: + tags: true + force: true + branch: ${{ github.ref }} + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d7c743e --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# editor +.idea/ +.vscode/ + +# fuzz +fuzz/artifacts +fuzz/corpus + +# cargo +target/ diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..e1f2680 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,2 @@ +max_width = 90 +tab_spaces = 2 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..95b141e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +## 0.3.3 - 2021-04-14 + +### Features + +- **from_checksum**: add `Adler32::from_checksum` + +### Performance Improvements + +- **scalar**: improve scalar performance by 90-600% + - Defer modulo until right before u16 overflow diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d5435e0 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,92 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will 
automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "simd-adler32" +version = "0.3.5" +authors = ["Marvin Countryman "] +exclude = ["bench"] +description = "A SIMD-accelerated Adler-32 rolling hash algorithm implementation." +readme = "README.md" +keywords = [ + "simd", + "avx2", + "ssse3", + "adler", + "adler32", +] +categories = [ + "algorithms", + "no-std", +] +license = "MIT" +repository = "https://github.com/mcountryman/simd-adler32" + +[profile.release] +opt-level = 2 +debug = true + +[[bench]] +name = "all" +path = "bench/all.rs" +harness = false + +[[bench]] +name = "avx2" +path = "bench/avx2.rs" +harness = false + +[[bench]] +name = "avx512" +path = "bench/avx512.rs" +harness = false + +[[bench]] +name = "scalar" +path = "bench/scalar.rs" +harness = false + +[[bench]] +name = "sse2" +path = "bench/sse2.rs" +harness = false + +[[bench]] +name = "ssse3" +path = "bench/ssse3.rs" +harness = false + +[[bench]] +name = "compete" +path = "bench/compete.rs" +harness = false + +[dev-dependencies.adler] +version = "1.0.2" + +[dev-dependencies.adler32] +version = "1.2.0" + +[dev-dependencies.criterion] +version = "0.3" + +[dev-dependencies.rand] +version = "0.8" + +[features] +const-generics = [] +default = [ + "std", + "const-generics", +] +nightly = [] +std = [] diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..d861001 --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,64 @@ +[package] +name = "simd-adler32" +authors = ["Marvin Countryman "] +license = "MIT" +version = "0.3.5" +edition = "2018" +keywords = ["simd", "avx2", "ssse3", "adler", "adler32"] +categories = ["algorithms", 
"no-std"] +repository = "https://github.com/mcountryman/simd-adler32" +description = "A SIMD-accelerated Adler-32 rolling hash algorithm implementation." +exclude = ["bench"] + +[profile.release] +debug = true +opt-level = 2 + +[[bench]] +name = "all" +path = "bench/all.rs" +harness = false + +[[bench]] +name = "avx2" +path = "bench/avx2.rs" +harness = false + +[[bench]] +name = "avx512" +path = "bench/avx512.rs" +harness = false + +[[bench]] +name = "scalar" +path = "bench/scalar.rs" +harness = false + +[[bench]] +name = "sse2" +path = "bench/sse2.rs" +harness = false + +[[bench]] +name = "ssse3" +path = "bench/ssse3.rs" +harness = false + +[[bench]] +name = "compete" +path = "bench/compete.rs" +harness = false + +[features] +default = ["std", "const-generics"] +std = [] +nightly = [] +const-generics = [] + +[dev-dependencies] +rand = "0.8" +criterion = "0.3" + +# competition +adler = "1.0.2" +adler32 = "1.2.0" diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..9bd65ce --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [2021] [Marvin Countryman] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..c82dddc --- /dev/null +++ b/README.md @@ -0,0 +1,131 @@ +

simd-adler32

+

+ + docs.rs badge + + + crates.io badge + + + mit license badge + +

+ +A SIMD-accelerated Adler-32 rolling hash algorithm implementation. + +## Features + +- No dependencies +- Support `no_std` (with `default-features = false`) +- Runtime CPU feature detection (when `std` enabled) +- Blazing fast performance on as many targets as possible (currently x86, x86_64 and wasm32) +- Default to scalar implementation when simd not available + +## Quick start + +> Cargo.toml + +```toml +[dependencies] +simd-adler32 = "*" +``` + +> example.rs + +```rust +use simd_adler32::Adler32; + +let mut adler = Adler32::new(); +adler.write(b"rust is pretty cool, man"); +let hash = adler.finish(); + +println!("{}", hash); +// 1921255656 +``` + +## Support + +**CPU Features** + +| impl | arch | feature | +| ---- | ---------------- | ------- | +| ✅ | `x86`, `x86_64` | avx512 | +| ✅ | `x86`, `x86_64` | avx2 | +| ✅ | `x86`, `x86_64` | ssse3 | +| ✅ | `x86`, `x86_64` | sse2 | +| 🚧 | `arm`, `aarch64` | neon | +| ✅ | `wasm32` | simd128 | + +**MSRV** `1.36.0`\*\* + +Minimum supported rust version is tested before a new version is published. [**] Feature +`const-generics` needs to be disabled to build on rustc versions `<1.51` which can be done +by updating your dependency definition to the following. + +> Cargo.toml + +```toml +[dependencies] +simd-adler32 = { version = "*", default-features = false, features = ["std"] } +``` + +## Performance + +Benchmarks listed display number of randomly generated bytes (10k / 100k) and library +name. Benchmark sources can be found under the [bench](/bench) directory. Crates used for +comparison are [adler](https://crates.io/crates/adler) and +[adler32](https://crates.io/crates/adler32). + +> Windows 10 Pro - Intel i5-8300H @ 2.30GHz + +| name | avg. time | avg. 
thrpt | +| ----------------------- | --------------- | ------------------ | +| **10k/simd-adler32** | **212.61 ns** | **43.805 GiB/s** | +| 10k/wuffs | 3843 ns | 2.63 GiB/s\* | +| 10k/adler32 | 4.8084 us | 1.9369 GiB/s | +| 10k/adler | 17.979 us | 530.43 MiB/s | +| ----------------------- | --------------- | ------------------ | +| **100k/simd-adler32** | **2.7951 us** | **33.320 GiB/s** | +| 100k/wuffs | 34733 ns | 2.6814 GiB/s\* | +| 100k/adler32 | 48.488 us | 1.9207 GiB/s | +| 100k/adler | 178.36 us | 534.69 MiB/s | + +\* wuffs ran using mingw64/gcc, ran with `wuffs bench -ccompilers=gcc -reps=1 -iterscale=300 std/adler32`. + +> MacBookPro16,1 - Intel i9-9880H CPU @ 2.30GHz + +| name | avg. time | avg. thrpt | +| ----------------------- | --------------- | ------------------ | +| **10k/simd-adler32** | **200.37 ns** | **46.480 GiB/s** | +| 10k/adler32 | 4.1516 us | 2.2433 GiB/s | +| 10k/adler | 10.220 us | 933.15 MiB/s | +| ----------------------- | --------------- | ------------------ | +| **100k/simd-adler32** | **2.3282 us** | **40.003 GiB/s** | +| 100k/adler32 | 41.130 us | 2.2643 GiB/s | +| 100k/adler | 83.776 us | 534.69 MiB/s | + +## Safety + +This crate contains a significant amount of `unsafe` code due to the requirement of `unsafe` +for simd intrinsics. Fuzzing is done on release and debug builds prior to publishing via +`afl`. Fuzz tests can be found under the [fuzz](/fuzz) directory. + +## Resources + +- [LICENSE](./LICENSE.md) - MIT +- [CHANGELOG](./CHANGELOG.md) + +## Credits + +Thank you to the contributors of the following projects. + +- [adler](https://github.com/jonas-schievink/adler) +- [adler32](https://github.com/remram44/adler32-rs) +- [crc32fast](https://github.com/srijs/rust-crc32fast) +- [wuffs](https://github.com/google/wuffs) +- [chromium](https://bugs.chromium.org/p/chromium/issues/detail?id=762564) +- [zlib](https://zlib.net/) + +## Contributing + +Feel free to submit an issue or pull request. 
:smile: diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000..558542b --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,156 @@ +use crate::{Adler32, Adler32Hash}; + +impl Adler32Hash for &[u8] { + fn hash(&self) -> u32 { + let mut hash = Adler32::new(); + + hash.write(self); + hash.finish() + } +} + +impl Adler32Hash for &str { + fn hash(&self) -> u32 { + let mut hash = Adler32::new(); + + hash.write(self.as_bytes()); + hash.finish() + } +} + +#[cfg(feature = "const-generics")] +impl Adler32Hash for [u8; SIZE] { + fn hash(&self) -> u32 { + let mut hash = Adler32::new(); + + hash.write(self); + hash.finish() + } +} + +macro_rules! array_impl { + ($s:expr, $($size:expr),+) => { + array_impl!($s); + $(array_impl!{$size})* + }; + ($size:expr) => { + #[cfg(not(feature = "const-generics"))] + impl Adler32Hash for [u8; $size] { + fn hash(&self) -> u32 { + let mut hash = Adler32::new(); + + hash.write(self); + hash.finish() + } + } + }; +} + +array_impl!( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 1024, + 1024 * 1024, + 1024 * 1024 * 1024, + 2048, + 4096 +); diff --git a/src/imp/avx2.rs b/src/imp/avx2.rs new file mode 100644 index 0000000..c16cc99 --- /dev/null +++ b/src/imp/avx2.rs @@ -0,0 +1,214 @@ +use super::Adler32Imp; + +/// Resolves update implementation if CPU supports avx2 instructions. 
+pub fn get_imp() -> Option { + get_imp_inner() +} + +#[inline] +#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))] +fn get_imp_inner() -> Option { + if std::is_x86_feature_detected!("avx2") { + Some(imp::update) + } else { + None + } +} + +#[inline] +#[cfg(all( + target_feature = "avx2", + not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) +))] +fn get_imp_inner() -> Option { + Some(imp::update) +} + +#[inline] +#[cfg(all( + not(target_feature = "avx2"), + not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) +))] +fn get_imp_inner() -> Option { + None +} + +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + any(feature = "std", target_feature = "avx2") +))] +mod imp { + const MOD: u32 = 65521; + const NMAX: usize = 5552; + const BLOCK_SIZE: usize = 32; + const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; + + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + + pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + unsafe { update_imp(a, b, data) } + } + + #[inline] + #[target_feature(enable = "avx2")] + unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + let mut a = a as u32; + let mut b = b as u32; + + let chunks = data.chunks_exact(CHUNK_SIZE); + let remainder = chunks.remainder(); + for chunk in chunks { + update_chunk_block(&mut a, &mut b, chunk); + } + + update_block(&mut a, &mut b, remainder); + + (a as u16, b as u16) + } + + #[inline] + unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert_eq!( + chunk.len(), + CHUNK_SIZE, + "Unexpected chunk size (expected {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + reduce_add_blocks(a, b, chunk); + + *a %= MOD; + *b %= MOD; + } + + #[inline] + unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert!( + chunk.len() <= CHUNK_SIZE, + "Unexpected chunk size (expected <= {}, got 
{})", + CHUNK_SIZE, + chunk.len() + ); + + for byte in reduce_add_blocks(a, b, chunk) { + *a += *byte as u32; + *b += *a; + } + + *a %= MOD; + *b %= MOD; + } + + #[inline(always)] + unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { + if chunk.len() < BLOCK_SIZE { + return chunk; + } + + let blocks = chunk.chunks_exact(BLOCK_SIZE); + let blocks_remainder = blocks.remainder(); + + let one_v = _mm256_set1_epi16(1); + let zero_v = _mm256_setzero_si256(); + let weights = get_weights(); + + let mut p_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (*a * blocks.len() as u32) as _); + let mut a_v = _mm256_setzero_si256(); + let mut b_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, *b as _); + + for block in blocks { + let block_ptr = block.as_ptr() as *const _; + let block = _mm256_loadu_si256(block_ptr); + + p_v = _mm256_add_epi32(p_v, a_v); + + a_v = _mm256_add_epi32(a_v, _mm256_sad_epu8(block, zero_v)); + let mad = _mm256_maddubs_epi16(block, weights); + b_v = _mm256_add_epi32(b_v, _mm256_madd_epi16(mad, one_v)); + } + + b_v = _mm256_add_epi32(b_v, _mm256_slli_epi32(p_v, 5)); + + *a += reduce_add(a_v); + *b = reduce_add(b_v); + + blocks_remainder + } + + #[inline(always)] + unsafe fn reduce_add(v: __m256i) -> u32 { + let sum = _mm_add_epi32(_mm256_castsi256_si128(v), _mm256_extracti128_si256(v, 1)); + let hi = _mm_unpackhi_epi64(sum, sum); + + let sum = _mm_add_epi32(hi, sum); + let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); + + let sum = _mm_add_epi32(sum, hi); + + _mm_cvtsi128_si32(sum) as _ + } + + #[inline(always)] + unsafe fn get_weights() -> __m256i { + _mm256_set_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, + ) + } +} + +#[cfg(test)] +mod tests { + use rand::Rng; + + #[test] + fn zeroes() { + assert_sum_eq(&[]); + assert_sum_eq(&[0]); + assert_sum_eq(&[0, 0]); + assert_sum_eq(&[0; 100]); + assert_sum_eq(&[0; 1024]); + 
assert_sum_eq(&[0; 1024 * 1024]); + } + + #[test] + fn ones() { + assert_sum_eq(&[]); + assert_sum_eq(&[1]); + assert_sum_eq(&[1, 1]); + assert_sum_eq(&[1; 100]); + assert_sum_eq(&[1; 1024]); + assert_sum_eq(&[1; 1024 * 1024]); + } + + #[test] + fn random() { + let mut random = [0; 1024 * 1024]; + rand::thread_rng().fill(&mut random[..]); + + assert_sum_eq(&random[..1]); + assert_sum_eq(&random[..100]); + assert_sum_eq(&random[..1024]); + assert_sum_eq(&random[..1024 * 1024]); + } + + /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. + #[test] + fn wiki() { + assert_sum_eq(b"Wikipedia"); + } + + fn assert_sum_eq(data: &[u8]) { + if let Some(update) = super::get_imp() { + let (a, b) = update(1, 0, data); + let left = u32::from(b) << 16 | u32::from(a); + let right = adler::adler32_slice(data); + + assert_eq!(left, right, "len({})", data.len()); + } + } +} diff --git a/src/imp/avx512.rs b/src/imp/avx512.rs new file mode 100644 index 0000000..ebb32fa --- /dev/null +++ b/src/imp/avx512.rs @@ -0,0 +1,242 @@ +use super::Adler32Imp; + +/// Resolves update implementation if CPU supports avx512f and avx512bw instructions. 
+pub fn get_imp() -> Option { + get_imp_inner() +} + +#[inline] +#[cfg(all( + feature = "std", + feature = "nightly", + any(target_arch = "x86", target_arch = "x86_64") +))] +fn get_imp_inner() -> Option { + let has_avx512f = std::is_x86_feature_detected!("avx512f"); + let has_avx512bw = std::is_x86_feature_detected!("avx512bw"); + + if has_avx512f && has_avx512bw { + Some(imp::update) + } else { + None + } +} + +#[inline] +#[cfg(all( + feature = "nightly", + all(target_feature = "avx512f", target_feature = "avx512bw"), + not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) +))] +fn get_imp_inner() -> Option { + Some(imp::update) +} + +#[inline] +#[cfg(all( + not(all(feature = "nightly", target_feature = "avx512f", target_feature = "avx512bw")), + not(all( + feature = "std", + feature = "nightly", + any(target_arch = "x86", target_arch = "x86_64") + )) +))] +fn get_imp_inner() -> Option { + None +} + +#[cfg(all( + feature = "nightly", + any(target_arch = "x86", target_arch = "x86_64"), + any( + feature = "std", + all(target_feature = "avx512f", target_feature = "avx512bw") + ) +))] +mod imp { + const MOD: u32 = 65521; + const NMAX: usize = 5552; + const BLOCK_SIZE: usize = 64; + const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; + + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + + pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + unsafe { update_imp(a, b, data) } + } + + #[inline] + #[target_feature(enable = "avx512f")] + #[target_feature(enable = "avx512bw")] + unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { + let mut a = a as u32; + let mut b = b as u32; + + let chunks = data.chunks_exact(CHUNK_SIZE); + let remainder = chunks.remainder(); + for chunk in chunks { + update_chunk_block(&mut a, &mut b, chunk); + } + + update_block(&mut a, &mut b, remainder); + + (a as u16, b as u16) + } + + #[inline] + unsafe fn update_chunk_block(a: &mut 
u32, b: &mut u32, chunk: &[u8]) { + debug_assert_eq!( + chunk.len(), + CHUNK_SIZE, + "Unexpected chunk size (expected {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + reduce_add_blocks(a, b, chunk); + + *a %= MOD; + *b %= MOD; + } + + #[inline] + unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { + debug_assert!( + chunk.len() <= CHUNK_SIZE, + "Unexpected chunk size (expected <= {}, got {})", + CHUNK_SIZE, + chunk.len() + ); + + for byte in reduce_add_blocks(a, b, chunk) { + *a += *byte as u32; + *b += *a; + } + + *a %= MOD; + *b %= MOD; + } + + #[inline(always)] + unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { + if chunk.len() < BLOCK_SIZE { + return chunk; + } + + let blocks = chunk.chunks_exact(BLOCK_SIZE); + let blocks_remainder = blocks.remainder(); + + let one_v = _mm512_set1_epi16(1); + let zero_v = _mm512_setzero_si512(); + let weights = get_weights(); + + let p_v = (*a * blocks.len() as u32) as _; + let mut p_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p_v); + let mut a_v = _mm512_setzero_si512(); + let mut b_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, *b as _); + + for block in blocks { + let block_ptr = block.as_ptr() as *const _; + let block = _mm512_loadu_si512(block_ptr); + + p_v = _mm512_add_epi32(p_v, a_v); + + a_v = _mm512_add_epi32(a_v, _mm512_sad_epu8(block, zero_v)); + let mad = _mm512_maddubs_epi16(block, weights); + b_v = _mm512_add_epi32(b_v, _mm512_madd_epi16(mad, one_v)); + } + + b_v = _mm512_add_epi32(b_v, _mm512_slli_epi32(p_v, 6)); + + *a += reduce_add(a_v); + *b = reduce_add(b_v); + + blocks_remainder + } + + #[inline(always)] + unsafe fn reduce_add(v: __m512i) -> u32 { + let v: [__m256i; 2] = core::mem::transmute(v); + + reduce_add_256(v[0]) + reduce_add_256(v[1]) + } + + #[inline(always)] + unsafe fn reduce_add_256(v: __m256i) -> u32 { + let v: [__m128i; 2] = core::mem::transmute(v); + let sum = _mm_add_epi32(v[0], v[1]); + let hi = 
_mm_unpackhi_epi64(sum, sum); + + let sum = _mm_add_epi32(hi, sum); + let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); + + let sum = _mm_add_epi32(sum, hi); + let sum = _mm_cvtsi128_si32(sum) as _; + + sum + } + + #[inline(always)] + unsafe fn get_weights() -> __m512i { + _mm512_set_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + ) + } +} + +#[cfg(test)] +mod tests { + use rand::Rng; + + #[test] + fn zeroes() { + assert_sum_eq(&[]); + assert_sum_eq(&[0]); + assert_sum_eq(&[0, 0]); + assert_sum_eq(&[0; 100]); + assert_sum_eq(&[0; 1024]); + assert_sum_eq(&[0; 1024 * 1024]); + } + + #[test] + fn ones() { + assert_sum_eq(&[]); + assert_sum_eq(&[1]); + assert_sum_eq(&[1, 1]); + assert_sum_eq(&[1; 100]); + assert_sum_eq(&[1; 1024]); + assert_sum_eq(&[1; 1024 * 1024]); + } + + #[test] + fn random() { + let mut random = [0; 1024 * 1024]; + rand::thread_rng().fill(&mut random[..]); + + assert_sum_eq(&random[..1]); + assert_sum_eq(&random[..100]); + assert_sum_eq(&random[..1024]); + assert_sum_eq(&random[..1024 * 1024]); + } + + /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. 
+ #[test] + fn wiki() { + assert_sum_eq(b"Wikipedia"); + } + + fn assert_sum_eq(data: &[u8]) { + if let Some(update) = super::get_imp() { + let (a, b) = update(1, 0, data); + let left = u32::from(b) << 16 | u32::from(a); + let right = adler::adler32_slice(data); + + assert_eq!(left, right, "len({})", data.len()); + } + } +} diff --git a/src/imp/mod.rs b/src/imp/mod.rs new file mode 100644 index 0000000..957b50a --- /dev/null +++ b/src/imp/mod.rs @@ -0,0 +1,23 @@ +pub mod avx2; +pub mod avx512; +pub mod scalar; +pub mod sse2; +pub mod ssse3; +pub mod wasm; + +pub type Adler32Imp = fn(u16, u16, &[u8]) -> (u16, u16); + +#[inline] +#[allow(non_snake_case)] +pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { + ((z << 6) | (y << 4) | (x << 2) | w) as i32 +} + +pub fn get_imp() -> Adler32Imp { + avx512::get_imp() + .or_else(avx2::get_imp) + .or_else(ssse3::get_imp) + .or_else(sse2::get_imp) + .or_else(wasm::get_imp) + .unwrap_or(scalar::update) +} diff --git a/src/imp/neon.rs b/src/imp/neon.rs new file mode 100644 index 0000000..8398b6d --- /dev/null +++ b/src/imp/neon.rs @@ -0,0 +1,241 @@ +use super::Adler32Imp; + +/// Resolves update implementation if CPU supports neon instructions. 
+pub fn get_imp() -> Option<Adler32Imp> {
+  get_imp_inner()
+}
+
+#[inline]
+#[cfg(all(feature = "std", feature = "nightly", target_arch = "arm"))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  if std::is_arm_feature_detected!("neon") {
+    Some(imp::update)
+  } else {
+    None
+  }
+}
+
+#[inline]
+#[cfg(all(feature = "std", feature = "nightly", target_arch = "aarch64"))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  if std::is_aarch64_feature_detected!("neon") {
+    Some(imp::update)
+  } else {
+    None
+  }
+}
+
+#[inline]
+#[cfg(all(
+  feature = "nightly",
+  target_feature = "neon",
+  not(all(feature = "std", any(target_arch = "arm", target_arch = "aarch64")))
+))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  Some(imp::update)
+}
+
+#[inline]
+#[cfg(all(
+  not(target_feature = "neon"),
+  not(all(
+    feature = "std",
+    feature = "nightly",
+    any(target_arch = "arm", target_arch = "aarch64")
+  ))
+))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  None
+}
+
+#[cfg(all(
+  feature = "nightly",
+  any(target_arch = "arm", target_arch = "aarch64"),
+  any(feature = "std", target_feature = "neon")
+))]
+mod imp { // FIXME: body still uses the x86 `_mm512_*`/`_mm_*` intrinsics; not yet ported to NEON
+  const MOD: u32 = 65521;
+  const NMAX: usize = 5552;
+  const BLOCK_SIZE: usize = 64;
+  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; // NMAX rounded down to whole blocks
+
+  #[cfg(target_arch = "aarch64")]
+  use core::arch::aarch64::*;
+  #[cfg(target_arch = "arm")]
+  use core::arch::arm::*;
+
+  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    unsafe { update_imp(a, b, data) }
+  }
+
+  #[inline]
+  #[target_feature(enable = "neon")]
+  unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    let mut a = a as u32;
+    let mut b = b as u32;
+
+    let chunks = data.chunks_exact(CHUNK_SIZE);
+    let remainder = chunks.remainder();
+    for chunk in chunks {
+      update_chunk_block(&mut a, &mut b, chunk);
+    }
+
+    update_block(&mut a, &mut b, remainder);
+
+    (a as u16, b as u16)
+  }
+
+  #[inline]
+  unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert_eq!(
+      chunk.len(),
+      CHUNK_SIZE,
+      "Unexpected chunk size (expected {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    reduce_add_blocks(a, b, chunk);
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  #[inline]
+  unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert!(
+      chunk.len() <= CHUNK_SIZE,
+      "Unexpected chunk size (expected <= {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    for byte in reduce_add_blocks(a, b, chunk) { // bytes left over after the last whole block
+      *a += *byte as u32;
+      *b += *a;
+    }
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  #[inline(always)]
+  unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
+    if chunk.len() < BLOCK_SIZE {
+      return chunk;
+    }
+
+    let blocks = chunk.chunks_exact(BLOCK_SIZE);
+    let blocks_remainder = blocks.remainder();
+
+    let one_v = _mm512_set1_epi16(1);
+    let zero_v = _mm512_setzero_si512();
+    let weights = get_weights();
+
+    let p_v = (*a * blocks.len() as u32) as _;
+    let mut p_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p_v);
+    let mut a_v = _mm512_setzero_si512();
+    let mut b_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, *b as _);
+
+    for block in blocks {
+      let block_ptr = block.as_ptr() as *const _;
+      let block = _mm512_loadu_si512(block_ptr);
+
+      p_v = _mm512_add_epi32(p_v, a_v);
+
+      a_v = _mm512_add_epi32(a_v, _mm512_sad_epu8(block, zero_v));
+      let mad = _mm512_maddubs_epi16(block, weights);
+      b_v = _mm512_add_epi32(b_v, _mm512_madd_epi16(mad, one_v));
+    }
+
+    b_v = _mm512_add_epi32(b_v, _mm512_slli_epi32(p_v, 6)); // << 6 == * BLOCK_SIZE (64)
+
+    *a += reduce_add(a_v);
+    *b = reduce_add(b_v);
+
+    blocks_remainder
+  }
+
+  #[inline(always)]
+  unsafe fn reduce_add(v: __m512i) -> u32 {
+    let v: [__m256i; 2] = core::mem::transmute(v);
+
+    reduce_add_256(v[0]) + reduce_add_256(v[1])
+  }
+
+  #[inline(always)]
+  unsafe fn reduce_add_256(v: __m256i) -> u32 {
+    let v: [__m128i; 2] = core::mem::transmute(v);
+    let sum = _mm_add_epi32(v[0], v[1]);
+    let hi = _mm_unpackhi_epi64(sum, sum);
+
+    let sum = _mm_add_epi32(hi, sum);
+    let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
+
+    let sum = _mm_add_epi32(sum, hi);
+    let sum = _mm_cvtsi128_si32(sum) as _;
+
+    sum
+  }
+
+  #[inline(always)]
+  unsafe fn get_weights() -> __m512i {
+    _mm512_set_epi8(
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+      24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+      45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+    )
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  use rand::Rng;
+
+  #[test]
+  fn zeroes() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[0]);
+    assert_sum_eq(&[0, 0]);
+    assert_sum_eq(&[0; 100]);
+    assert_sum_eq(&[0; 1024]);
+    assert_sum_eq(&[0; 1024 * 1024]);
+  }
+
+  #[test]
+  fn ones() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[1]);
+    assert_sum_eq(&[1, 1]);
+    assert_sum_eq(&[1; 100]);
+    assert_sum_eq(&[1; 1024]);
+    assert_sum_eq(&[1; 1024 * 1024]);
+  }
+
+  #[test]
+  fn random() {
+    let mut random = [0; 1024 * 1024];
+    rand::thread_rng().fill(&mut random[..]);
+
+    assert_sum_eq(&random[..1]);
+    assert_sum_eq(&random[..100]);
+    assert_sum_eq(&random[..1024]);
+    assert_sum_eq(&random[..1024 * 1024]);
+  }
+
+  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
+  #[test]
+  fn wiki() {
+    assert_sum_eq(b"Wikipedia");
+  }
+
+  fn assert_sum_eq(data: &[u8]) { // compares against the `adler` crate; checks nothing when `get_imp()` is None
+    if let Some(update) = super::get_imp() {
+      let (a, b) = update(1, 0, data);
+      let left = u32::from(b) << 16 | u32::from(a);
+      let right = adler::adler32_slice(data);
+
+      assert_eq!(left, right, "len({})", data.len());
+    }
+  }
+}
diff --git a/src/imp/scalar.rs b/src/imp/scalar.rs
new file mode 100644
index 0000000..b47950a
--- /dev/null
+++ b/src/imp/scalar.rs
@@ -0,0 +1,66 @@
+const MOD: u32 = 65521; // modulus applied to both running sums
+const NMAX: usize = 5552; // bytes accumulated per chunk before the sums are reduced modulo MOD
+
+pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { // folds `data` into the sums (a, b)
+  let mut a = a as u32;
+  let mut b = b as u32;
+
+  let chunks = data.chunks_exact(NMAX);
+  let remainder = chunks.remainder();
+
+  for chunk in chunks { // full NMAX-byte chunks: one reduction per chunk
+    for byte in chunk {
+      a = a.wrapping_add(*byte as _);
+      b = b.wrapping_add(a);
+    }
+
+    a %= MOD;
+    b %= MOD;
+  }
+
+  for byte in remainder { // trailing bytes (fewer than NMAX): reduced after every byte
+    a = a.wrapping_add(*byte as _) % MOD;
+    b = b.wrapping_add(a) % MOD;
+  }
+
+  (a as u16, b as u16)
+}
+
+#[cfg(test)]
+mod tests {
+  #[test]
+  fn zeroes() {
+    assert_eq!(adler32(&[]), 1);
+    assert_eq!(adler32(&[0]), 1 | 1 << 16);
+    assert_eq!(adler32(&[0, 0]), 1 | 2 << 16);
+    assert_eq!(adler32(&[0; 100]), 0x00640001);
+    assert_eq!(adler32(&[0; 1024]), 0x04000001);
+    assert_eq!(adler32(&[0; 1024 * 1024]), 0x00f00001);
+  }
+
+  #[test]
+  fn ones() {
+    assert_eq!(adler32(&[0xff; 1024]), 0x79a6fc2e);
+    assert_eq!(adler32(&[0xff; 1024 * 1024]), 0x8e88ef11);
+  }
+
+  #[test]
+  fn mixed() {
+    assert_eq!(adler32(&[1]), 2 | 2 << 16);
+    assert_eq!(adler32(&[40]), 41 | 41 << 16);
+
+    assert_eq!(adler32(&[0xA5; 1024 * 1024]), 0xd5009ab1);
+  }
+
+  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
+  #[test]
+  fn wiki() {
+    assert_eq!(adler32(b"Wikipedia"), 0x11E60398);
+  }
+
+  fn adler32(data: &[u8]) -> u32 {
+    let (a, b) = super::update(1, 0, data);
+
+    u32::from(b) << 16 | u32::from(a)
+  }
+}
diff --git a/src/imp/sse2.rs b/src/imp/sse2.rs
new file mode 100644
index 0000000..b76df52
--- /dev/null
+++ b/src/imp/sse2.rs
@@ -0,0 +1,233 @@
+use super::Adler32Imp;
+
+/// Resolves update implementation if CPU supports sse2 instructions.
+pub fn get_imp() -> Option<Adler32Imp> {
+  get_imp_inner()
+}
+
+#[inline]
+#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  if std::is_x86_feature_detected!("sse2") {
+    Some(imp::update)
+  } else {
+    None
+  }
+}
+
+#[inline]
+#[cfg(all(
+  target_feature = "sse2",
+  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
+))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  Some(imp::update)
+}
+
+#[inline]
+#[cfg(all(
+  not(target_feature = "sse2"),
+  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
+))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  None
+}
+
+#[cfg(all(
+  any(target_arch = "x86", target_arch = "x86_64"),
+  any(feature = "std", target_feature = "sse2")
+))]
+mod imp {
+  const MOD: u32 = 65521;
+  const NMAX: usize = 5552;
+  const BLOCK_SIZE: usize = 32; // two 128-bit loads per loop iteration
+  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; // NMAX rounded down to whole blocks
+
+  #[cfg(target_arch = "x86")]
+  use core::arch::x86::*;
+  #[cfg(target_arch = "x86_64")]
+  use core::arch::x86_64::*;
+
+  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    unsafe { update_imp(a, b, data) }
+  }
+
+  #[inline]
+  #[target_feature(enable = "sse2")]
+  unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    let mut a = a as u32;
+    let mut b = b as u32;
+
+    let chunks = data.chunks_exact(CHUNK_SIZE);
+    let remainder = chunks.remainder();
+    for chunk in chunks {
+      update_chunk_block(&mut a, &mut b, chunk);
+    }
+
+    update_block(&mut a, &mut b, remainder);
+
+    (a as u16, b as u16)
+  }
+
+  unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert_eq!(
+      chunk.len(),
+      CHUNK_SIZE,
+      "Unexpected chunk size (expected {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    reduce_add_blocks(a, b, chunk);
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert!(
+      chunk.len() <= CHUNK_SIZE,
+      "Unexpected chunk size (expected <= {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    for byte in reduce_add_blocks(a, b, chunk) { // bytes left over after the last whole block
+      *a += *byte as u32;
+      *b += *a;
+    }
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  #[inline(always)]
+  unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
+    if chunk.len() < BLOCK_SIZE {
+      return chunk;
+    }
+
+    let blocks = chunk.chunks_exact(BLOCK_SIZE);
+    let blocks_remainder = blocks.remainder();
+
+    let zero_v = _mm_setzero_si128();
+    let weight_hi_v = get_weight_hi();
+    let weight_lo_v = get_weight_lo();
+
+    let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _);
+    let mut a_v = _mm_setzero_si128();
+    let mut b_v = _mm_set_epi32(0, 0, 0, *b as _);
+
+    for block in blocks {
+      let block_ptr = block.as_ptr() as *const _;
+      let left_v = _mm_loadu_si128(block_ptr);
+      let right_v = _mm_loadu_si128(block_ptr.add(1));
+
+      p_v = _mm_add_epi32(p_v, a_v);
+
+      a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v));
+      let mad = maddubs(left_v, weight_hi_v);
+      b_v = _mm_add_epi32(b_v, mad);
+
+      a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v));
+      let mad = maddubs(right_v, weight_lo_v);
+      b_v = _mm_add_epi32(b_v, mad);
+    }
+
+    b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5)); // << 5 == * BLOCK_SIZE (32)
+
+    *a += reduce_add(a_v);
+    *b = reduce_add(b_v);
+
+    blocks_remainder
+  }
+
+  #[inline(always)]
+  unsafe fn maddubs(a: __m128i, b: __m128i) -> __m128i {
+    let a_lo = _mm_unpacklo_epi8(a, _mm_setzero_si128());
+    let a_hi = _mm_unpackhi_epi8(a, _mm_setzero_si128());
+
+    let b_lo = _mm_unpacklo_epi8(b, _mm_setzero_si128());
+    let b_hi = _mm_unpackhi_epi8(b, _mm_setzero_si128());
+
+    let lo = _mm_madd_epi16(a_lo, b_lo);
+    let hi = _mm_madd_epi16(a_hi, b_hi);
+
+    _mm_add_epi32(lo, hi)
+  }
+
+  #[inline(always)]
+  unsafe fn reduce_add(v: __m128i) -> u32 {
+    let hi = _mm_unpackhi_epi64(v, v);
+    let sum = _mm_add_epi32(hi, v);
+    let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
+
+    let sum = _mm_add_epi32(sum, hi);
+
+    _mm_cvtsi128_si32(sum) as _
+  }
+
+  #[inline(always)]
+  unsafe fn get_weight_lo() -> __m128i {
+    _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+  }
+
+  #[inline(always)]
+  unsafe fn get_weight_hi() -> __m128i {
+    _mm_set_epi8(
+      17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+    )
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  use rand::Rng;
+
+  #[test]
+  fn zeroes() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[0]);
+    assert_sum_eq(&[0, 0]);
+    assert_sum_eq(&[0; 100]);
+    assert_sum_eq(&[0; 1024]);
+    assert_sum_eq(&[0; 1024 * 1024]);
+  }
+
+  #[test]
+  fn ones() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[1]);
+    assert_sum_eq(&[1, 1]);
+    assert_sum_eq(&[1; 100]);
+    assert_sum_eq(&[1; 1024]);
+    assert_sum_eq(&[1; 1024 * 1024]);
+  }
+
+  #[test]
+  fn random() {
+    let mut random = [0; 1024 * 1024];
+    rand::thread_rng().fill(&mut random[..]);
+
+    assert_sum_eq(&random[..1]);
+    assert_sum_eq(&random[..100]);
+    assert_sum_eq(&random[..1024]);
+    assert_sum_eq(&random[..1024 * 1024]);
+  }
+
+  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
+  #[test]
+  fn wiki() {
+    assert_sum_eq(b"Wikipedia");
+  }
+
+  fn assert_sum_eq(data: &[u8]) {
+    if let Some(update) = super::get_imp() {
+      let (a, b) = update(1, 0, data);
+      let left = u32::from(b) << 16 | u32::from(a);
+      let right = adler::adler32_slice(data);
+
+      assert_eq!(left, right, "len({})", data.len());
+    }
+  }
+}
diff --git a/src/imp/ssse3.rs b/src/imp/ssse3.rs
new file mode 100644
index 0000000..2602d47
--- /dev/null
+++ b/src/imp/ssse3.rs
@@ -0,0 +1,219 @@
+use super::Adler32Imp;
+
+/// Resolves update implementation if CPU supports ssse3 instructions.
+pub fn get_imp() -> Option<Adler32Imp> {
+  get_imp_inner()
+}
+
+#[inline]
+#[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  if std::is_x86_feature_detected!("ssse3") {
+    Some(imp::update)
+  } else {
+    None
+  }
+}
+
+#[inline]
+#[cfg(all(
+  target_feature = "ssse3",
+  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
+))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  Some(imp::update)
+}
+
+#[inline]
+#[cfg(all(
+  not(target_feature = "ssse3"),
+  not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
+))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  None
+}
+
+#[cfg(all(
+  any(target_arch = "x86", target_arch = "x86_64"),
+  any(feature = "std", target_feature = "ssse3")
+))]
+mod imp {
+  const MOD: u32 = 65521;
+  const NMAX: usize = 5552;
+  const BLOCK_SIZE: usize = 32; // two 128-bit loads per loop iteration
+  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; // NMAX rounded down to whole blocks
+
+  #[cfg(target_arch = "x86")]
+  use core::arch::x86::*;
+  #[cfg(target_arch = "x86_64")]
+  use core::arch::x86_64::*;
+
+  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    unsafe { update_imp(a, b, data) }
+  }
+
+  #[inline]
+  #[target_feature(enable = "ssse3")]
+  unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    let mut a = a as u32;
+    let mut b = b as u32;
+
+    let chunks = data.chunks_exact(CHUNK_SIZE);
+    let remainder = chunks.remainder();
+    for chunk in chunks {
+      update_chunk_block(&mut a, &mut b, chunk);
+    }
+
+    update_block(&mut a, &mut b, remainder);
+
+    (a as u16, b as u16)
+  }
+
+  unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert_eq!(
+      chunk.len(),
+      CHUNK_SIZE,
+      "Unexpected chunk size (expected {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    reduce_add_blocks(a, b, chunk);
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert!(
+      chunk.len() <= CHUNK_SIZE,
+      "Unexpected chunk size (expected <= {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    for byte in reduce_add_blocks(a, b, chunk) { // bytes left over after the last whole block
+      *a += *byte as u32;
+      *b += *a;
+    }
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  #[inline(always)]
+  unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
+    if chunk.len() < BLOCK_SIZE {
+      return chunk;
+    }
+
+    let blocks = chunk.chunks_exact(BLOCK_SIZE);
+    let blocks_remainder = blocks.remainder();
+
+    let one_v = _mm_set1_epi16(1);
+    let zero_v = _mm_set1_epi16(0);
+    let weight_hi_v = get_weight_hi();
+    let weight_lo_v = get_weight_lo();
+
+    let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _);
+    let mut a_v = _mm_set_epi32(0, 0, 0, 0);
+    let mut b_v = _mm_set_epi32(0, 0, 0, *b as _);
+
+    for block in blocks {
+      let block_ptr = block.as_ptr() as *const _;
+      let left_v = _mm_loadu_si128(block_ptr);
+      let right_v = _mm_loadu_si128(block_ptr.add(1));
+
+      p_v = _mm_add_epi32(p_v, a_v);
+
+      a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v));
+      let mad = _mm_maddubs_epi16(left_v, weight_hi_v);
+      b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));
+
+      a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v));
+      let mad = _mm_maddubs_epi16(right_v, weight_lo_v);
+      b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));
+    }
+
+    b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5)); // << 5 == * BLOCK_SIZE (32)
+
+    *a += reduce_add(a_v);
+    *b = reduce_add(b_v);
+
+    blocks_remainder
+  }
+
+  #[inline(always)]
+  unsafe fn reduce_add(v: __m128i) -> u32 {
+    let hi = _mm_unpackhi_epi64(v, v);
+    let sum = _mm_add_epi32(hi, v);
+    let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
+    let sum = _mm_add_epi32(sum, hi);
+
+    _mm_cvtsi128_si32(sum) as _
+  }
+
+  #[inline(always)]
+  unsafe fn get_weight_lo() -> __m128i {
+    _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+  }
+
+  #[inline(always)]
+  unsafe fn get_weight_hi() -> __m128i {
+    _mm_set_epi8(
+      17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+    )
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  use rand::Rng;
+
+  #[test]
+  fn zeroes() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[0]);
+    assert_sum_eq(&[0, 0]);
+    assert_sum_eq(&[0; 100]);
+    assert_sum_eq(&[0; 1024]);
+    assert_sum_eq(&[0; 1024 * 1024]);
+  }
+
+  #[test]
+  fn ones() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[1]);
+    assert_sum_eq(&[1, 1]);
+    assert_sum_eq(&[1; 100]);
+    assert_sum_eq(&[1; 1024]);
+    assert_sum_eq(&[1; 1024 * 1024]);
+  }
+
+  #[test]
+  fn random() {
+    let mut random = [0; 1024 * 1024];
+    rand::thread_rng().fill(&mut random[..]);
+
+    assert_sum_eq(&random[..1]);
+    assert_sum_eq(&random[..100]);
+    assert_sum_eq(&random[..1024]);
+    assert_sum_eq(&random[..1024 * 1024]);
+  }
+
+  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
+  #[test]
+  fn wiki() {
+    assert_sum_eq(b"Wikipedia");
+  }
+
+  fn assert_sum_eq(data: &[u8]) {
+    if let Some(update) = super::get_imp() {
+      let (a, b) = update(1, 0, data);
+      let left = u32::from(b) << 16 | u32::from(a);
+      let right = adler::adler32_slice(data);
+
+      assert_eq!(left, right, "len({})", data.len());
+    }
+  }
+}
diff --git a/src/imp/wasm.rs b/src/imp/wasm.rs
new file mode 100644
index 0000000..7ded0b0
--- /dev/null
+++ b/src/imp/wasm.rs
@@ -0,0 +1,217 @@
+use super::Adler32Imp;
+
+/// Resolves update implementation if CPU supports simd128 instructions.
+pub fn get_imp() -> Option<Adler32Imp> {
+  get_imp_inner()
+}
+
+#[inline]
+#[cfg(target_feature = "simd128")]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  Some(imp::update)
+}
+
+#[inline]
+#[cfg(not(target_feature = "simd128"))]
+fn get_imp_inner() -> Option<Adler32Imp> {
+  None
+}
+
+#[cfg(target_feature = "simd128")]
+mod imp {
+  const MOD: u32 = 65521;
+  const NMAX: usize = 5552;
+  const BLOCK_SIZE: usize = 32; // two 128-bit loads per loop iteration
+  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; // NMAX rounded down to whole blocks
+
+  #[cfg(target_arch = "wasm32")]
+  use core::arch::wasm32::*;
+  #[cfg(target_arch = "wasm64")]
+  use core::arch::wasm64::*;
+
+  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    update_imp(a, b, data)
+  }
+
+  #[inline]
+  #[target_feature(enable = "simd128")]
+  fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
+    let mut a = a as u32;
+    let mut b = b as u32;
+
+    let chunks = data.chunks_exact(CHUNK_SIZE);
+    let remainder = chunks.remainder();
+    for chunk in chunks {
+      update_chunk_block(&mut a, &mut b, chunk);
+    }
+
+    update_block(&mut a, &mut b, remainder);
+
+    (a as u16, b as u16)
+  }
+
+  fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert_eq!(
+      chunk.len(),
+      CHUNK_SIZE,
+      "Unexpected chunk size (expected {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    reduce_add_blocks(a, b, chunk);
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
+    debug_assert!(
+      chunk.len() <= CHUNK_SIZE,
+      "Unexpected chunk size (expected <= {}, got {})",
+      CHUNK_SIZE,
+      chunk.len()
+    );
+
+    for byte in reduce_add_blocks(a, b, chunk) { // bytes left over after the last whole block
+      *a += *byte as u32;
+      *b += *a;
+    }
+
+    *a %= MOD;
+    *b %= MOD;
+  }
+
+  #[inline(always)]
+  fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
+    if chunk.len() < BLOCK_SIZE {
+      return chunk;
+    }
+
+    let blocks = chunk.chunks_exact(BLOCK_SIZE);
+    let blocks_remainder = blocks.remainder();
+
+    let weight_hi_v = get_weight_hi();
+    let weight_lo_v = get_weight_lo();
+
+    let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
+    let mut a_v = u32x4(0, 0, 0, 0);
+    let mut b_v = u32x4(*b, 0, 0, 0);
+
+    for block in blocks {
+      let block_ptr = block.as_ptr() as *const v128;
+      let v_lo = unsafe { v128_load(block_ptr) }; // SAFETY: 32-byte block; v128_load needs no alignment
+      let v_hi = unsafe { v128_load(block_ptr.add(1)) }; // SAFETY: second 16 bytes of the same block
+
+      p_v = u32x4_add(p_v, a_v);
+
+      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
+      let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
+      b_v = u32x4_add(b_v, mad);
+
+      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
+      let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
+      b_v = u32x4_add(b_v, mad);
+    }
+
+    b_v = u32x4_add(b_v, u32x4_shl(p_v, 5)); // << 5 == * BLOCK_SIZE (32)
+
+    *a += reduce_add(a_v);
+    *b = reduce_add(b_v);
+
+    blocks_remainder
+  }
+
+  #[inline(always)]
+  fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
+    let a_lo = u16x8_extend_low_u8x16(a);
+    let a_hi = u16x8_extend_high_u8x16(a);
+
+    let b_lo = u16x8_extend_low_u8x16(b);
+    let b_hi = u16x8_extend_high_u8x16(b);
+
+    let lo = i32x4_dot_i16x8(a_lo, b_lo);
+    let hi = i32x4_dot_i16x8(a_hi, b_hi);
+
+    i32x4_add(lo, hi)
+  }
+
+  #[inline(always)]
+  fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
+    u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
+  }
+
+  #[inline(always)]
+  fn reduce_add(v: v128) -> u32 {
+    let arr: [u32; 4] = unsafe { core::mem::transmute(v) }; // `core`, so `no_std` builds compile
+    let mut sum = 0u32;
+    for val in arr {
+      sum = sum.wrapping_add(val);
+    }
+    sum
+  }
+
+  #[inline(always)]
+  fn get_weight_lo() -> v128 {
+    u8x16(
+      32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    )
+  }
+
+  #[inline(always)]
+  fn get_weight_hi() -> v128 {
+    u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  use rand::Rng;
+
+  #[test]
+  fn zeroes() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[0]);
+    assert_sum_eq(&[0, 0]);
+    assert_sum_eq(&[0; 100]);
+    assert_sum_eq(&[0; 1024]);
+    assert_sum_eq(&[0; 512 * 1024]);
+  }
+
+  #[test]
+  fn ones() {
+    assert_sum_eq(&[]);
+    assert_sum_eq(&[1]);
+    assert_sum_eq(&[1, 1]);
+    assert_sum_eq(&[1; 100]);
+    assert_sum_eq(&[1; 1024]);
+    assert_sum_eq(&[1; 512 * 1024]);
+  }
+
+  #[test]
+  fn random() {
+    let mut random = [0; 512 * 1024];
+    rand::thread_rng().fill(&mut random[..]);
+
+    assert_sum_eq(&random[..1]);
+    assert_sum_eq(&random[..100]);
+    assert_sum_eq(&random[..1024]);
+    assert_sum_eq(&random[..512 * 1024]);
+  }
+
+  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
+  #[test]
+  fn wiki() {
+    assert_sum_eq(b"Wikipedia");
+  }
+
+  fn assert_sum_eq(data: &[u8]) {
+    if let Some(update) = super::get_imp() {
+      let (a, b) = update(1, 0, data);
+      let left = u32::from(b) << 16 | u32::from(a);
+      let right = adler::adler32_slice(data);
+
+      assert_eq!(left, right, "len({})", data.len());
+    }
+  }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..4111358
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,310 @@
+//! # simd-adler32
+//!
+//! A SIMD-accelerated Adler-32 rolling hash algorithm implementation.
+//!
+//! ## Features
+//!
+//! - No dependencies
+//! - Support `no_std` (with `default-features = false`)
+//! - Runtime CPU feature detection (when `std` enabled)
+//! - Blazing fast performance on as many targets as possible (currently only x86 and x86_64)
+//! - Default to scalar implementation when simd not available
+//!
+//! ## Quick start
+//!
+//! > Cargo.toml
+//!
+//! ```toml
+//! [dependencies]
+//! simd-adler32 = "*"
+//! ```
+//!
+//! > example.rs
+//!
+//! ```rust
+//! use simd_adler32::Adler32;
+//!
+//! let mut adler = Adler32::new();
+//! adler.write(b"rust is pretty cool, man");
+//! let hash = adler.finish();
+//!
+//! println!("{}", hash);
+//! // 1921255656
+//! ```
+//!
+//! ## Feature flags
+//!
+//! * `std` - Enabled by default
+//!
+//!   Enables std support, see [CPU Feature Detection](#cpu-feature-detection) for runtime
+//!   detection support.
+//! * `nightly`
+//!
+//!   Enables nightly features required for avx512 support.
+//!
+//! * `const-generics` - Enabled by default
+//!
+//!   Enables const-generics support allowing for user-defined array hashing by value. See
+//!   [`Adler32Hash`] for details.
+//!
+//! ## Support
+//!
+//! **CPU Features**
+//!
+//! | impl | arch | feature |
+//! | ---- | ---------------- | ------- |
+//! | ✅ | `x86`, `x86_64` | avx512 |
+//! | ✅ | `x86`, `x86_64` | avx2 |
+//! | ✅ | `x86`, `x86_64` | ssse3 |
+//! | ✅ | `x86`, `x86_64` | sse2 |
+//! | 🚧 | `arm`, `aarch64` | neon |
+//! | | `wasm32` | simd128 |
+//!
+//! **MSRV** `1.36.0`\*\*
+//!
+//! Minimum supported rust version is tested before a new version is published. [**] Feature
+//! `const-generics` needs to be disabled to build on rustc versions `<1.51`, which can be
+//! done by setting `default-features = false` in your dependency definition.
+//!
+//! ## CPU Feature Detection
+//! simd-adler32 supports both runtime and compile time CPU feature detection using the
+//! `std::is_x86_feature_detected` macro when the `Adler32` struct is instantiated with
+//! the `new` fn.
+//!
+//! Without `std` feature enabled simd-adler32 falls back to compile time feature detection
+//! using `target-feature` or `target-cpu` flags supplied to rustc. See [https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html)
+//! for more information.
+//!
+//! Feature detection tries to use the fastest supported feature first.
+#![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(feature = "nightly", feature(stdsimd, avx512_target_feature))]
+
+#[doc(hidden)]
+pub mod hash;
+#[doc(hidden)]
+pub mod imp;
+
+pub use hash::*;
+use imp::{get_imp, Adler32Imp};
+
+/// A rolling hash generator type.
+#[derive(Clone)]
+pub struct Adler32 {
+  a: u16,
+  b: u16,
+  update: Adler32Imp,
+}
+
+impl Adler32 {
+  /// Constructs a new `Adler32`.
+  ///
+  /// Potential overhead here due to runtime feature detection although in testing on 100k
+  /// and 10k random byte arrays it was not really noticeable.
+  ///
+  /// # Examples
+  /// ```rust
+  /// use simd_adler32::Adler32;
+  ///
+  /// let mut adler = Adler32::new();
+  /// ```
+  pub fn new() -> Self {
+    Default::default()
+  }
+
+  /// Constructs a new `Adler32` using existing checksum.
+  ///
+  /// Potential overhead here due to runtime feature detection although in testing on 100k
+  /// and 10k random byte arrays it was not really noticeable.
+  ///
+  /// # Examples
+  /// ```rust
+  /// use simd_adler32::Adler32;
+  ///
+  /// let mut adler = Adler32::from_checksum(0xdeadbeaf);
+  /// ```
+  pub fn from_checksum(checksum: u32) -> Self {
+    Self {
+      a: checksum as u16,
+      b: (checksum >> 16) as u16,
+      update: get_imp(),
+    }
+  }
+
+  /// Computes hash for supplied data and stores results in internal state.
+  pub fn write(&mut self, data: &[u8]) {
+    let (a, b) = (self.update)(self.a, self.b, data);
+
+    self.a = a;
+    self.b = b;
+  }
+
+  /// Returns the hash value for the values written so far.
+  ///
+  /// Despite its name, the method does not reset the hasher’s internal state. Additional
+  /// writes will continue from the current value. If you need to start a fresh hash
+  /// value, you will have to use `reset`.
+  pub fn finish(&self) -> u32 {
+    (u32::from(self.b) << 16) | u32::from(self.a)
+  }
+
+  /// Resets the internal state.
+  pub fn reset(&mut self) {
+    self.a = 1;
+    self.b = 0;
+  }
+}
+
+/// Compute Adler-32 hash on `Adler32Hash` type.
+///
+/// # Arguments
+/// * `hash` - An Adler-32 hash-able type.
+///
+/// # Examples
+/// ```rust
+/// use simd_adler32::adler32;
+///
+/// let hash = adler32(b"Adler-32");
+/// println!("{}", hash); // 204735099
+/// ```
+pub fn adler32<H: Adler32Hash>(hash: &H) -> u32 {
+  hash.hash()
+}
+
+/// An Adler-32 hash-able type.
+pub trait Adler32Hash {
+  /// Feeds this value into `Adler32`.
+  fn hash(&self) -> u32;
+}
+
+impl Default for Adler32 {
+  fn default() -> Self {
+    Self {
+      a: 1,
+      b: 0,
+      update: get_imp(),
+    }
+  }
+}
+
+#[cfg(feature = "std")]
+pub mod read {
+  //! Reader-based hashing.
+  //!
+  //! # Example
+  //! ```rust
+  //! use std::io::Cursor;
+  //! use simd_adler32::read::adler32;
+  //!
+  //! let mut reader = Cursor::new(b"Hello there");
+  //! let hash = adler32(&mut reader).unwrap();
+  //!
+  //! println!("{}", hash) // 409338925
+  //! ```
+  use crate::Adler32;
+  use std::io::{ErrorKind, Read, Result};
+
+  /// Compute Adler-32 hash on reader until EOF.
+  ///
+  /// # Example
+  /// ```rust
+  /// use std::io::Cursor;
+  /// use simd_adler32::read::adler32;
+  ///
+  /// let mut reader = Cursor::new(b"Hello there");
+  /// let hash = adler32(&mut reader).unwrap();
+  ///
+  /// println!("{}", hash) // 409338925
+  /// ```
+  pub fn adler32<R: Read>(reader: &mut R) -> Result<u32> {
+    let mut hash = Adler32::new();
+    let mut buf = [0; 4096];
+
+    loop {
+      match reader.read(&mut buf) {
+        Ok(0) => return Ok(hash.finish()),
+        Ok(n) => hash.write(&buf[..n]),
+        // An interrupted read is transient: retry it, as `bufread::adler32` does.
+        Err(ref err) if err.kind() == ErrorKind::Interrupted => continue,
+        Err(err) => return Err(err),
+      }
+    }
+  }
+}
+
+#[cfg(feature = "std")]
+pub mod bufread {
+  //! BufRead-based hashing.
+  //!
+  //! Separate `BufRead` trait implemented to allow for custom buffer size optimization.
+  //!
+  //! # Example
+  //! ```rust
+  //! use std::io::{Cursor, BufReader};
+  //! use simd_adler32::bufread::adler32;
+  //!
+  //! let mut reader = Cursor::new(b"Hello there");
+  //! let mut reader = BufReader::new(reader);
+  //! let hash = adler32(&mut reader).unwrap();
+  //!
+  //! println!("{}", hash) // 409338925
+  //! ```
+  use crate::Adler32;
+  use std::io::{BufRead, ErrorKind, Result};
+
+  /// Compute Adler-32 hash on buf reader until EOF.
+  ///
+  /// # Example
+  /// ```rust
+  /// use std::io::{Cursor, BufReader};
+  /// use simd_adler32::bufread::adler32;
+  ///
+  /// let mut reader = Cursor::new(b"Hello there");
+  /// let mut reader = BufReader::new(reader);
+  /// let hash = adler32(&mut reader).unwrap();
+  ///
+  /// println!("{}", hash) // 409338925
+  /// ```
+  pub fn adler32<R: BufRead>(reader: &mut R) -> Result<u32> {
+    let mut hash = Adler32::new();
+
+    loop {
+      let consumed = match reader.fill_buf() {
+        Ok(buf) => {
+          if buf.is_empty() {
+            return Ok(hash.finish());
+          }
+
+          hash.write(buf);
+          buf.len()
+        }
+        Err(err) => match err.kind() {
+          ErrorKind::Interrupted => continue, // transient: retry the fill
+          ErrorKind::UnexpectedEof => return Ok(hash.finish()), // treated as end of input
+          _ => return Err(err),
+        },
+      };
+
+      reader.consume(consumed);
+    }
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  #[test]
+  fn test_from_checksum() {
+    let buf = b"rust is pretty cool man";
+    let sum = 0xdeadbeaf;
+
+    let mut simd = super::Adler32::from_checksum(sum);
+    let mut adler = adler::Adler32::from_checksum(sum);
+
+    simd.write(buf);
+    adler.write_slice(buf);
+
+    let simd = simd.finish();
+    let scalar = adler.checksum();
+
+    assert_eq!(simd, scalar);
+  }
+}
-- 
2.34.1