From adfead3e0bf2088be7c1da0170da234c306707f5 Mon Sep 17 00:00:00 2001 From: Noah Stiltner <43357580+nstilt1@users.noreply.github.com> Date: Mon, 29 Apr 2024 10:01:57 -0500 Subject: [PATCH] chacha20: bring back `rand_core` support (#333) Allows the crate's AVX2 / NEON implementations to be used as `rand_core`-compatible RNGs. See also: rust-random/rand#934 --- .gitignore | 2 +- Cargo.lock | 143 ++++- benches/Cargo.toml | 12 +- benches/README.md | 46 ++ benches/src/chacha20.rs | 38 +- benches/src/lib.rs | 23 + chacha20/Cargo.toml | 13 +- chacha20/src/backends/avx2.rs | 98 ++- chacha20/src/backends/neon.rs | 546 ++++++++-------- chacha20/src/backends/soft.rs | 36 +- chacha20/src/backends/sse2.rs | 74 ++- chacha20/src/chacha.rs | 38 ++ chacha20/src/legacy.rs | 54 +- chacha20/src/lib.rs | 143 +++-- chacha20/src/rng.rs | 1111 +++++++++++++++++++++++++++++++++ chacha20/src/variants.rs | 25 + chacha20/src/xchacha.rs | 51 +- chacha20/tests/mod.rs | 33 +- rust-toolchain.toml.save | 38 ++ salsa20/benches/mod.rs | 0 20 files changed, 2038 insertions(+), 486 deletions(-) create mode 100644 benches/README.md create mode 100644 chacha20/src/chacha.rs create mode 100644 chacha20/src/rng.rs create mode 100644 chacha20/src/variants.rs create mode 100644 rust-toolchain.toml.save mode change 100755 => 100644 salsa20/benches/mod.rs diff --git a/.gitignore b/.gitignore index b9d396dd..7a662a96 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ target/ -**/Cargo.lock +**/Cargo.lock \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index a5540d1d..0ab22cf6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,5 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -# version = 3 [[package]] @@ -32,6 +31,11 @@ dependencies = [ "cipher", "cpufeatures", "hex-literal", + "rand_chacha", + "rand_core 0.9.0-alpha.1", + "serde", + "serde_json", + "zeroize", ] [[package]] @@ -63,14 +67,14 @@ checksum = "b7aa2ec04f5120b830272a481e8d9d8ba4dda140d2cda59b0f1110d5eb93c38e" dependencies = [ "getrandom", "hybrid-array", - "rand_core", + "rand_core 0.6.4", ] [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "a06fddc2749e0528d2813f95e050e87e52c8cbbae56223b9babf73b3e53b0cc6" dependencies = [ "cfg-if", "libc", @@ -93,9 +97,9 @@ checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" [[package]] name = "hybrid-array" -version = "0.2.0-rc.4" +version = "0.2.0-rc.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18e63b66aee2df5599ba69b17a48113dfc68d2e143ea387ef836509e433bbd7e" +checksum = "53668f5da5a41d9eaf4bf7064be46d1ebe6a4e1ceed817f387587b18f2b51047" dependencies = [ "typenum", ] @@ -110,11 +114,41 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + [[package]] name = "libc" -version = "0.2.149" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] [[package]] name = "rabbit" @@ -124,6 +158,16 @@ dependencies = [ "hex-literal", ] +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + [[package]] name = "rand_core" version = "0.6.4" @@ -133,6 +177,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_core" +version = "0.9.0-alpha.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc89dffba8377c5ec847d12bb41492bda235dba31a25e8b695cd0fe6589eb8c9" +dependencies = [ + "zerocopy", +] + [[package]] name = "rc4" version = "0.2.0-pre" @@ -141,6 +194,12 @@ dependencies = [ "hex-literal", ] +[[package]] +name = "ryu" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + [[package]] name = "salsa20" version = "0.11.0-pre" @@ -149,18 +208,86 @@ dependencies = [ "hex-literal", ] +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "typenum" version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "zerocopy" +version = "0.8.0-alpha.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db678a6ee512bd06adf35c35be471cae2f9c82a5aed2b5d15e03628c98bddd57" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = 
"0.8.0-alpha.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201585ea96d37ee69f2ac769925ca57160cef31acb137c16f38b02b76f4c1e62" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zeroize" version = "1.7.0" diff --git a/benches/Cargo.toml b/benches/Cargo.toml index bcb000e2..47e3700c 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -9,12 +9,14 @@ publish = false [workspace] -[dev-dependencies] -criterion = "0.3" -criterion-cycles-per-byte = "0.1" -chacha20 = { path = "../chacha20/" } +[dependencies] +criterion = "0.5" +chacha20 = { path = "../chacha20/", features = ["rng", "zeroize"] } + +[target.'cfg(any(target_arch = "x86_64", target_arch = "x86", all(target_arch = "aarch64", target_os = "linux")))'.dependencies] +criterion-cycles-per-byte = "0.6.0" [[bench]] name = "chacha20" path = "src/chacha20.rs" -harness = false +harness = false \ No newline at end of file diff --git a/benches/README.md b/benches/README.md new file mode 100644 index 00000000..6ad8e309 --- /dev/null +++ b/benches/README.md @@ -0,0 +1,46 @@ +# Benching ChaCha20 + +## A note from the criterion-cycles-per-byte github +``` +[`criterion-cycles-per-byte`] measures clock ticks rather than cycles. It will not provide accurate results on modern machines unless you calculate the ratio of ticks to cycles and take steps to ensure that that ratio remains consistent. +``` + +## ChaCha20 Cipher benching +You can bench the ChaCha20 cipher using `cargo bench -- apply_keystream` + +## ChaCha20Rng benching +You can bench ChaCha20Rng using `cargo bench -- fill_bytes` + +## Measuring CPB for aarch64 +`criterion-cycles-per-byte` can work on `aarch64` with Linux, but it might produce an error. This error occurred on an up-to-date Raspberry Pi 4b (as of 12/14/2023): +``` + Running src/chacha20.rs (target/release/deps/chacha20-02f555ae0af3670b) +Gnuplot not found, using plotters backend +Benchmarking stream-cipher/apply_keystream/1024: Warming up for 3.0000 serror: bench failed, to rerun pass `--bench chacha20` + +Caused by: + process didn't exit successfully: `..../benches/target/release/deps/chacha20-02f555ae0af3670b --bench` (signal: 4, SIGILL: illegal instruction) +``` + +The following adjustment can fix this. + +### Installing the cycle counter Linux Kernel Module on a Raspberry Pi 4b +``` +$ sudo apt-get update +$ sudo apt-get upgrade +$ sudo apt-get install build-essential raspberrypi-kernel-headers +# cd to your chosen directory +$ cd ../.. +$ git clone https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0.git +$ cd armv8_pmu_cycle_counter_el10 +$ make +$ sudo insmod pmu_el0_cycle_counter.ko +# Verifying that it is installed +$ lsmod | grep pmu_el0_cycle_counter +pmu_el0_cycle_counter 16384 0 +``` +Without any other commands, this module will be deactivated after every reboot, and can be reactivated using +``` +$ cd armv8_pmu_cycle_counter_el10 +$ sudo insmod pmu_el0_cycle_counter.ko +``` \ No newline at end of file diff --git a/benches/src/chacha20.rs b/benches/src/chacha20.rs index 6b772af2..a7f92f2e 100644 --- a/benches/src/chacha20.rs +++ b/benches/src/chacha20.rs @@ -1,6 +1,6 @@ //! 
ChaCha20 benchmark use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use criterion_cycles_per_byte::CyclesPerByte; +use benches::{criterion_group_bench, Benchmarker}; use chacha20::{ cipher::{KeyIvInit, StreamCipher}, @@ -8,8 +8,7 @@ use chacha20::{ }; const KB: usize = 1024; - -fn bench(c: &mut Criterion) { +fn bench(c: &mut Benchmarker) { let mut group = c.benchmark_group("stream-cipher"); for size in &[KB, 2 * KB, 4 * KB, 8 * KB, 16 * KB] { @@ -28,9 +27,32 @@ fn bench(c: &mut Criterion) { group.finish(); } -criterion_group!( - name = benches; - config = Criterion::default().with_measurement(CyclesPerByte); - targets = bench +use chacha20::rand_core::{RngCore, SeedableRng}; + +fn bench_chacha20rng(c: &mut Benchmarker) { + let mut group = c.benchmark_group("ChaCha20Rng"); + + for size in &[KB, 2 * KB, 4 * KB, 8 * KB, 16 * KB] { + let mut buf = vec![0u8; *size]; + + group.throughput(Throughput::Bytes(*size as u64)); + + group.bench_function(BenchmarkId::new("fill_bytes", size), |b| { + let mut rng = chacha20::ChaCha20Rng::from_seed([0u8; 32]); + b.iter(|| rng.fill_bytes(&mut buf)); + }); + } + + group.finish(); +} +criterion_group_bench!( + benches_chacha20rng, + bench_chacha20rng ); -criterion_main!(benches); + +criterion_group_bench!( + benches, + bench +); + +criterion_main!(benches, benches_chacha20rng); \ No newline at end of file diff --git a/benches/src/lib.rs b/benches/src/lib.rs index 8b137891..645d7f94 100644 --- a/benches/src/lib.rs +++ b/benches/src/lib.rs @@ -1 +1,24 @@ +use criterion::Criterion; +#[cfg(any(target_arch = "x86_64", target_arch = "x86", all(target_arch = "aarch64", target_os = "linux")))] +pub type Benchmarker = Criterion; +#[cfg(not(any(target_arch = "x86_64", target_arch = "x86", all(target_arch = "aarch64", target_os = "linux"))))] +pub type Benchmarker = Criterion; + +#[macro_export] +macro_rules! 
criterion_group_bench { + ($Name:ident, $Target:ident) => { + #[cfg(any(target_arch = "x86_64", target_arch = "x86", all(target_arch = "aarch64", target_os = "linux")))] + criterion_group!( + name = $Name; + config = Criterion::default().with_measurement(criterion_cycles_per_byte::CyclesPerByte); + targets = $Target + ); + #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", all(target_arch = "aarch64", target_os = "linux"))))] + criterion_group!( + name = $Name; + config = Criterion::default(); + targets = $Target + ); + } +} \ No newline at end of file diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index dffaf6db..76624cf7 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -20,7 +20,10 @@ categories = ["cryptography", "no-std"] [dependencies] cfg-if = "1" -cipher = "=0.5.0-pre.4" +cipher = { version = "=0.5.0-pre.4", optional = true} +rand_core = { version = "0.9.0-alpha.1", optional = true, default-features = false } +serde = { version = "1.0", features = ["derive"], optional = true } +zeroize = { version = "1.6.0", optional = true } [target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dependencies] cpufeatures = "0.2" @@ -28,10 +31,16 @@ cpufeatures = "0.2" [dev-dependencies] cipher = { version = "=0.5.0-pre.4", features = ["dev"] } hex-literal = "0.4" +rand_chacha = "0.3.1" +serde_json = "1.0" # Only to test serde1 [features] +default = ["cipher"] +legacy = ["cipher"] +rng = ["rand_core"] +serde1 = ["serde"] std = ["cipher/std"] -zeroize = ["cipher/zeroize"] +xchacha = ["cipher"] [package.metadata.docs.rs] all-features = true diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 5a097425..d0f05d12 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -1,9 +1,24 @@ -use crate::{Block, StreamClosure, Unsigned, STATE_WORDS}; +use crate::Rounds; +use core::marker::PhantomData; + +#[cfg(feature = "rng")] +use crate::{ChaChaCore, Variant}; + +#[cfg(feature = "cipher")] +use crate::{ + STATE_WORDS, + chacha::Block +}; + +#[cfg(feature = "cipher")] use cipher::{ - consts::{U4, U64}, - BlockSizeUser, ParBlocks, ParBlocksSizeUser, StreamBackend, + StreamClosure, + consts::{U64, U4}, + BlockSizeUser, + ParBlocksSizeUser, + ParBlocks, + StreamBackend }; -use core::marker::PhantomData; #[cfg(target_arch = "x86")] use core::arch::x86::*; @@ -17,9 +32,10 @@ const N: usize = PAR_BLOCKS / 2; #[inline] #[target_feature(enable = "avx2")] +#[cfg(feature = "cipher")] pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where - R: Unsigned, + R: Rounds, F: StreamClosure, { let state_ptr = state.as_ptr() as *const __m128i; @@ -46,21 +62,56 @@ where state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; } -struct Backend { +#[inline] +#[target_feature(enable = "avx2")] +#[cfg(feature = "rng")] +pub(crate) unsafe fn rng_inner(core: &mut ChaChaCore, buffer: &mut [u32; 64]) +where + R: Rounds, + V: Variant +{ + let state_ptr = core.state.as_ptr() as *const __m128i; + let v = [ + _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(0))), + _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(1))), + _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), + ]; + let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); + c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + let mut ctr = [c; N]; + for i in 0..N { + ctr[i] = c; + c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + } + let mut backend = Backend:: { + v, + ctr, + _pd: 
PhantomData, + }; + + backend.rng_gen_par_ks_blocks(buffer); + + core.state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; +} + +struct Backend { v: [__m256i; 3], ctr: [__m256i; N], _pd: PhantomData, } -impl BlockSizeUser for Backend { +#[cfg(feature = "cipher")] +impl BlockSizeUser for Backend { type BlockSize = U64; } -impl ParBlocksSizeUser for Backend { +#[cfg(feature = "cipher")] +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } -impl StreamBackend for Backend { +#[cfg(feature = "cipher")] +impl StreamBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { @@ -101,14 +152,39 @@ impl StreamBackend for Backend { } } +#[cfg(feature = "rng")] +impl Backend { + #[inline(always)] + fn rng_gen_par_ks_blocks(&mut self, blocks: &mut [u32; 64]) { + unsafe { + let vs = rounds::(&self.v, &self.ctr); + + let pb = PAR_BLOCKS as i32; + for c in self.ctr.iter_mut() { + *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + } + + let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i; + for v in vs { + let t: [__m128i; 8] = core::mem::transmute(v); + for i in 0..4 { + _mm_storeu_si128(block_ptr.add(i), t[2 * i]); + _mm_storeu_si128(block_ptr.add(4 + i), t[2 * i + 1]); + } + block_ptr = block_ptr.add(8); + } + } + } +} + #[inline] #[target_feature(enable = "avx2")] -unsafe fn rounds(v: &[__m256i; 3], c: &[__m256i; N]) -> [[__m256i; 4]; N] { +unsafe fn rounds(v: &[__m256i; 3], c: &[__m256i; N]) -> [[__m256i; 4]; N] { let mut vs: [[__m256i; 4]; N] = [[_mm256_setzero_si256(); 4]; N]; for i in 0..N { vs[i] = [v[0], v[1], v[2], c[i]]; } - for _ in 0..R::USIZE { + for _ in 0..R::COUNT { double_quarter_round(&mut vs); } diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 5c6d0aeb..a4f0be5c 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -3,45 +3,88 @@ //! Adapted from the Crypto++ `chacha_simd` implementation by Jack Lloyd and //! Jeffrey Walton (public domain). 
-use crate::{Block, StreamClosure, Unsigned, STATE_WORDS}; +use crate::{Rounds, STATE_WORDS}; +use core::{arch::aarch64::*, marker::PhantomData}; + +#[cfg(feature = "rand_core")] +use crate::{ChaChaCore, Variant}; + +#[cfg(feature = "cipher")] +use crate::chacha::Block; + +#[cfg(feature = "cipher")] use cipher::{ consts::{U4, U64}, - BlockSizeUser, ParBlocks, ParBlocksSizeUser, StreamBackend, + BlockSizeUser, ParBlocks, ParBlocksSizeUser, StreamBackend, StreamClosure, }; -use core::{arch::aarch64::*, marker::PhantomData}; + +struct Backend { + state: [uint32x4_t; 4], + ctrs: [uint32x4_t; 4], + _pd: PhantomData, +} + +impl Backend { + #[inline] + unsafe fn new(state: &mut [u32; STATE_WORDS]) -> Self { + let state = [ + vld1q_u32(state.as_ptr().offset(0)), + vld1q_u32(state.as_ptr().offset(4)), + vld1q_u32(state.as_ptr().offset(8)), + vld1q_u32(state.as_ptr().offset(12)), + ]; + let ctrs = [ + vld1q_u32([1, 0, 0, 0].as_ptr()), + vld1q_u32([2, 0, 0, 0].as_ptr()), + vld1q_u32([3, 0, 0, 0].as_ptr()), + vld1q_u32([4, 0, 0, 0].as_ptr()), + ]; + Backend:: { + state, + ctrs, + _pd: PhantomData, + } + } +} #[inline] +#[cfg(feature = "cipher")] #[target_feature(enable = "neon")] pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where - R: Unsigned, + R: Rounds, F: StreamClosure, { - let mut backend = Backend:: { - state: [ - vld1q_u32(state.as_ptr().offset(0)), - vld1q_u32(state.as_ptr().offset(4)), - vld1q_u32(state.as_ptr().offset(8)), - vld1q_u32(state.as_ptr().offset(12)), - ], - _pd: PhantomData, - }; + let mut backend = Backend::::new(state); f.call(&mut backend); vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); } -struct Backend { - state: [uint32x4_t; 4], - _pd: PhantomData, +#[inline] +#[cfg(feature = "rand_core")] +#[target_feature(enable = "neon")] +/// Sets up backend and blindly writes 4 blocks to dest_ptr. +#[cfg(feature = "rng")] +pub(crate) unsafe fn rng_inner(core: &mut ChaChaCore, buffer: &mut [u32; 64]) +where + R: Rounds, + V: Variant, +{ + let mut backend = Backend::::new(&mut core.state); + + backend.write_par_ks_blocks(buffer); + + vst1q_u32(core.state.as_mut_ptr().offset(12), backend.state[3]); } -impl BlockSizeUser for Backend { +#[cfg(feature = "cipher")] +impl BlockSizeUser for Backend { type BlockSize = U64; } - -impl ParBlocksSizeUser for Backend { +#[cfg(feature = "cipher")] +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } @@ -54,7 +97,15 @@ macro_rules! add64 { }; } -impl StreamBackend for Backend { +/// Evaluates to `a = a + b`, where the operands are u32x4s +macro_rules! add_assign_vec { + ($a:expr, $b:expr) => { + $a = vaddq_u32($a, $b) + }; +} + +#[cfg(feature = "cipher")] +impl StreamBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let state3 = self.state[3]; @@ -67,290 +118,187 @@ impl StreamBackend for Backend { } #[inline(always)] - fn gen_par_ks_blocks(&mut self, blocks: &mut ParBlocks) { - macro_rules! rotate_left { - ($v:ident, 8) => {{ - let maskb = [3u8, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14]; - let mask = vld1q_u8(maskb.as_ptr()); - - vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32($v), mask)) - }}; - ($v:ident, 16) => { - vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32($v))) - }; - ($v:ident, $r:literal) => { - vorrq_u32(vshlq_n_u32($v, $r), vshrq_n_u32($v, 32 - $r)) - }; - } - - macro_rules! 
extract { - ($v:ident, $s:literal) => { - vextq_u32($v, $v, $s) - }; - } - + fn gen_par_ks_blocks(&mut self, dest: &mut ParBlocks) { unsafe { - let ctrs = [ - vld1q_u32([1, 0, 0, 0].as_ptr()), - vld1q_u32([2, 0, 0, 0].as_ptr()), - vld1q_u32([3, 0, 0, 0].as_ptr()), - vld1q_u32([4, 0, 0, 0].as_ptr()), + let mut blocks = [ + [self.state[0], self.state[1], self.state[2], self.state[3]], + [ + self.state[0], + self.state[1], + self.state[2], + add64!(self.state[3], self.ctrs[0]), + ], + [ + self.state[0], + self.state[1], + self.state[2], + add64!(self.state[3], self.ctrs[1]), + ], + [ + self.state[0], + self.state[1], + self.state[2], + add64!(self.state[3], self.ctrs[2]), + ], ]; - let mut r0_0 = self.state[0]; - let mut r0_1 = self.state[1]; - let mut r0_2 = self.state[2]; - let mut r0_3 = self.state[3]; - - let mut r1_0 = self.state[0]; - let mut r1_1 = self.state[1]; - let mut r1_2 = self.state[2]; - let mut r1_3 = add64!(r0_3, ctrs[0]); - - let mut r2_0 = self.state[0]; - let mut r2_1 = self.state[1]; - let mut r2_2 = self.state[2]; - let mut r2_3 = add64!(r0_3, ctrs[1]); - - let mut r3_0 = self.state[0]; - let mut r3_1 = self.state[1]; - let mut r3_2 = self.state[2]; - let mut r3_3 = add64!(r0_3, ctrs[2]); - - for _ in 0..R::USIZE { - r0_0 = vaddq_u32(r0_0, r0_1); - r1_0 = vaddq_u32(r1_0, r1_1); - r2_0 = vaddq_u32(r2_0, r2_1); - r3_0 = vaddq_u32(r3_0, r3_1); - - r0_3 = veorq_u32(r0_3, r0_0); - r1_3 = veorq_u32(r1_3, r1_0); - r2_3 = veorq_u32(r2_3, r2_0); - r3_3 = veorq_u32(r3_3, r3_0); - - r0_3 = rotate_left!(r0_3, 16); - r1_3 = rotate_left!(r1_3, 16); - r2_3 = rotate_left!(r2_3, 16); - r3_3 = rotate_left!(r3_3, 16); - - r0_2 = vaddq_u32(r0_2, r0_3); - r1_2 = vaddq_u32(r1_2, r1_3); - r2_2 = vaddq_u32(r2_2, r2_3); - r3_2 = vaddq_u32(r3_2, r3_3); - - r0_1 = veorq_u32(r0_1, r0_2); - r1_1 = veorq_u32(r1_1, r1_2); - r2_1 = veorq_u32(r2_1, r2_2); - r3_1 = veorq_u32(r3_1, r3_2); - - r0_1 = rotate_left!(r0_1, 12); - r1_1 = rotate_left!(r1_1, 12); - r2_1 = rotate_left!(r2_1, 12); - r3_1 = rotate_left!(r3_1, 12); - - r0_0 = vaddq_u32(r0_0, r0_1); - r1_0 = vaddq_u32(r1_0, r1_1); - r2_0 = vaddq_u32(r2_0, r2_1); - r3_0 = vaddq_u32(r3_0, r3_1); - - r0_3 = veorq_u32(r0_3, r0_0); - r1_3 = veorq_u32(r1_3, r1_0); - r2_3 = veorq_u32(r2_3, r2_0); - r3_3 = veorq_u32(r3_3, r3_0); - - r0_3 = rotate_left!(r0_3, 8); - r1_3 = rotate_left!(r1_3, 8); - r2_3 = rotate_left!(r2_3, 8); - r3_3 = rotate_left!(r3_3, 8); - - r0_2 = vaddq_u32(r0_2, r0_3); - r1_2 = vaddq_u32(r1_2, r1_3); - r2_2 = vaddq_u32(r2_2, r2_3); - r3_2 = vaddq_u32(r3_2, r3_3); - - r0_1 = veorq_u32(r0_1, r0_2); - r1_1 = veorq_u32(r1_1, r1_2); - r2_1 = veorq_u32(r2_1, r2_2); - r3_1 = veorq_u32(r3_1, r3_2); - - r0_1 = rotate_left!(r0_1, 7); - r1_1 = rotate_left!(r1_1, 7); - r2_1 = rotate_left!(r2_1, 7); - r3_1 = rotate_left!(r3_1, 7); - - r0_1 = extract!(r0_1, 1); - r0_2 = extract!(r0_2, 2); - r0_3 = extract!(r0_3, 3); - - r1_1 = extract!(r1_1, 1); - r1_2 = extract!(r1_2, 2); - r1_3 = extract!(r1_3, 3); - - r2_1 = extract!(r2_1, 1); - r2_2 = extract!(r2_2, 2); - r2_3 = extract!(r2_3, 3); - - r3_1 = extract!(r3_1, 1); - r3_2 = extract!(r3_2, 2); - r3_3 = extract!(r3_3, 3); - - r0_0 = vaddq_u32(r0_0, r0_1); - r1_0 = vaddq_u32(r1_0, r1_1); - r2_0 = vaddq_u32(r2_0, r2_1); - r3_0 = vaddq_u32(r3_0, r3_1); - - r0_3 = veorq_u32(r0_3, r0_0); - r1_3 = veorq_u32(r1_3, r1_0); - r2_3 = veorq_u32(r2_3, r2_0); - r3_3 = veorq_u32(r3_3, r3_0); - - r0_3 = rotate_left!(r0_3, 16); - r1_3 = rotate_left!(r1_3, 16); - r2_3 = rotate_left!(r2_3, 16); - r3_3 = rotate_left!(r3_3, 
16); - - r0_2 = vaddq_u32(r0_2, r0_3); - r1_2 = vaddq_u32(r1_2, r1_3); - r2_2 = vaddq_u32(r2_2, r2_3); - r3_2 = vaddq_u32(r3_2, r3_3); - - r0_1 = veorq_u32(r0_1, r0_2); - r1_1 = veorq_u32(r1_1, r1_2); - r2_1 = veorq_u32(r2_1, r2_2); - r3_1 = veorq_u32(r3_1, r3_2); - - r0_1 = rotate_left!(r0_1, 12); - r1_1 = rotate_left!(r1_1, 12); - r2_1 = rotate_left!(r2_1, 12); - r3_1 = rotate_left!(r3_1, 12); - - r0_0 = vaddq_u32(r0_0, r0_1); - r1_0 = vaddq_u32(r1_0, r1_1); - r2_0 = vaddq_u32(r2_0, r2_1); - r3_0 = vaddq_u32(r3_0, r3_1); - - r0_3 = veorq_u32(r0_3, r0_0); - r1_3 = veorq_u32(r1_3, r1_0); - r2_3 = veorq_u32(r2_3, r2_0); - r3_3 = veorq_u32(r3_3, r3_0); - - r0_3 = rotate_left!(r0_3, 8); - r1_3 = rotate_left!(r1_3, 8); - r2_3 = rotate_left!(r2_3, 8); - r3_3 = rotate_left!(r3_3, 8); - - r0_2 = vaddq_u32(r0_2, r0_3); - r1_2 = vaddq_u32(r1_2, r1_3); - r2_2 = vaddq_u32(r2_2, r2_3); - r3_2 = vaddq_u32(r3_2, r3_3); - - r0_1 = veorq_u32(r0_1, r0_2); - r1_1 = veorq_u32(r1_1, r1_2); - r2_1 = veorq_u32(r2_1, r2_2); - r3_1 = veorq_u32(r3_1, r3_2); - - r0_1 = rotate_left!(r0_1, 7); - r1_1 = rotate_left!(r1_1, 7); - r2_1 = rotate_left!(r2_1, 7); - r3_1 = rotate_left!(r3_1, 7); - - r0_1 = extract!(r0_1, 3); - r0_2 = extract!(r0_2, 2); - r0_3 = extract!(r0_3, 1); - - r1_1 = extract!(r1_1, 3); - r1_2 = extract!(r1_2, 2); - r1_3 = extract!(r1_3, 1); - - r2_1 = extract!(r2_1, 3); - r2_2 = extract!(r2_2, 2); - r2_3 = extract!(r2_3, 1); - - r3_1 = extract!(r3_1, 3); - r3_2 = extract!(r3_2, 2); - r3_3 = extract!(r3_3, 1); + for _ in 0..R::COUNT { + double_quarter_round(&mut blocks); } - r0_0 = vaddq_u32(r0_0, self.state[0]); - r0_1 = vaddq_u32(r0_1, self.state[1]); - r0_2 = vaddq_u32(r0_2, self.state[2]); - r0_3 = vaddq_u32(r0_3, self.state[3]); - - r1_0 = vaddq_u32(r1_0, self.state[0]); - r1_1 = vaddq_u32(r1_1, self.state[1]); - r1_2 = vaddq_u32(r1_2, self.state[2]); - r1_3 = vaddq_u32(r1_3, self.state[3]); - r1_3 = add64!(r1_3, ctrs[0]); - - r2_0 = vaddq_u32(r2_0, self.state[0]); - r2_1 = vaddq_u32(r2_1, self.state[1]); - r2_2 = vaddq_u32(r2_2, self.state[2]); - r2_3 = vaddq_u32(r2_3, self.state[3]); - r2_3 = add64!(r2_3, ctrs[1]); - - r3_0 = vaddq_u32(r3_0, self.state[0]); - r3_1 = vaddq_u32(r3_1, self.state[1]); - r3_2 = vaddq_u32(r3_2, self.state[2]); - r3_3 = vaddq_u32(r3_3, self.state[3]); - r3_3 = add64!(r3_3, ctrs[2]); - - vst1q_u8(blocks[0].as_mut_ptr().offset(0), vreinterpretq_u8_u32(r0_0)); - vst1q_u8( - blocks[0].as_mut_ptr().offset(16), - vreinterpretq_u8_u32(r0_1), - ); - vst1q_u8( - blocks[0].as_mut_ptr().offset(2 * 16), - vreinterpretq_u8_u32(r0_2), - ); - vst1q_u8( - blocks[0].as_mut_ptr().offset(3 * 16), - vreinterpretq_u8_u32(r0_3), - ); - - vst1q_u8(blocks[1].as_mut_ptr().offset(0), vreinterpretq_u8_u32(r1_0)); - vst1q_u8( - blocks[1].as_mut_ptr().offset(16), - vreinterpretq_u8_u32(r1_1), - ); - vst1q_u8( - blocks[1].as_mut_ptr().offset(2 * 16), - vreinterpretq_u8_u32(r1_2), - ); - vst1q_u8( - blocks[1].as_mut_ptr().offset(3 * 16), - vreinterpretq_u8_u32(r1_3), - ); - - vst1q_u8(blocks[2].as_mut_ptr().offset(0), vreinterpretq_u8_u32(r2_0)); - vst1q_u8( - blocks[2].as_mut_ptr().offset(16), - vreinterpretq_u8_u32(r2_1), - ); - vst1q_u8( - blocks[2].as_mut_ptr().offset(2 * 16), - vreinterpretq_u8_u32(r2_2), - ); - vst1q_u8( - blocks[2].as_mut_ptr().offset(3 * 16), - vreinterpretq_u8_u32(r2_3), - ); - - vst1q_u8(blocks[3].as_mut_ptr().offset(0), vreinterpretq_u8_u32(r3_0)); - vst1q_u8( - blocks[3].as_mut_ptr().offset(16), - vreinterpretq_u8_u32(r3_1), - ); - vst1q_u8( - 
blocks[3].as_mut_ptr().offset(2 * 16), - vreinterpretq_u8_u32(r3_2), - ); - vst1q_u8( - blocks[3].as_mut_ptr().offset(3 * 16), - vreinterpretq_u8_u32(r3_3), - ); - - self.state[3] = add64!(self.state[3], ctrs[3]); + for block in 0..4 { + // add state to block + for state_row in 0..4 { + add_assign_vec!(blocks[block][state_row], self.state[state_row]); + } + if block > 0 { + blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); + } + // write blocks to dest + for state_row in 0..4 { + vst1q_u8( + dest[block].as_mut_ptr().offset(state_row << 4), + vreinterpretq_u8_u32(blocks[block][state_row as usize]), + ); + } + } + self.state[3] = add64!(self.state[3], self.ctrs[3]); } } } + +macro_rules! rotate_left { + ($v:expr, 8) => {{ + let maskb = [3u8, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14]; + let mask = vld1q_u8(maskb.as_ptr()); + + $v = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32($v), mask)) + }}; + ($v:expr, 16) => { + $v = vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32($v))) + }; + ($v:expr, $r:literal) => { + $v = vorrq_u32(vshlq_n_u32($v, $r), vshrq_n_u32($v, 32 - $r)) + }; +} + +macro_rules! extract { + ($v:expr, $s:literal) => { + $v = vextq_u32($v, $v, $s) + }; +} + +impl Backend { + #[inline(always)] + /// Generates `num_blocks` blocks and blindly writes them to `dest_ptr` + /// + /// `num_blocks` must be greater than 0, and less than or equal to 4. + /// + /// # Safety + /// `dest_ptr` must have at least `64 * num_blocks` bytes available to be + /// overwritten, or else it could produce undefined behavior + #[cfg(feature = "rng")] + unsafe fn write_par_ks_blocks(&mut self, buffer: &mut [u32; 64]) { + let mut blocks = [ + [self.state[0], self.state[1], self.state[2], self.state[3]], + [ + self.state[0], + self.state[1], + self.state[2], + add64!(self.state[3], self.ctrs[0]), + ], + [ + self.state[0], + self.state[1], + self.state[2], + add64!(self.state[3], self.ctrs[1]), + ], + [ + self.state[0], + self.state[1], + self.state[2], + add64!(self.state[3], self.ctrs[2]), + ], + ]; + + for _ in 0..R::COUNT { + double_quarter_round(&mut blocks); + } + + let mut dest_ptr = buffer.as_mut_ptr() as *mut u8; + for block in 0..4 { + // add state to block + for state_row in 0..4 { + add_assign_vec!(blocks[block][state_row], self.state[state_row]); + } + if block > 0 { + blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); + } + // write blocks to buffer + for state_row in 0..4 { + vst1q_u8( + dest_ptr.offset(state_row << 4), + vreinterpretq_u8_u32(blocks[block][state_row as usize]), + ); + } + dest_ptr = dest_ptr.add(64); + } + self.state[3] = add64!(self.state[3], self.ctrs[3]); + } +} + +#[inline] +unsafe fn double_quarter_round(blocks: &mut [[uint32x4_t; 4]; 4]) { + add_xor_rot(blocks); + rows_to_cols(blocks); + add_xor_rot(blocks); + cols_to_rows(blocks); +} + +#[inline] +unsafe fn add_xor_rot(blocks: &mut [[uint32x4_t; 4]; 4]) { + /// Evaluates to `a = a ^ b`, where the operands are u32x4s + macro_rules! 
xor_assign_vec { + ($a:expr, $b:expr) => { + $a = veorq_u32($a, $b) + }; + } + for block in blocks.iter_mut() { + // this part of the code cannot be reduced much more without having + // to deal with some problems regarding `rotate_left` requiring the second + // argument to be a const, and const arrays cannot be indexed by non-consts + add_assign_vec!(block[0], block[1]); + xor_assign_vec!(block[3], block[0]); + rotate_left!(block[3], 16); + + add_assign_vec!(block[2], block[3]); + xor_assign_vec!(block[1], block[2]); + rotate_left!(block[1], 12); + + add_assign_vec!(block[0], block[1]); + xor_assign_vec!(block[3], block[0]); + rotate_left!(block[3], 8); + + add_assign_vec!(block[2], block[3]); + xor_assign_vec!(block[1], block[2]); + rotate_left!(block[1], 7); + } +} + +#[inline] +unsafe fn rows_to_cols(blocks: &mut [[uint32x4_t; 4]; 4]) { + for block in blocks.iter_mut() { + extract!(block[1], 1); + extract!(block[2], 2); + extract!(block[3], 3); + } +} + +#[inline] +unsafe fn cols_to_rows(blocks: &mut [[uint32x4_t; 4]; 4]) { + for block in blocks.iter_mut() { + extract!(block[1], 3); + extract!(block[2], 2); + extract!(block[3], 1); + } +} diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index 3eabb659..7c570b99 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -1,23 +1,30 @@ //! Portable implementation which does not rely on architecture-specific //! intrinsics. -use crate::{Block, ChaChaCore, Unsigned, STATE_WORDS}; +use crate::{ChaChaCore, Rounds, Variant, STATE_WORDS}; + +#[cfg(feature = "cipher")] +use crate::chacha::Block; +#[cfg(feature = "cipher")] use cipher::{ consts::{U1, U64}, BlockSizeUser, ParBlocksSizeUser, StreamBackend, }; -pub(crate) struct Backend<'a, R: Unsigned>(pub(crate) &'a mut ChaChaCore); +pub(crate) struct Backend<'a, R: Rounds, V: Variant>(pub(crate) &'a mut ChaChaCore); -impl<'a, R: Unsigned> BlockSizeUser for Backend<'a, R> { +#[cfg(feature = "cipher")] +impl<'a, R: Rounds, V: Variant> BlockSizeUser for Backend<'a, R, V> { type BlockSize = U64; } -impl<'a, R: Unsigned> ParBlocksSizeUser for Backend<'a, R> { +#[cfg(feature = "cipher")] +impl<'a, R: Rounds, V: Variant> ParBlocksSizeUser for Backend<'a, R, V> { type ParBlocksSize = U1; } -impl<'a, R: Unsigned> StreamBackend for Backend<'a, R> { +#[cfg(feature = "cipher")] +impl<'a, R: Rounds, V: Variant> StreamBackend for Backend<'a, R, V> { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); @@ -29,11 +36,26 @@ impl<'a, R: Unsigned> StreamBackend for Backend<'a, R> { } } +#[cfg(feature = "rng")] +impl<'a, R: Rounds, V: Variant> Backend<'a, R, V> { + #[inline(always)] + pub(crate) fn gen_ks_blocks(&mut self, buffer: &mut [u32; 64]) { + for i in 0..4 { + let res = run_rounds::(&self.0.state); + self.0.state[12] = self.0.state[12].wrapping_add(1); + + for (word, val) in buffer[i << 4..(i + 1) << 4].iter_mut().zip(res.iter()) { + *word = val.to_le(); + } + } + } +} + #[inline(always)] -fn run_rounds(state: &[u32; STATE_WORDS]) -> [u32; STATE_WORDS] { +fn run_rounds(state: &[u32; STATE_WORDS]) -> [u32; STATE_WORDS] { let mut res = *state; - for _ in 0..R::USIZE { + for _ in 0..R::COUNT { // column rounds quarter_round(0, 4, 8, 12, &mut res); quarter_round(1, 5, 9, 13, &mut res); diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 82692c01..748c59c0 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -1,7 +1,17 @@ -use crate::{Block, 
StreamClosure, Unsigned, STATE_WORDS}; +use crate::Rounds; + +#[cfg(feature = "rng")] +use crate::{ChaChaCore, Variant}; + +#[cfg(feature = "cipher")] +use crate::{STATE_WORDS, chacha::Block}; +#[cfg(feature = "cipher")] use cipher::{ + StreamClosure, consts::{U1, U64}, - BlockSizeUser, ParBlocksSizeUser, StreamBackend, + StreamBackend, + BlockSizeUser, + ParBlocksSizeUser }; use core::marker::PhantomData; @@ -12,9 +22,10 @@ use core::arch::x86_64::*; #[inline] #[target_feature(enable = "sse2")] +#[cfg(feature = "cipher")] pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where - R: Unsigned, + R: Rounds, F: StreamClosure, { let state_ptr = state.as_ptr() as *const __m128i; @@ -33,20 +44,23 @@ where state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; } -struct Backend { +struct Backend { v: [__m128i; 4], _pd: PhantomData, } -impl BlockSizeUser for Backend { +#[cfg(feature = "cipher")] +impl BlockSizeUser for Backend { type BlockSize = U64; } -impl ParBlocksSizeUser for Backend { +#[cfg(feature = "cipher")] +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U1; } -impl StreamBackend for Backend { +#[cfg(feature = "cipher")] +impl StreamBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { @@ -63,9 +77,51 @@ impl StreamBackend for Backend { #[inline] #[target_feature(enable = "sse2")] -unsafe fn rounds(v: &[__m128i; 4]) -> [__m128i; 4] { +#[cfg(feature = "rng")] +pub(crate) unsafe fn rng_inner(core: &mut ChaChaCore, buffer: &mut [u32; 64]) +where + R: Rounds, + V: Variant +{ + let state_ptr = core.state.as_ptr() as *const __m128i; + let mut backend = Backend:: { + v: [ + _mm_loadu_si128(state_ptr.add(0)), + _mm_loadu_si128(state_ptr.add(1)), + _mm_loadu_si128(state_ptr.add(2)), + _mm_loadu_si128(state_ptr.add(3)), + ], + _pd: PhantomData, + }; + + for i in 0..4 { + backend.gen_ks_block(&mut buffer[i << 4..(i+1) << 4]); + } + + core.state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; +} + +#[cfg(feature = "rng")] +impl Backend { + #[inline(always)] + fn gen_ks_block(&mut self, block: &mut [u32]) { + unsafe { + let res = rounds::(&self.v); + self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); + + let block_ptr = block.as_mut_ptr() as *mut __m128i; + for i in 0..4 { + _mm_storeu_si128(block_ptr.add(i), res[i]); + } + } + } +} + +#[inline] +#[target_feature(enable = "sse2")] +unsafe fn rounds(v: &[__m128i; 4]) -> [__m128i; 4] { let mut res = *v; - for _ in 0..R::USIZE { + for _ in 0..R::COUNT { double_quarter_round(&mut res); } diff --git a/chacha20/src/chacha.rs b/chacha20/src/chacha.rs new file mode 100644 index 00000000..441887dd --- /dev/null +++ b/chacha20/src/chacha.rs @@ -0,0 +1,38 @@ +pub use cipher::{ + array::Array, + consts::{U12, U32, U64}, + IvSizeUser, KeyIvInit, KeySizeUser, StreamCipherCoreWrapper, +}; + +use crate::{variants::Ietf, ChaChaCore, Rounds, R12, R20, R8}; + +/// Key type used by all ChaCha variants. +pub type Key = Array; + +/// Nonce type used by ChaCha variants. 
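+///
+/// This is the 96-bit IETF nonce; the legacy "djb" and XChaCha constructions
+/// use their own nonce types (`LegacyNonce` and `XNonce`).
+///
+/// A minimal usage sketch, mirroring the crate-level example (the key and
+/// nonce values below are illustrative only):
+///
+/// ```ignore
+/// use chacha20::ChaCha20;
+/// use chacha20::cipher::{KeyIvInit, StreamCipher};
+///
+/// let key = [0x42; 32];
+/// let nonce = [0x24; 12];
+/// let mut cipher = ChaCha20::new(&key.into(), &nonce.into());
+/// let mut buf = [0u8; 4];
+/// cipher.apply_keystream(&mut buf);
+/// ```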
+pub type Nonce = Array; + +/// ChaCha8 stream cipher (reduced-round variant of [`ChaCha20`] with 8 rounds) +pub type ChaCha8 = StreamCipherCoreWrapper>; + +/// ChaCha12 stream cipher (reduced-round variant of [`ChaCha20`] with 12 rounds) +pub type ChaCha12 = StreamCipherCoreWrapper>; + +/// ChaCha20 stream cipher (RFC 8439 version with 96-bit nonce) +pub type ChaCha20 = StreamCipherCoreWrapper>; + +pub(crate) type Block = Array; + +impl KeySizeUser for ChaChaCore { + type KeySize = U32; +} + +impl IvSizeUser for ChaChaCore { + type IvSize = U12; +} +impl KeyIvInit for ChaChaCore { + #[inline] + fn new(key: &Key, iv: &Nonce) -> Self { + ChaChaCore::::new(key.as_ref(), iv.as_ref()) + } +} diff --git a/chacha20/src/legacy.rs b/chacha20/src/legacy.rs index e0b85f47..b33b820e 100644 --- a/chacha20/src/legacy.rs +++ b/chacha20/src/legacy.rs @@ -1,18 +1,16 @@ //! Legacy version of ChaCha20 with a 64-bit nonce -use super::{ChaChaCore, Key, Nonce}; +use crate::chacha::Key; +use crate::{ChaChaCore, R20}; use cipher::{ array::Array, - consts::{U10, U32, U64, U8}, - BlockSizeUser, IvSizeUser, KeyIvInit, KeySizeUser, StreamCipherCore, StreamCipherCoreWrapper, - StreamCipherSeekCore, StreamClosure, + consts::{U32, U8}, + IvSizeUser, KeyIvInit, KeySizeUser, StreamCipherCoreWrapper, }; -#[cfg(feature = "zeroize")] -use cipher::zeroize::ZeroizeOnDrop; - /// Nonce type used by [`ChaCha20Legacy`]. pub type LegacyNonce = Array; +use crate::variants::Legacy; /// The ChaCha20 stream cipher (legacy "djb" construction with 64-bit nonce). /// @@ -21,8 +19,8 @@ pub type LegacyNonce = Array; /// not allow encrypting of more than 256 GiB of data. pub type ChaCha20Legacy = StreamCipherCoreWrapper; -/// The ChaCha20 stream cipher (legacy "djb" construction with 64-bit nonce). -pub struct ChaCha20LegacyCore(ChaChaCore); +/// /// The ChaCha20 stream cipher (legacy "djb" construction with 64-bit nonce). +pub type ChaCha20LegacyCore = ChaChaCore; impl KeySizeUser for ChaCha20LegacyCore { type KeySize = U32; @@ -32,45 +30,9 @@ impl IvSizeUser for ChaCha20LegacyCore { type IvSize = U8; } -impl BlockSizeUser for ChaCha20LegacyCore { - type BlockSize = U64; -} - impl KeyIvInit for ChaCha20LegacyCore { #[inline(always)] fn new(key: &Key, iv: &LegacyNonce) -> Self { - let mut padded_iv = Nonce::default(); - padded_iv[4..].copy_from_slice(iv); - ChaCha20LegacyCore(ChaChaCore::new(key, &padded_iv)) - } -} - -impl StreamCipherCore for ChaCha20LegacyCore { - #[inline(always)] - fn remaining_blocks(&self) -> Option { - self.0.remaining_blocks() - } - - #[inline(always)] - fn process_with_backend(&mut self, f: impl StreamClosure) { - self.0.process_with_backend(f); - } -} - -impl StreamCipherSeekCore for ChaCha20LegacyCore { - type Counter = u32; - - #[inline(always)] - fn get_block_pos(&self) -> u32 { - self.0.get_block_pos() - } - - #[inline(always)] - fn set_block_pos(&mut self, pos: u32) { - self.0.set_block_pos(pos); + ChaChaCore::::new(key.as_ref(), iv.as_ref()) } } - -#[cfg(feature = "zeroize")] -#[cfg_attr(docsrs, doc(cfg(feature = "zeroize")))] -impl ZeroizeOnDrop for ChaCha20LegacyCore {} diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index 1716f60c..9b89f8e7 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -41,7 +41,8 @@ //! - ⊕ xor //! //! # Example -//! ``` +#![cfg_attr(feature = "cipher", doc = " ```")] +#![cfg_attr(not(feature = "cipher"), doc = " ```ignore")] //! use chacha20::ChaCha20; //! // Import relevant traits //! 
use chacha20::cipher::{KeyIvInit, StreamCipher, StreamCipherSeek}; @@ -109,26 +110,41 @@ #![allow(clippy::needless_range_loop)] #![warn(missing_docs, rust_2018_idioms, trivial_casts, unused_qualifications)] +#[cfg(feature = "cipher")] pub use cipher; +#[cfg(feature = "cipher")] +use cipher::{consts::U64, BlockSizeUser, StreamCipherCore, StreamCipherSeekCore}; use cfg_if::cfg_if; -use cipher::{ - array::{typenum::Unsigned, Array}, - consts::{U10, U12, U32, U4, U6, U64}, - BlockSizeUser, IvSizeUser, KeyIvInit, KeySizeUser, StreamCipherCore, StreamCipherCoreWrapper, - StreamCipherSeekCore, StreamClosure, -}; use core::marker::PhantomData; #[cfg(feature = "zeroize")] -use cipher::zeroize::{Zeroize, ZeroizeOnDrop}; +use zeroize::{Zeroize, ZeroizeOnDrop}; mod backends; +#[cfg(feature = "cipher")] +mod chacha; +#[cfg(feature = "legacy")] mod legacy; +#[cfg(feature = "rng")] +mod rng; +#[cfg(feature = "xchacha")] mod xchacha; -pub use legacy::{ChaCha20Legacy, ChaCha20LegacyCore, LegacyNonce}; -pub use xchacha::{hchacha, XChaCha12, XChaCha20, XChaCha8, XChaChaCore, XNonce}; +mod variants; +use variants::Variant; + +#[cfg(feature = "cipher")] +pub use chacha::{ChaCha12, ChaCha20, ChaCha8, Key, KeyIvInit}; +#[cfg(feature = "rng")] +pub use rand_core; +#[cfg(feature = "rng")] +pub use rng::{ChaCha12Core, ChaCha12Rng, ChaCha20Core, ChaCha20Rng, ChaCha8Core, ChaCha8Rng}; + +#[cfg(feature = "legacy")] +pub use legacy::{ChaCha20Legacy, LegacyNonce}; +#[cfg(feature = "xchacha")] +pub use xchacha::{hchacha, XChaCha12, XChaCha20, XChaCha8, XNonce}; /// State initialization constant ("expand 32-byte k") const CONSTANTS: [u32; 4] = [0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]; @@ -136,23 +152,35 @@ const CONSTANTS: [u32; 4] = [0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574] /// Number of 32-bit words in the ChaCha state const STATE_WORDS: usize = 16; -/// Block type used by all ChaCha variants. -type Block = Array; +/// Marker type for a number of ChaCha rounds to perform. +pub trait Rounds: Copy { + /// The amount of rounds to perform + const COUNT: usize; +} -/// Key type used by all ChaCha variants. -pub type Key = Array; +/// 8-rounds +#[derive(Copy, Clone)] +pub struct R8; -/// Nonce type used by ChaCha variants. -pub type Nonce = Array; +impl Rounds for R8 { + const COUNT: usize = 4; +} -/// ChaCha8 stream cipher (reduced-round variant of [`ChaCha20`] with 8 rounds) -pub type ChaCha8 = StreamCipherCoreWrapper>; +/// 12-rounds +#[derive(Copy, Clone)] +pub struct R12; -/// ChaCha12 stream cipher (reduced-round variant of [`ChaCha20`] with 12 rounds) -pub type ChaCha12 = StreamCipherCoreWrapper>; +impl Rounds for R12 { + const COUNT: usize = 6; +} + +/// 20-rounds +#[derive(Copy, Clone)] +pub struct R20; -/// ChaCha20 stream cipher (RFC 8439 version with 96-bit nonce) -pub type ChaCha20 = StreamCipherCoreWrapper>; +impl Rounds for R20 { + const COUNT: usize = 10; +} cfg_if! { if #[cfg(chacha20_force_soft)] { @@ -181,7 +209,8 @@ cfg_if! { } /// The ChaCha core function. 
-pub struct ChaChaCore { +#[cfg_attr(feature = "rng", derive(Clone))] +pub struct ChaChaCore { /// Internal state of the core function state: [u32; STATE_WORDS], /// CPU target feature tokens @@ -189,32 +218,23 @@ pub struct ChaChaCore { tokens: Tokens, /// Number of rounds to perform rounds: PhantomData, + /// the variant of the implementation + variant: PhantomData, } -impl KeySizeUser for ChaChaCore { - type KeySize = U32; -} - -impl IvSizeUser for ChaChaCore { - type IvSize = U12; -} - -impl BlockSizeUser for ChaChaCore { - type BlockSize = U64; -} - -impl KeyIvInit for ChaChaCore { - #[inline] - #[allow(clippy::let_unit_value)] - fn new(key: &Key, iv: &Nonce) -> Self { +impl ChaChaCore { + /// Constructs a ChaChaCore with the specified key, iv, and amount of rounds. + /// You must ensure that the iv is of the correct size when using this method + /// directly. + fn new(key: &[u8; 32], iv: &[u8]) -> Self { let mut state = [0u32; STATE_WORDS]; state[0..4].copy_from_slice(&CONSTANTS); let key_chunks = key.chunks_exact(4); for (val, chunk) in state[4..12].iter_mut().zip(key_chunks) { *val = u32::from_le_bytes(chunk.try_into().unwrap()); } - let iv_chunks = iv.chunks_exact(4); - for (val, chunk) in state[13..16].iter_mut().zip(iv_chunks) { + let iv_chunks = iv.as_ref().chunks_exact(4); + for (val, chunk) in state[V::NONCE_INDEX..16].iter_mut().zip(iv_chunks) { *val = u32::from_le_bytes(chunk.try_into().unwrap()); } @@ -235,23 +255,39 @@ impl KeyIvInit for ChaChaCore { let tokens = (); } } - Self { state, tokens, rounds: PhantomData, + variant: PhantomData, } } } -impl StreamCipherCore for ChaChaCore { +#[cfg(feature = "cipher")] +impl StreamCipherSeekCore for ChaChaCore { + type Counter = u32; + + #[inline(always)] + fn get_block_pos(&self) -> Self::Counter { + self.state[12] + } + + #[inline(always)] + fn set_block_pos(&mut self, pos: Self::Counter) { + self.state[12] = pos + } +} + +#[cfg(feature = "cipher")] +impl StreamCipherCore for ChaChaCore { #[inline(always)] fn remaining_blocks(&self) -> Option { let rem = u32::MAX - self.get_block_pos(); rem.try_into().ok() } - fn process_with_backend(&mut self, f: impl StreamClosure) { + fn process_with_backend(&mut self, f: impl cipher::StreamClosure) { cfg_if! { if #[cfg(chacha20_force_soft)] { f.call(&mut backends::soft::Backend(self)); @@ -291,23 +327,14 @@ impl StreamCipherCore for ChaChaCore { } } -impl StreamCipherSeekCore for ChaChaCore { - type Counter = u32; - - #[inline(always)] - fn get_block_pos(&self) -> u32 { - self.state[12] - } - - #[inline(always)] - fn set_block_pos(&mut self, pos: u32) { - self.state[12] = pos; - } +#[cfg(feature = "cipher")] +impl BlockSizeUser for ChaChaCore { + type BlockSize = U64; } #[cfg(feature = "zeroize")] #[cfg_attr(docsrs, doc(cfg(feature = "zeroize")))] -impl Drop for ChaChaCore { +impl Drop for ChaChaCore { fn drop(&mut self) { self.state.zeroize(); } @@ -315,4 +342,4 @@ impl Drop for ChaChaCore { #[cfg(feature = "zeroize")] #[cfg_attr(docsrs, doc(cfg(feature = "zeroize")))] -impl ZeroizeOnDrop for ChaChaCore {} +impl ZeroizeOnDrop for ChaChaCore {} diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs new file mode 100644 index 00000000..5f0ee44d --- /dev/null +++ b/chacha20/src/rng.rs @@ -0,0 +1,1111 @@ +// Copyright 2018 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
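+
+//! Block RNGs built on the ChaCha core function.
+//!
+//! The `ChaCha8Core`/`ChaCha12Core`/`ChaCha20Core` types defined here implement
+//! `rand_core`'s [`BlockRngCore`], and the matching `ChaCha8Rng`/`ChaCha12Rng`/
+//! `ChaCha20Rng` types wrap them in [`BlockRng`] to provide the [`RngCore`],
+//! [`SeedableRng`] and [`CryptoRng`] interfaces. The seed and the output buffer
+//! are zeroized on drop when the `zeroize` feature is enabled.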
+ +use core::fmt::Debug; + +use rand_core::{ + block::{BlockRng, BlockRngCore, CryptoBlockRng}, + CryptoRng, Error, RngCore, SeedableRng, +}; + +#[cfg(feature = "serde1")] +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +#[cfg(feature = "zeroize")] +use zeroize::{Zeroize, ZeroizeOnDrop}; + +use crate::{ + backends, + variants::{Ietf, Variant}, + ChaChaCore, Rounds, R12, R20, R8, +}; + +use cfg_if::cfg_if; + +// number of 32-bit words per ChaCha block (fixed by algorithm definition) +const BLOCK_WORDS: u8 = 16; + +/// The seed for ChaCha20. Implements ZeroizeOnDrop when the +/// zeroize feature is enabled. +#[derive(PartialEq, Eq)] +#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] +pub struct Seed([u8; 32]); + +impl Default for Seed { + fn default() -> Self { + Self([0u8; 32]) + } +} +impl AsRef<[u8; 32]> for Seed { + fn as_ref(&self) -> &[u8; 32] { + &self.0 + } +} +impl AsMut<[u8]> for Seed { + fn as_mut(&mut self) -> &mut [u8] { + self.0.as_mut() + } +} + +impl From<[u8; 32]> for Seed { + #[cfg(feature = "zeroize")] + fn from(mut value: [u8; 32]) -> Self { + let input = Self(value); + value.zeroize(); + input + } + #[cfg(not(feature = "zeroize"))] + fn from(value: [u8; 32]) -> Self { + Self(value) + } +} + +#[cfg(feature = "zeroize")] +impl Drop for Seed { + fn drop(&mut self) { + self.0.zeroize(); + } +} +#[cfg(feature = "zeroize")] +impl ZeroizeOnDrop for Seed {} + +impl Debug for Seed { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt(f) + } +} + +/// A wrapper for set_word_pos() input that can be assembled from: +/// * `u64` +/// * `[u8; 5]` +pub struct WordPosInput { + block_pos: u32, + index: usize, +} + +impl From<[u8; 5]> for WordPosInput { + fn from(value: [u8; 5]) -> Self { + Self { + block_pos: u32::from_le_bytes(value[0..4].try_into().unwrap()), + index: (value[4] & 0b1111) as usize, + } + } +} + +impl From for WordPosInput { + fn from(value: u64) -> Self { + Self { + block_pos: u32::from_le_bytes((value >> 4).to_le_bytes()[0..4].try_into().unwrap()), + index: (value.to_le_bytes()[0] & 0b1111) as usize, + } + } +} + +/// A wrapper for the `stream_id`. It can be used with a: +/// * `[u32; 3]` +/// * `[u8; 12]` or +/// * a `u128` +pub struct StreamId([u32; 3]); + +impl From<[u32; 3]> for StreamId { + fn from(value: [u32; 3]) -> Self { + Self(value) + } +} + +impl From<[u8; 12]> for StreamId { + fn from(value: [u8; 12]) -> Self { + let mut result = Self([0u32; 3]); + for (n, chunk) in result.0.iter_mut().zip(value.chunks_exact(4)) { + *n = u32::from_le_bytes(chunk.try_into().unwrap()) + } + result + } +} + +impl From for StreamId { + fn from(value: u128) -> Self { + let bytes = value.to_le_bytes(); + let mut result = Self([0u32; 3]); + for (n, chunk) in result.0.iter_mut().zip(bytes[0..12].chunks_exact(4)) { + *n = u32::from_le_bytes(chunk.try_into().unwrap()); + } + result + } +} + +/// A wrapper for `block_pos`. It can be used with: +/// * u32 +/// * [u8; 4] +pub struct BlockPos(u32); + +impl From for BlockPos { + fn from(value: u32) -> Self { + Self(value.to_le()) + } +} + +impl From<[u8; 4]> for BlockPos { + fn from(value: [u8; 4]) -> Self { + Self(u32::from_le_bytes(value)) + } +} + +/// The results buffer that zeroizes on drop when the `zeroize` feature is enabled. 
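+///
+/// This is a newtype around `[u32; 64]` (four 16-word ChaCha blocks) rather
+/// than a bare array so that a zeroizing `Drop` impl can be attached to it;
+/// the `AsRef<[u32]>`, `AsMut<[u32]>` and `Default` impls required for use
+/// with [`BlockRng`] are provided below.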
+#[derive(Clone)] +pub struct BlockRngResults([u32; BUFFER_SIZE as usize]); + +impl AsRef<[u32]> for BlockRngResults { + fn as_ref(&self) -> &[u32] { + &self.0 + } +} + +impl AsMut<[u32]> for BlockRngResults { + fn as_mut(&mut self) -> &mut [u32] { + &mut self.0 + } +} + +impl Default for BlockRngResults { + fn default() -> Self { + Self([0u32; BUFFER_SIZE as usize]) + } +} + +#[cfg(feature = "zeroize")] +impl Drop for BlockRngResults { + fn drop(&mut self) { + self.0.zeroize(); + } +} + +const BUFFER_SIZE: usize = 64; + +// NB. this must remain consistent with some currently hard-coded numbers in this module +const BUF_BLOCKS: u8 = BUFFER_SIZE as u8 >> 4; + +impl ChaChaCore { + /// Generates 4 blocks in parallel with avx2 & neon, but merely fills + /// 4 blocks with sse2 & soft + #[cfg(feature = "rand_core")] + fn generate(&mut self, buffer: &mut [u32; 64]) { + cfg_if! { + if #[cfg(chacha20_force_soft)] { + backends::soft::Backend(self).gen_ks_blocks(buffer); + } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + cfg_if! { + if #[cfg(chacha20_force_avx2)] { + unsafe { + backends::avx2::rng_inner::(self, buffer); + } + } else if #[cfg(chacha20_force_sse2)] { + unsafe { + backends::sse2::rng_inner::(self, buffer); + } + } else { + let (avx2_token, sse2_token) = self.tokens; + if avx2_token.get() { + unsafe { + backends::avx2::rng_inner::(self, buffer); + } + } else if sse2_token.get() { + unsafe { + backends::sse2::rng_inner::(self, buffer); + } + } else { + backends::soft::Backend(self).gen_ks_blocks(buffer); + } + } + } + } else if #[cfg(all(chacha20_force_neon, target_arch = "aarch64", target_feature = "neon"))] { + unsafe { + backends::neon::rng_inner::(self, buffer); + } + } else { + backends::soft::Backend(self).gen_ks_blocks(buffer); + } + } + } +} + +macro_rules! impl_chacha_rng { + ($ChaChaXRng:ident, $ChaChaXCore:ident, $rounds:ident, $abst: ident) => { + /// A cryptographically secure random number generator that uses the ChaCha algorithm. + /// + /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is + /// an improved variant of the Salsa20 cipher family, which was selected as one of the "stream + /// ciphers suitable for widespread adoption" by eSTREAM[^2]. + /// + /// ChaCha uses add-rotate-xor (ARX) operations as its basis. These are safe against timing + /// attacks, although that is mostly a concern for ciphers and not for RNGs. We provide a SIMD + /// implementation to support high throughput on a variety of common hardware platforms. + /// + /// With the ChaCha algorithm it is possible to choose the number of rounds the core algorithm + /// should run. The number of rounds is a tradeoff between performance and security, where 8 + /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a + /// conservative choice. + /// + /// We use a 32-bit counter and 96-bit stream identifier as in the IETF implementation[^3] + /// except that we use a stream identifier in place of a nonce. A 32-bit counter over 64-byte + /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows + /// 296 unique streams of output per seed. Both counter and stream are initialized + /// to zero but may be set via the `set_word_pos` and `set_stream` methods. 
+ /// + /// The word layout is: + /// + /// ```text + /// constant constant constant constant + /// seed seed seed seed + /// seed seed seed seed + /// counter stream_id stream_id stream_id + /// ``` + /// This implementation uses an output buffer of sixteen `u32` words, and uses + /// [`BlockRng`] to implement the [`RngCore`] methods. + /// + /// # Example for `ChaCha20Rng` + /// + /// ```rust + /// use chacha20::ChaCha20Rng; + /// // use rand_core traits + /// use rand_core::{SeedableRng, RngCore}; + /// + /// // the following inputs are examples and are neither + /// // recommended nor suggested values + /// + /// let seed = [42u8; 32]; + /// let mut rng = ChaCha20Rng::from_seed(seed); + /// rng.set_stream(100); + /// + /// // you can also use a [u8; 12] in `.set_stream()` + /// rng.set_stream([3u8; 12]); + /// // or a [u32; 3] + /// rng.set_stream([4u32; 3]); + /// + /// + /// rng.set_word_pos(5); + /// + /// // you can also use a [u8; 5] in `.set_word_pos()` + /// rng.set_word_pos([2u8; 5]); + /// + /// let x = rng.next_u32(); + /// let mut array = [0u8; 32]; + /// rng.fill_bytes(&mut array); + /// + /// // If you need to zeroize the RNG's buffer, ensure that "zeroize" + /// // feature is enabled in Cargo.toml, and then it will zeroize on + /// // drop automatically + /// # #[cfg(feature = "zeroize")] + /// use zeroize::Zeroize; + /// ``` + /// + /// The other Rngs from this crate are initialized similarly. + /// + /// [^1]: D. J. Bernstein, [*ChaCha, a variant of Salsa20*]( + /// https://cr.yp.to/chacha.html) + /// + /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( + /// http://www.ecrypt.eu.org/stream/) + /// + /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( + /// https://www.rfc-editor.org/rfc/rfc8439) + #[cfg_attr(docsrs, doc(cfg(feature = "rng")))] + #[derive(Clone)] + pub struct $ChaChaXRng { + /// The ChaChaCore struct + pub core: BlockRng<$ChaChaXCore>, + } + + /// The ChaCha core random number generator + #[derive(Clone)] + pub struct $ChaChaXCore(ChaChaCore<$rounds, Ietf>); + + impl SeedableRng for $ChaChaXRng { + type Seed = [u8; 32]; + + #[inline] + fn from_seed(seed: Self::Seed) -> Self { + Self { + core: BlockRng::new($ChaChaXCore::from_seed(seed.into())), + } + } + } + + impl BlockRngCore for $ChaChaXCore { + type Item = u32; + type Results = BlockRngResults; + + #[inline] + fn generate(&mut self, r: &mut Self::Results) { + self.0.generate(&mut r.0); + } + } + + impl CryptoBlockRng for $ChaChaXCore {} + impl CryptoRng for $ChaChaXRng {} + + #[cfg(feature = "zeroize")] + #[cfg_attr(docsrs, doc(cfg(feature = "zeroize")))] + impl ZeroizeOnDrop for $ChaChaXCore {} + + #[cfg(feature = "zeroize")] + #[cfg_attr(docsrs, doc(cfg(feature = "zeroize")))] + impl ZeroizeOnDrop for $ChaChaXRng {} + + // Custom Debug implementation that does not expose the internal state + impl Debug for $ChaChaXRng { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "ChaChaXCore {{}}") + } + } + + impl SeedableRng for $ChaChaXCore { + type Seed = Seed; + + #[inline] + fn from_seed(seed: Self::Seed) -> Self { + Self(ChaChaCore::<$rounds, Ietf>::new(seed.as_ref(), &[0u8; 12])) + } + } + + impl RngCore for $ChaChaXRng { + #[inline] + fn next_u32(&mut self) -> u32 { + self.core.next_u32() + } + #[inline] + fn next_u64(&mut self) -> u64 { + self.core.next_u64() + } + #[inline] + fn fill_bytes(&mut self, dest: &mut [u8]) { + self.core.fill_bytes(dest) + } + #[inline] + fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), 
Error> {
+            Ok(self.fill_bytes(dest))
+        }
+    }
+
+    impl $ChaChaXRng {
+        // The buffer is a 4-block window, i.e. it is always at a block-aligned position in the
+        // stream but if the stream has been sought it may not be self-aligned.
+
+        /// Get the offset from the start of the stream, in 32-bit words.
+        ///
+        /// Since the generated blocks are 64 words (2<sup>6</sup>) long and the
+        /// counter is 32 bits, the offset is a 36-bit number. Sub-word offsets are
+        /// not supported, hence the result can simply be multiplied by 4 to get a
+        /// byte-offset.
+        #[inline]
+        pub fn get_word_pos(&self) -> u64 {
+            let mut result =
+                u64::from(self.core.core.0.state[12].wrapping_sub(BUF_BLOCKS.into())) << 4;
+            result += self.core.index() as u64;
+            // eliminate bits above the 36th bit
+            result & 0xfffffffff
+        }
+
+        /// Set the offset from the start of the stream, in 32-bit words. This method
+        /// takes either:
+        /// * u64
+        /// * [u8; 5]
+        ///
+        /// As with `get_word_pos`, we use a 36-bit number. When given a `u64`, we use
+        /// the least significant 4 bits as the RNG's index, and the 32 bits before it
+        /// as the block position.
+        ///
+        /// When given a `[u8; 5]`, the word_pos is set similarly, but it is more
+        /// arbitrary.
+        #[inline]
+        pub fn set_word_pos<W: Into<WordPosInput>>(&mut self, word_offset: W) {
+            let word_pos: WordPosInput = word_offset.into();
+            self.core.core.0.state[12] = word_pos.block_pos;
+            // generate will increase block_pos by 4
+            self.core.generate_and_set(word_pos.index);
+        }
+
+        /// Sets the block pos and resets the RNG's index.
+        ///
+        /// The word pos will be equal to `block_pos * 16 words per block`.
+        ///
+        /// This can be used with either:
+        /// * u32
+        /// * [u8; 4]
+        #[inline]
+        pub fn set_block_pos<B: Into<BlockPos>>(&mut self, block_pos: B) {
+            self.core.reset();
+            self.core.core.0.state[12] = block_pos.into().0
+        }
+
+        /// Gets the block pos.
+        #[inline]
+        pub fn get_block_pos(&self) -> u32 {
+            self.core.core.0.state[12]
+        }
+
+        /// Set the stream number. The lower 96 bits are used and the rest are
+        /// discarded. This method takes either:
+        /// * [u32; 3]
+        /// * [u8; 12]
+        /// * u128
+        ///
+        /// This is initialized to zero; 2<sup>96</sup> unique streams of output
+        /// are available per seed/key.
+        #[inline]
+        pub fn set_stream<S: Into<StreamId>>(&mut self, stream: S) {
+            let stream: StreamId = stream.into();
+            for (n, val) in self.core.core.0.state[Ietf::NONCE_INDEX..BLOCK_WORDS as usize]
+                .as_mut()
+                .iter_mut()
+                .zip(stream.0.iter())
+            {
+                *n = *val;
+            }
+            if self.core.index() != BUFFER_SIZE {
+                self.core.generate_and_set(self.core.index());
+            }
+        }
+
+        /// Get the stream number.
+        #[inline]
+        pub fn get_stream(&self) -> u128 {
+            let mut result = [0u8; 16];
+            for (i, &big) in self.core.core.0.state[Ietf::NONCE_INDEX..BLOCK_WORDS as usize]
+                .iter()
+                .enumerate()
+            {
+                let index = i * 4;
+                result[index + 0] = big as u8;
+                result[index + 1] = (big >> 8) as u8;
+                result[index + 2] = (big >> 16) as u8;
+                result[index + 3] = (big >> 24) as u8;
+            }
+            u128::from_le_bytes(result)
+        }
+
+        /// Get the seed.
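+        ///
+        /// Together with `get_stream` and `get_word_pos`, the seed can be used to
+        /// snapshot and later restore the generator. A minimal, illustrative sketch
+        /// (example values only, not recommended settings):
+        ///
+        /// ```rust
+        /// use chacha20::ChaCha20Rng;
+        /// use rand_core::{RngCore, SeedableRng};
+        ///
+        /// let mut rng = ChaCha20Rng::from_seed([42u8; 32]);
+        /// rng.set_stream(7u128);
+        /// rng.next_u64();
+        ///
+        /// // snapshot the abstract state via the getters
+        /// let (seed, stream, word_pos) = (rng.get_seed(), rng.get_stream(), rng.get_word_pos());
+        ///
+        /// // rebuild an equivalent generator from the snapshot
+        /// let mut restored = ChaCha20Rng::from_seed(seed);
+        /// restored.set_stream(stream);
+        /// restored.set_word_pos(word_pos);
+        /// assert_eq!(rng.next_u32(), restored.next_u32());
+        /// ```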
+        #[inline]
+        pub fn get_seed(&self) -> [u8; 32] {
+            let mut result = [0u8; 32];
+            for (i, &big) in self.core.core.0.state[4..12].iter().enumerate() {
+                let index = i * 4;
+                result[index + 0] = big as u8;
+                result[index + 1] = (big >> 8) as u8;
+                result[index + 2] = (big >> 16) as u8;
+                result[index + 3] = (big >> 24) as u8;
+            }
+            result
+        }
+    }
+
+    impl PartialEq<$ChaChaXRng> for $ChaChaXRng {
+        fn eq(&self, rhs: &$ChaChaXRng) -> bool {
+            let a: $abst::$ChaChaXRng = self.into();
+            let b: $abst::$ChaChaXRng = rhs.into();
+            a == b
+        }
+    }
+
+    impl Eq for $ChaChaXRng {}
+
+    #[cfg(feature = "serde1")]
+    impl Serialize for $ChaChaXRng {
+        fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
+        where
+            S: Serializer,
+        {
+            $abst::$ChaChaXRng::from(self).serialize(s)
+        }
+    }
+    #[cfg(feature = "serde1")]
+    impl<'de> Deserialize<'de> for $ChaChaXRng {
+        fn deserialize<D>(d: D) -> Result<Self, D::Error>
+        where
+            D: Deserializer<'de>,
+        {
+            $abst::$ChaChaXRng::deserialize(d).map(|x| Self::from(&x))
+        }
+    }
+
+    impl From<$ChaChaXCore> for $ChaChaXRng {
+        fn from(core: $ChaChaXCore) -> Self {
+            $ChaChaXRng {
+                core: BlockRng::new(core),
+            }
+        }
+    }
+
+    mod $abst {
+        #[cfg(feature = "serde1")]
+        use serde::{Deserialize, Serialize};
+
+        // The abstract state of a ChaCha stream, independent of implementation choices. The
+        // comparison and serialization of this object is considered a semver-covered part of
+        // the API.
+        #[derive(Debug, PartialEq, Eq)]
+        #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
+        pub(crate) struct $ChaChaXRng {
+            seed: crate::rng::Seed,
+            stream: u128,
+            word_pos: u64,
+        }
+
+        impl From<&super::$ChaChaXRng> for $ChaChaXRng {
+            // Forget all information about the input except what is necessary to determine the
+            // outputs of any sequence of pub API calls.
+            fn from(r: &super::$ChaChaXRng) -> Self {
+                Self {
+                    seed: r.get_seed().into(),
+                    stream: r.get_stream(),
+                    word_pos: r.get_word_pos(),
+                }
+            }
+        }
+
+        impl From<&$ChaChaXRng> for super::$ChaChaXRng {
+            // Construct one of the possible concrete RNGs realizing an abstract state.
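+            // The conversion re-seeds the generator and then replays the recorded
+            // `stream` and `word_pos`; the word position is applied last so the
+            // rebuilt RNG ends up at exactly the stored offset.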
+ fn from(a: &$ChaChaXRng) -> Self { + use rand_core::SeedableRng; + let mut r = Self::from_seed(a.seed.0.into()); + r.set_stream(a.stream); + r.set_word_pos(a.word_pos); + r + } + } + } + }; +} + +impl_chacha_rng!(ChaCha8Rng, ChaCha8Core, R8, abst8); + +impl_chacha_rng!(ChaCha12Rng, ChaCha12Core, R12, abst12); + +impl_chacha_rng!(ChaCha20Rng, ChaCha20Core, R20, abst20); + +#[cfg(test)] +pub(crate) mod tests { + + use super::*; + + #[cfg(feature = "serde1")] + use serde_json; + + const KEY: [u8; 32] = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, + ]; + + #[test] + #[cfg(feature = "zeroize")] + fn test_zeroize_inputs_internal() { + let ptr = { + let initial_seed: Seed = KEY.clone().into(); + initial_seed.0.as_ptr() + }; + let memory_inspection = unsafe { core::slice::from_raw_parts(ptr, 32) }; + assert_ne!(&KEY, memory_inspection); + } + + #[test] + fn test_rng_output() { + let mut rng = ChaCha20Rng::from_seed(KEY); + let mut bytes = [0u8; 13]; + + rng.fill_bytes(&mut bytes); + assert_eq!( + bytes, + [177, 105, 126, 159, 198, 70, 30, 25, 131, 209, 49, 207, 105] + ); + + rng.fill_bytes(&mut bytes); + assert_eq!( + bytes, + [167, 163, 252, 19, 79, 20, 152, 128, 232, 187, 43, 93, 35] + ); + } + + #[test] + fn test_wrapping_add() { + let mut rng = ChaCha20Rng::from_seed(KEY); + rng.set_stream(1337 as u128); + // test counter wrapping-add + rng.set_word_pos((2 as u64).pow(36) - 1); + let mut output = [3u8; 128]; + rng.fill_bytes(&mut output); + + assert_ne!(output, [0u8; 128]); + + assert!(rng.get_word_pos() < 2000 && rng.get_word_pos() != 0); + } + + #[test] + fn test_set_and_get_equivalence() { + let seed = [44u8; 32]; + let mut rng = ChaCha20Rng::from_seed(seed); + + // test set_stream with [u32; 3] + rng.set_stream([313453u32, 0u32, 0u32]); + assert_eq!(rng.get_stream(), 313453); + + // test set_stream with [u8; 12] + rng.set_stream([89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(rng.get_stream(), 89); + + // test set_stream with u128 + rng.set_stream(11111111); + assert_eq!(rng.get_stream(), 11111111); + + // test set_block_pos with u32 + rng.set_block_pos(58392); + assert_eq!(rng.get_block_pos(), 58392); + // test word_pos = 16 * block_pos + assert_eq!(rng.get_word_pos(), 58392 * 16); + + // test set_block_pos with [u8; 4] + rng.set_block_pos([77, 0, 0, 0]); + assert_eq!(rng.get_block_pos(), 77); + + // test set_word_pos with u64 + rng.set_word_pos(8888); + assert_eq!(rng.get_word_pos(), 8888); + + // test set_word_pos with [u8; 5] + rng.set_word_pos([55, 0, 0, 0, 0]) + } + + #[cfg(feature = "serde1")] + use super::{ChaCha12Rng, ChaCha20Rng, ChaCha8Rng}; + + type ChaChaRng = ChaCha20Rng; + + #[cfg(feature = "serde1")] + #[test] + fn test_chacha_serde_roundtrip() { + let seed = [ + 1, 0, 52, 0, 0, 0, 0, 0, 1, 0, 10, 0, 22, 32, 0, 0, 2, 0, 55, 49, 0, 11, 0, 0, 3, 0, 0, + 0, 0, 0, 2, 92, + ]; + let mut rng1 = ChaCha20Rng::from_seed(seed); + let mut rng2 = ChaCha12Rng::from_seed(seed); + let mut rng3 = ChaCha8Rng::from_seed(seed); + + let encoded1 = serde_json::to_string(&rng1).unwrap(); + let encoded2 = serde_json::to_string(&rng2).unwrap(); + let encoded3 = serde_json::to_string(&rng3).unwrap(); + + let mut decoded1: ChaCha20Rng = serde_json::from_str(&encoded1).unwrap(); + let mut decoded2: ChaCha12Rng = serde_json::from_str(&encoded2).unwrap(); + let mut decoded3: ChaCha8Rng = serde_json::from_str(&encoded3).unwrap(); + + assert_eq!(rng1, decoded1); + assert_eq!(rng2, decoded2); + assert_eq!(rng3, 
decoded3); + + assert_eq!(rng1.next_u32(), decoded1.next_u32()); + assert_eq!(rng2.next_u32(), decoded2.next_u32()); + assert_eq!(rng3.next_u32(), decoded3.next_u32()); + } + + // This test validates that: + // 1. a hard-coded serialization demonstrating the format at time of initial release can still + // be deserialized to a ChaChaRng + // 2. re-serializing the resultant object produces exactly the original string + // + // Condition 2 is stronger than necessary: an equivalent serialization (e.g. with field order + // permuted, or whitespace differences) would also be admissible, but would fail this test. + // However testing for equivalence of serialized data is difficult, and there shouldn't be any + // reason we need to violate the stronger-than-needed condition, e.g. by changing the field + // definition order. + #[cfg(feature = "serde1")] + #[test] + fn test_chacha_serde_format_stability() { + let j = r#"{"seed":[4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8],"stream":27182818284,"word_pos":3141592653}"#; + let r: ChaChaRng = serde_json::from_str(&j).unwrap(); + let j1 = serde_json::to_string(&r).unwrap(); + assert_eq!(j, j1); + } + + #[test] + fn test_chacha_construction() { + let seed = [ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, + 0, 0, 0, + ]; + let mut rng1 = ChaChaRng::from_seed(seed); + assert_eq!(rng1.next_u32(), 137206642); + + let mut rng2 = ChaChaRng::from_rng(rng1).unwrap(); + assert_eq!(rng2.next_u32(), 1325750369); + } + + #[test] + fn test_chacha_true_values_a() { + // Test vectors 1 and 2 from + // https://tools.ietf.org/html/draft-nir-cfrg-chacha20-poly1305-04 + let seed = [0u8; 32]; + let mut rng = ChaChaRng::from_seed(seed); + + let mut results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng.next_u32(); + } + let expected = [ + 0xade0b876, 0x903df1a0, 0xe56a5d40, 0x28bd8653, 0xb819d2bd, 0x1aed8da0, 0xccef36a8, + 0xc70d778b, 0x7c5941da, 0x8d485751, 0x3fe02477, 0x374ad8b8, 0xf4b8436a, 0x1ca11815, + 0x69b687c3, 0x8665eeb2, + ]; + assert_eq!(results, expected); + + for i in results.iter_mut() { + *i = rng.next_u32(); + } + let expected = [ + 0xbee7079f, 0x7a385155, 0x7c97ba98, 0x0d082d73, 0xa0290fcb, 0x6965e348, 0x3e53c612, + 0xed7aee32, 0x7621b729, 0x434ee69c, 0xb03371d5, 0xd539d874, 0x281fed31, 0x45fb0a51, + 0x1f0ae1ac, 0x6f4d794b, + ]; + assert_eq!(results, expected); + } + + #[test] + fn test_chacha_true_values_b() { + // Test vector 3 from + // https://tools.ietf.org/html/draft-nir-cfrg-chacha20-poly1305-04 + let seed = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, + ]; + let mut rng = ChaChaRng::from_seed(seed); + + // Skip block 0 + for _ in 0..16 { + rng.next_u32(); + } + + let mut results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng.next_u32(); + } + let expected = [ + 0x2452eb3a, 0x9249f8ec, 0x8d829d9b, 0xddd4ceb1, 0xe8252083, 0x60818b01, 0xf38422b8, + 0x5aaa49c9, 0xbb00ca8e, 0xda3ba7b4, 0xc4b592d1, 0xfdf2732f, 0x4436274e, 0x2561b3c8, + 0xebdd4aa6, 0xa0136c00, + ]; + assert_eq!(results, expected); + } + + #[test] + fn test_chacha_true_values_c() { + // Test vector 4 from + // https://tools.ietf.org/html/draft-nir-cfrg-chacha20-poly1305-04 + let seed = [ + 0, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + ]; + let expected = [ + 0xfb4dd572, 0x4bc42ef1, 0xdf922636, 0x327f1394, 0xa78dea8f, 0x5e269039, 0xa1bebbc1, + 0xcaf09aae, 0xa25ab213, 0x48a6b46c, 
0x1b9d9bcb, 0x092c5be6, 0x546ca624, 0x1bec45d5, + 0x87f47473, 0x96f0992e, + ]; + let expected_end = 3 * 16; + let mut results = [0u32; 16]; + + // Test block 2 by skipping block 0 and 1 + let mut rng1 = ChaChaRng::from_seed(seed); + for _ in 0..32 { + rng1.next_u32(); + } + for i in results.iter_mut() { + *i = rng1.next_u32(); + } + assert_eq!(results, expected); + assert_eq!(rng1.get_word_pos(), expected_end); + + // Test block 2 by using `set_word_pos` + let mut rng2 = ChaChaRng::from_seed(seed); + rng2.set_word_pos(2 * 16); + for i in results.iter_mut() { + *i = rng2.next_u32(); + } + assert_eq!(results, expected); + assert_eq!(rng2.get_word_pos(), expected_end); + + // Test block 2 by using `set_block_pos` and u32 + let mut rng3 = ChaChaRng::from_seed(seed); + rng3.set_block_pos(2); + results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng3.next_u32(); + } + assert_eq!(results, expected); + assert_eq!(rng3.get_word_pos(), expected_end); + + // Test block 2 by using `set_block_pos` and [u8; 4] + let mut rng4 = ChaChaRng::from_seed(seed); + rng4.set_block_pos([2, 0, 0, 0]); + results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng4.next_u32(); + } + assert_eq!(results, expected); + assert_eq!(rng4.get_word_pos(), expected_end); + + // Test skipping behaviour with other types + let mut buf = [0u8; 32]; + rng2.fill_bytes(&mut buf[..]); + assert_eq!(rng2.get_word_pos(), expected_end + 8); + rng2.fill_bytes(&mut buf[0..25]); + assert_eq!(rng2.get_word_pos(), expected_end + 15); + rng2.next_u64(); + assert_eq!(rng2.get_word_pos(), expected_end + 17); + rng2.next_u32(); + rng2.next_u64(); + assert_eq!(rng2.get_word_pos(), expected_end + 20); + rng2.fill_bytes(&mut buf[0..1]); + assert_eq!(rng2.get_word_pos(), expected_end + 21); + } + + #[test] + fn test_chacha_multiple_blocks() { + let seed = [ + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, + 0, 0, 0, + ]; + let mut rng = ChaChaRng::from_seed(seed); + + // Store the 17*i-th 32-bit word, + // i.e., the i-th word of the i-th 16-word block + let mut results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng.next_u32(); + for _ in 0..16 { + rng.next_u32(); + } + } + let expected = [ + 0xf225c81a, 0x6ab1be57, 0x04d42951, 0x70858036, 0x49884684, 0x64efec72, 0x4be2d186, + 0x3615b384, 0x11cfa18e, 0xd3c50049, 0x75c775f6, 0x434c6530, 0x2c5bad8f, 0x898881dc, + 0x5f1c86d9, 0xc1f8e7f4, + ]; + assert_eq!(results, expected); + } + + #[test] + fn test_chacha_true_bytes() { + let seed = [0u8; 32]; + let mut rng = ChaChaRng::from_seed(seed); + let mut results = [0u8; 32]; + rng.fill_bytes(&mut results); + let expected = [ + 118, 184, 224, 173, 160, 241, 61, 144, 64, 93, 106, 229, 83, 134, 189, 40, 189, 210, + 25, 184, 160, 141, 237, 26, 168, 54, 239, 204, 139, 119, 13, 199, + ]; + assert_eq!(results, expected); + } + + #[test] + fn test_chacha_nonce() { + use hex_literal::hex; + // Test vector 5 from + // https://www.rfc-editor.org/rfc/rfc8439#section-2.3.2 + let seed = hex!("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"); + let mut rng = ChaChaRng::from_seed(seed); + + let stream_id = hex!("000000090000004a00000000"); + rng.set_stream(stream_id); + + // The test vectors omit the first 64-bytes of the keystream + let mut discard_first_64 = [0u8; 64]; + rng.fill_bytes(&mut discard_first_64); + + let mut results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng.next_u32(); + } + let expected = [ + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, 0xc7f4d1c7, 0x0368c033, 
0x9aaa2204, + 0x4e6cd4c3, 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, 0xd19c12b5, 0xb94e16de, + 0xe883d0cb, 0x4e3c50a2, + ]; + + assert_eq!(results, expected); + } + + #[test] + fn test_chacha_clone_streams() { + let seed = [ + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, + 0, 0, 0, + ]; + let mut rng = ChaChaRng::from_seed(seed); + let mut clone = rng.clone(); + for _ in 0..16 { + assert_eq!(rng.next_u64(), clone.next_u64()); + } + + rng.set_stream(51); + assert_eq!(rng.get_stream(), 51); + assert_eq!(clone.get_stream(), 0); + let mut fill_1 = [0u8; 7]; + rng.fill_bytes(&mut fill_1); + let mut fill_2 = [0u8; 7]; + clone.fill_bytes(&mut fill_2); + assert_ne!(fill_1, fill_2); + for _ in 0..7 { + assert!(rng.next_u64() != clone.next_u64()); + } + clone.set_stream(51); // switch part way through block + for _ in 7..16 { + assert_eq!(rng.next_u64(), clone.next_u64()); + } + } + + #[test] + fn test_chacha_word_pos_wrap_exact() { + use super::{BLOCK_WORDS, BUF_BLOCKS}; + let mut rng = ChaChaRng::from_seed(Default::default()); + // refilling the buffer in set_word_pos will wrap the block counter to 0 + let last_block = (2 as u64).pow(36) - u64::from(BUF_BLOCKS * BLOCK_WORDS); + rng.set_word_pos(last_block); + assert_eq!(rng.get_word_pos(), last_block); + } + + #[test] + fn test_chacha_word_pos_wrap_excess() { + use super::BLOCK_WORDS; + let mut rng = ChaChaRng::from_seed(Default::default()); + // refilling the buffer in set_word_pos will wrap the block counter past 0 + let last_block = (1 << 36) - u64::from(BLOCK_WORDS); + rng.set_word_pos(last_block); + assert_eq!(rng.get_word_pos(), last_block); + } + + #[test] + fn test_chacha_word_pos_zero() { + let mut rng = ChaChaRng::from_seed(Default::default()); + assert_eq!(rng.core.core.0.state[12], 0); + assert_eq!(rng.core.index(), 64); + assert_eq!(rng.get_word_pos(), 0); + rng.set_word_pos(0); + assert_eq!(rng.get_word_pos(), 0); + } + + #[test] + /// Testing the edge cases of `fill_bytes()` by brute-forcing it with dest sizes + /// that start at 1, and increase by 1 up to `N`, then they decrease from `N` + /// to 1, and this can repeat multiple times if desired. + /// + /// This test uses `rand_chacha v0.3.1` because this version's API is directly + /// based on `rand_chacha v0.3.1`, and previous versions of `chacha20` could be + /// affected by rust flags for changing the backend. Also, it doesn't seem to work + /// with `chacha20 v0.8` + /// + /// Because this test uses `rand_chacha v0.3.1` which uses a 64-bit counter, these + /// test results should be accurate up to `block_pos = 2^32 - 1`. + fn test_fill_bytes_v2() { + use rand_chacha::rand_core::{RngCore as _, SeedableRng as _}; + use rand_chacha::ChaCha20Rng as TesterRng; + + let mut rng = ChaChaRng::from_seed([0u8; 32]); + let mut tester_rng = TesterRng::from_seed([0u8; 32]); + + let num_iterations = 32; + + // If N is too large, it could cause stack overflow. 
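+        // Each array holds 1 + 2 + ... + N = N * (N + 1) / 2 bytes on the stack, so the
+        // N = 1000 used below gives LEN = 500500 bytes (roughly 0.48 MiB) per array.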
+ // With N = 1445, the arrays are 1044735 bytes each, or 0.9963 MiB + const N: usize = 1000; + // compute the sum from 1 to N, with increments of 1 + const LEN: usize = (N * (N + 1)) / 2; + + let mut test_array: [u8; LEN]; + let mut tester_array: [u8; LEN]; + + for _iteration in 0..num_iterations { + test_array = [0u8; LEN]; + tester_array = [0u8; LEN]; + + let mut dest_pos = 0; + // test fill_bytes with lengths starting at 1 byte, increasing by 1, + // up to N bytes + for test_len in 1..=N { + let debug_start_word_pos = rng.get_word_pos(); + let end_pos = dest_pos + test_len; + + // ensure that the current dest_pos index isn't overwritten already + assert_eq!(test_array[dest_pos], 0); + rng.fill_bytes(&mut test_array[dest_pos..end_pos]); + tester_rng.fill_bytes(&mut tester_array[dest_pos..end_pos]); + + if test_array[dest_pos..end_pos] != tester_array[dest_pos..end_pos] { + for (t, (index, expected)) in test_array[dest_pos..end_pos] + .iter() + .zip(tester_array[dest_pos..end_pos].iter().enumerate()) + { + if t != expected { + panic!( + "Failed test at start_word_pos = {},\nfailed index: {:?}\nFailing word_pos = {}", + debug_start_word_pos, + index, + debug_start_word_pos + (index / 4) as u64 + ); + } + } + } + assert_eq!(rng.next_u32(), tester_rng.next_u32()); + + dest_pos = end_pos; + } + test_array = [0u8; LEN]; + tester_array = [0u8; LEN]; + dest_pos = 0; + + // test fill_bytes with lengths starting at N bytes, decreasing by 1, + // down to 1 byte + for test_len in 1..=N { + let debug_start_word_pos = rng.get_word_pos(); + let end_pos = dest_pos + N - test_len; + + // ensure that the current dest_pos index isn't overwritten already + assert_eq!(test_array[dest_pos], 0); + rng.fill_bytes(&mut test_array[dest_pos..end_pos]); + tester_rng.fill_bytes(&mut tester_array[dest_pos..end_pos]); + + if test_array[dest_pos..end_pos] != tester_array[dest_pos..end_pos] { + for (t, (index, expected)) in test_array[dest_pos..end_pos] + .iter() + .zip(tester_array[dest_pos..end_pos].iter().enumerate()) + { + if t != expected { + panic!( + "Failed test at start_word_pos = {},\nfailed index: {:?}\nFailing word_pos = {}", + debug_start_word_pos, + index, + debug_start_word_pos + (index / 4) as u64 + ); + } + } + } + assert_eq!(rng.next_u32(), tester_rng.next_u32()); + dest_pos = end_pos; + } + } + } + + #[test] + #[allow(trivial_casts)] + fn test_trait_objects() { + use rand_core::CryptoRng; + + let mut rng1 = ChaChaRng::from_seed(Default::default()); + let rng2 = &mut rng1.clone() as &mut dyn CryptoRng; + for _ in 0..1000 { + assert_eq!(rng1.next_u64(), rng2.next_u64()); + } + } +} diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs new file mode 100644 index 00000000..58043a75 --- /dev/null +++ b/chacha20/src/variants.rs @@ -0,0 +1,25 @@ +//! Distinguishing features of ChaCha variants. +//! +//! To be revisited for the 64-bit counter. + +/// A trait that distinguishes some ChaCha variants +pub trait Variant: Clone { + /// the size of the Nonce in u32s + const NONCE_INDEX: usize; +} + +#[derive(Clone)] +/// The details pertaining to the IETF variant +pub struct Ietf(); +impl Variant for Ietf { + const NONCE_INDEX: usize = 13; +} + +#[derive(Clone)] +#[cfg(feature = "legacy")] +pub struct Legacy(); + +#[cfg(feature = "legacy")] +impl Variant for Legacy { + const NONCE_INDEX: usize = 14; +} diff --git a/chacha20/src/xchacha.rs b/chacha20/src/xchacha.rs index 74e556c6..02d5fdba 100644 --- a/chacha20/src/xchacha.rs +++ b/chacha20/src/xchacha.rs @@ -1,15 +1,19 @@ //! 
XChaCha is an extended nonce variant of ChaCha
 
-use super::{ChaChaCore, Key, Nonce, CONSTANTS, STATE_WORDS};
 use cipher::{
-    array::{typenum::Unsigned, Array},
-    consts::{U10, U16, U24, U32, U4, U6, U64},
+    array::Array,
+    consts::{U16, U24, U32, U64},
     BlockSizeUser, IvSizeUser, KeyIvInit, KeySizeUser, StreamCipherCore, StreamCipherCoreWrapper,
     StreamCipherSeekCore, StreamClosure,
 };
 
+use crate::{variants::Ietf, ChaChaCore, Rounds, CONSTANTS, R12, R20, R8, STATE_WORDS};
+
 #[cfg(feature = "zeroize")]
-use cipher::zeroize::ZeroizeOnDrop;
+use zeroize::ZeroizeOnDrop;
+
+/// Key type used by all ChaCha variants.
+pub type Key = Array<u8, U32>;
 
 /// Nonce type used by XChaCha variants.
 pub type XNonce = Array<u8, U24>;
@@ -29,37 +33,40 @@ pub type XNonce = Array<u8, U24>;
 /// and is documented in an (expired) IETF draft:
 ///
 /// <https://datatracker.ietf.org/doc/html/draft-arciszewski-xchacha-03>
-pub type XChaCha20 = StreamCipherCoreWrapper<XChaChaCore<U10>>;
+pub type XChaCha20 = StreamCipherCoreWrapper<XChaChaCore<R20>>;
 
 /// XChaCha12 stream cipher (reduced-round variant of [`XChaCha20`] with 12 rounds)
-pub type XChaCha12 = StreamCipherCoreWrapper<XChaChaCore<U6>>;
+pub type XChaCha12 = StreamCipherCoreWrapper<XChaChaCore<R12>>;
 
 /// XChaCha8 stream cipher (reduced-round variant of [`XChaCha20`] with 8 rounds)
-pub type XChaCha8 = StreamCipherCoreWrapper<XChaChaCore<U4>>;
+pub type XChaCha8 = StreamCipherCoreWrapper<XChaChaCore<R8>>;
 
 /// The XChaCha core function.
-pub struct XChaChaCore<R: Unsigned>(ChaChaCore<R>);
+pub struct XChaChaCore<R: Rounds>(ChaChaCore<R, Ietf>);
 
-impl<R: Unsigned> KeySizeUser for XChaChaCore<R> {
+impl<R: Rounds> KeySizeUser for XChaChaCore<R> {
     type KeySize = U32;
 }
 
-impl<R: Unsigned> IvSizeUser for XChaChaCore<R> {
+impl<R: Rounds> IvSizeUser for XChaChaCore<R> {
     type IvSize = U24;
 }
 
-impl<R: Unsigned> BlockSizeUser for XChaChaCore<R> {
+impl<R: Rounds> BlockSizeUser for XChaChaCore<R> {
     type BlockSize = U64;
 }
 
-impl<R: Unsigned> KeyIvInit for XChaChaCore<R> {
+impl<R: Rounds> KeyIvInit for XChaChaCore<R> {
     fn new(key: &Key, iv: &XNonce) -> Self {
-        let subkey = hchacha::<R>(key, iv[..16].try_into().unwrap());
-        let mut padded_iv = Nonce::default();
-        padded_iv[4..].copy_from_slice(&iv[16..]);
-        XChaChaCore(ChaChaCore::new(&subkey, &padded_iv))
+        let subkey = hchacha::<R>(key, iv[..16].as_ref().try_into().unwrap());
+
+        let mut nonce = [0u8; 12];
+        // first 4 bytes are 0, last 8 bytes are last 8 from the iv
+        // according to draft-arciszewski-xchacha-03
+        nonce[4..].copy_from_slice(&iv[16..]);
+        Self(ChaChaCore::<R, Ietf>::new(subkey.as_ref(), &nonce))
     }
 }
 
-impl<R: Unsigned> StreamCipherCore for XChaChaCore<R> {
+impl<R: Rounds> StreamCipherCore for XChaChaCore<R> {
     #[inline(always)]
     fn remaining_blocks(&self) -> Option<usize> {
         self.0.remaining_blocks()
@@ -71,7 +78,7 @@ impl<R: Unsigned> StreamCipherCore for XChaChaCore<R> {
     }
 }
 
-impl<R: Unsigned> StreamCipherSeekCore for XChaChaCore<R> {
+impl<R: Rounds> StreamCipherSeekCore for XChaChaCore<R> {
     type Counter = u32;
 
     #[inline(always)]
@@ -87,7 +94,7 @@ impl<R: Unsigned> StreamCipherSeekCore for XChaChaCore<R> {
 
 #[cfg(feature = "zeroize")]
 #[cfg_attr(docsrs, doc(cfg(feature = "zeroize")))]
-impl<R: Unsigned> ZeroizeOnDrop for XChaChaCore<R> {}
+impl<R: Rounds> ZeroizeOnDrop for XChaChaCore<R> {}
 
 /// The HChaCha function: adapts the ChaCha core function in the same
 /// manner that HSalsa adapts the Salsa function.
@@ -103,7 +110,7 @@ impl<R: Unsigned> ZeroizeOnDrop for XChaChaCore<R> {}
 /// For more information on HSalsa on which HChaCha is based, see:
 ///
 ///
-pub fn hchacha<R: Unsigned>(key: &Key, input: &Array<u8, U16>) -> Array<u8, U32> {
+pub fn hchacha<R: Rounds>(key: &Key, input: &Array<u8, U16>) -> Array<u8, U32> {
     let mut state = [0u32; STATE_WORDS];
     state[..4].copy_from_slice(&CONSTANTS);
 
@@ -117,7 +124,7 @@ pub fn hchacha<R: Unsigned>(key: &Key, input: &Array<u8, U16>) -> Array<u8, U32
     }
 
     // R rounds consisting of R/2 column rounds and R/2 diagonal rounds
-    for _ in 0..R::USIZE {
+    for _ in 0..R::COUNT {
         // column rounds
         quarter_round(0, 4, 8, 12, &mut state);
         quarter_round(1, 5, 9, 13, &mut state);
@@ -185,7 +192,7 @@ mod hchacha20_tests {
         "a0f9e4d58a74a853c12ec41326d3ecdc"
     );
 
-        let actual = hchacha::<U10>(Array::from_slice(&KEY), Array::from_slice(&INPUT));
+        let actual = hchacha::<R20>(Array::from_slice(&KEY), Array::from_slice(&INPUT));
 
         assert_eq!(actual.as_slice(), &OUTPUT);
     }
 }
diff --git a/chacha20/tests/mod.rs b/chacha20/tests/mod.rs
index 0f672a5b..4e4aa33c 100644
--- a/chacha20/tests/mod.rs
+++ b/chacha20/tests/mod.rs
@@ -1,15 +1,27 @@
 //! Tests for ChaCha20 (IETF and "djb" versions) as well as XChaCha20
 
-use chacha20::{ChaCha20, ChaCha20Legacy, XChaCha20};
+#[cfg(feature = "cipher")]
+use chacha20::ChaCha20;
+
+#[cfg(feature = "legacy")]
+use chacha20::ChaCha20Legacy;
+
+#[cfg(feature = "xchacha")]
+use chacha20::XChaCha20;
 
 // IETF version of ChaCha20 (96-bit nonce)
+#[cfg(feature = "cipher")]
 cipher::stream_cipher_test!(chacha20_core, "chacha20", ChaCha20);
+#[cfg(feature = "cipher")]
 cipher::stream_cipher_seek_test!(chacha20_seek, ChaCha20);
+#[cfg(feature = "xchacha")]
 cipher::stream_cipher_seek_test!(xchacha20_seek, XChaCha20);
+#[cfg(feature = "legacy")]
 cipher::stream_cipher_seek_test!(chacha20legacy_seek, ChaCha20Legacy);
 
+#[cfg(feature = "cipher")]
 mod chacha20test {
-    use chacha20::{ChaCha20, Key, Nonce};
-    use cipher::{KeyIvInit, StreamCipher};
+    use chacha20::{ChaCha20, KeyIvInit};
+    use cipher::StreamCipher;
     use hex_literal::hex;
     //
@@ -59,7 +71,7 @@ mod chacha20test {
     #[test]
     fn chacha20_keystream() {
-        let mut cipher = ChaCha20::new(&Key::from(KEY), &Nonce::from(IV));
+        let mut cipher = ChaCha20::new(&KEY.into(), &IV.into());
 
         // The test vectors omit the first 64-bytes of the keystream
         let mut prefix = [0u8; 64];
@@ -72,7 +84,7 @@ mod chacha20test {
     #[test]
     fn chacha20_encryption() {
-        let mut cipher = ChaCha20::new(&Key::from(KEY), &Nonce::from(IV));
+        let mut cipher = ChaCha20::new(&KEY.into(), &IV.into());
         let mut buf = PLAINTEXT;
 
         // The test vectors omit the first 64-bytes of the keystream
@@ -85,6 +97,7 @@ mod chacha20test {
 }
 
 #[rustfmt::skip]
+#[cfg(feature = "xchacha")]
 mod xchacha20 {
     use chacha20::{Key, XChaCha20, XNonce};
     use cipher::{KeyIvInit, StreamCipher};
@@ -94,7 +107,7 @@ mod xchacha20 {
     //
     // XChaCha20 test vectors from:
-    //
+    //
     //
 
     const KEY: [u8; 32] = hex!("
@@ -175,11 +188,11 @@ mod xchacha20 {
 
 #[cfg(feature = "legacy")]
 #[rustfmt::skip]
 mod legacy {
-    use chacha20::{ChaCha20Legacy, Key, LegacyNonce};
-    use cipher::{NewCipher, StreamCipher, StreamCipherSeek};
+    use chacha20::{ChaCha20Legacy, LegacyNonce};
+    use cipher::{StreamCipher, StreamCipherSeek, KeyIvInit};
     use hex_literal::hex;
 
-    cipher::stream_cipher_test!(chacha20_legacy_core, ChaCha20Legacy, "chacha20-legacy");
+    cipher::stream_cipher_test!(chacha20_legacy_core, "chacha20-legacy", ChaCha20Legacy);
     cipher::stream_cipher_seek_test!(chacha20_legacy_seek, ChaCha20Legacy);
 
     const KEY_LONG: [u8; 32] = hex!("
@@ -206,7 +219,7 @@
             for middle in idx..256 {
                 for last in middle..256 {
                     let mut
cipher = - ChaCha20Legacy::new(&Key::from(KEY_LONG), &LegacyNonce::from(IV_LONG)); + ChaCha20Legacy::new(&KEY_LONG.into(), &LegacyNonce::from(IV_LONG)); let mut buf = [0; 256]; cipher.seek(idx as u64); diff --git a/rust-toolchain.toml.save b/rust-toolchain.toml.save new file mode 100644 index 00000000..f3201139 --- /dev/null +++ b/rust-toolchain.toml.save @@ -0,0 +1,38 @@ +// Tested for N=32; could be bugs in the loop bounds for other N +// returns bytes written, like fwrite: N means no error, 0 means error in all fwrites +size_t LongNumPrint( uint8_t *num, size_t N) +{ + // caller can print a name if it wants + + const int revbufsize = 8192; // 8kiB on the stack should be fine + alignas(32) char revbuf[revbufsize]; + + if (N<32) { + // TODO: maybe use a smaller revbuf for this case to avoid touching new stack pages + ASCIIrev32B(revbuf, num); // the data we want is at the *end* of a 32-byte reverse + return fwrite(revbuf+32-N, 1, N, stdout); + } + + size_t bytes_written = 0; + const uint8_t *inp = num+N; // start with last 32 bytes of num[] + do { + size_t chunksize = (inp - num >= revbufsize) ? revbufsize : inp - num; + + const uint8_t *inp_stop = inp - chunksize + 32; // leave one full vector for the end + uint8_t *outp = revbuf; + while (inp > inp_stop) { // may run 0 times + inp -= 32; + ASCIIrev32B(outp, inp); + outp += 32; + } + // reverse first (lowest address) 32 bytes of this chunk of num + // into last 32 bytes of this chunk of revbuf + // if chunksize%32 != 0 this will overlap, which is fine. + ASCIIrev32B(revbuf + chunksize - 32, inp_stop - 32); + bytes_written += fwrite(revbuf, 1, chunksize, stdout); + inp = inp_stop - 32; + } while ( inp > num ); + + return bytes_written; + // caller can putchar('\n') if it wants +} diff --git a/salsa20/benches/mod.rs b/salsa20/benches/mod.rs old mode 100755 new mode 100644