feat: make chunk size user defined #390

Closed · wants to merge 1 commit
10 changes: 8 additions & 2 deletions benches/lib.rs
@@ -51,6 +51,11 @@ use std::time::Duration;
 // https://bheisler.github.io/criterion.rs/book/analysis.html#measurement
 const SAMPLE_SIZE: usize = 20;

+/// The maximum size (before compression) of an individual chunk of a file, defined as 1 MiB.
+const MAX_CHUNK_SIZE: usize = 1024 * 1024;
+/// The minimum size (before compression) of an individual chunk of a file, defined as 1 byte.
+const MIN_CHUNK_SIZE: usize = 1;
+
 fn custom_criterion() -> Criterion {
     Criterion::default()
         .measurement_time(Duration::from_secs(40))
@@ -63,7 +68,8 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) {
         || random_bytes(bytes_len),
         // actual benchmark
         |bytes| {
-            let (_data_map, _encrypted_chunks) = encrypt(bytes).unwrap();
+            let (_data_map, _encrypted_chunks) =
+                encrypt(bytes, MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap();
         },
         BatchSize::SmallInput,
     );
@@ -72,7 +78,7 @@ fn write(b: &mut Bencher<'_>, bytes_len: usize) {
 fn read(b: &mut Bencher, bytes_len: usize) {
     b.iter_batched(
         // the setup
-        || encrypt(random_bytes(bytes_len)).unwrap(),
+        || encrypt(random_bytes(bytes_len), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap(),
         // actual benchmark
         |(data_map, encrypted_chunks)| {
             let _raw_data = decrypt_full_set(&data_map, &encrypted_chunks).unwrap();
8 changes: 7 additions & 1 deletion examples/basic_encryptor.rs
@@ -89,6 +89,11 @@ fn file_name(name: XorName) -> String {
     string
 }

+/// The maximum size (before compression) of an individual chunk of a file, defined as 1 MiB.
+const MAX_CHUNK_SIZE: usize = 1024 * 1024;
+/// The minimum size (before compression) of an individual chunk of a file, defined as 1 byte.
+const MIN_CHUNK_SIZE: usize = 1;
+
 #[derive(Clone)]
 struct DiskBasedStorage {
     pub(crate) storage_path: String,
@@ -147,7 +152,8 @@ async fn main() {
         Err(error) => return println!("{}", error),
     }

-    let (data_map, encrypted_chunks) = encrypt(Bytes::from(data)).unwrap();
+    let (data_map, encrypted_chunks) =
+        encrypt(Bytes::from(data), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap();

     let result = encrypted_chunks
         .par_iter()
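Taken together with the benchmark and example diffs above, the downstream call shape on this branch looks roughly like this. A minimal sketch: the constants mirror the ones this PR hard-codes in the example, and decrypt_full_set is used exactly as in benches/lib.rs.

use bytes::Bytes;
use self_encryption::{decrypt_full_set, encrypt};

// Chunk size bounds, mirroring the constants added in this PR's example code.
const MIN_CHUNK_SIZE: usize = 1;
const MAX_CHUNK_SIZE: usize = 1024 * 1024;

fn main() {
    // 5 MiB of arbitrary bytes, comfortably above the three-chunk minimum.
    let data = Bytes::from(vec![7u8; 5 * 1024 * 1024]);

    // The chunk size bounds are now explicit arguments at every call site
    // rather than crate-level constants.
    let (data_map, encrypted_chunks) =
        encrypt(data.clone(), MIN_CHUNK_SIZE, MAX_CHUNK_SIZE).unwrap();

    // The data map records the max chunk size it was built with.
    assert_eq!(data_map.max_chunk_size(), MAX_CHUNK_SIZE);
    assert_eq!(data_map.file_size(), data.len());

    let plaintext = decrypt_full_set(&data_map, &encrypted_chunks).unwrap();
    assert_eq!(plaintext, data);
}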
12 changes: 6 additions & 6 deletions src/chunk.rs
@@ -32,15 +32,15 @@ pub struct RawChunk {

 /// Hash all the chunks.
 /// Creates [num cores] batches.
-pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec<EncryptionBatch>) {
+pub(crate) fn batch_chunks(bytes: Bytes, max_chunk_size: usize) -> (usize, Vec<EncryptionBatch>) {
     let data_size = bytes.len();
-    let num_chunks = get_num_chunks(data_size);
+    let num_chunks = get_num_chunks(data_size, max_chunk_size);

     let raw_chunks: Vec<_> = (0..num_chunks)
         .map(|index| (index, bytes.clone()))
         .par_bridge()
         .map(|(index, bytes)| {
-            let (start, end) = get_start_end_positions(data_size, index);
+            let (start, end) = get_start_end_positions(data_size, index, max_chunk_size);
             let data = bytes.slice(start..end);
             let hash = XorName::from_content(data.as_ref());
             RawChunk { index, data, hash }
@@ -63,10 +63,10 @@ pub(crate) fn batch_chunks(bytes: Bytes) -> (usize, Vec<EncryptionBatch>) {
 }

 /// Calculate (start_position, end_position) for each chunk for the input file size
-pub(crate) fn batch_positions(data_size: usize) -> Vec<(usize, usize)> {
-    let num_chunks = get_num_chunks(data_size);
+pub(crate) fn batch_positions(data_size: usize, max_chunk_size: usize) -> Vec<(usize, usize)> {
+    let num_chunks = get_num_chunks(data_size, max_chunk_size);

     (0..num_chunks)
-        .map(|index| get_start_end_positions(data_size, index))
+        .map(|index| get_start_end_positions(data_size, index, max_chunk_size))
         .collect()
 }
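get_num_chunks and get_start_end_positions sit outside these hunks, so the diff only shows max_chunk_size being threaded into them. As a reading aid, here is a hypothetical sketch of the boundary arithmetic such helpers perform; the crate's real versions also enforce the three-chunk minimum noted in src/data_map.rs and balance the final chunk sizes, which this toy version omits.

// Hypothetical stand-ins, not the crate's code: every chunk is max_chunk_size
// bytes long except the last, which takes whatever remains.
fn get_num_chunks(data_size: usize, max_chunk_size: usize) -> usize {
    data_size.div_ceil(max_chunk_size)
}

fn get_start_end_positions(
    data_size: usize,
    index: usize,
    max_chunk_size: usize,
) -> (usize, usize) {
    let start = index * max_chunk_size;
    let end = usize::min(start + max_chunk_size, data_size);
    (start, end)
}

fn main() {
    // 2.5 chunks' worth of data -> 3 chunks, the last one half-sized.
    let positions: Vec<_> = (0..get_num_chunks(2560, 1024))
        .map(|index| get_start_end_positions(2560, index, 1024))
        .collect();
    assert_eq!(positions, [(0, 1024), (1024, 2048), (2048, 2560)]);
}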
29 changes: 20 additions & 9 deletions src/data_map.rs
@@ -13,10 +13,14 @@ use xor_name::XorName;

 /// Holds the information that is required to recover the content of the encrypted file.
 /// This is held as a vector of `ChunkInfo`, i.e. a list of the file's chunk hashes.
-/// Only files larger than 3072 bytes (3 * MIN_CHUNK_SIZE) can be self-encrypted.
+/// Only files larger than 3072 bytes (3 * chunk size) can be self-encrypted.
 /// Smaller files will have to be batched together.
 #[derive(Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone)]
-pub struct DataMap(Vec<ChunkInfo>);
+pub struct DataMap {
+    /// max chunk size used during encryption
+    max_chunk_size: usize,
+    chunks: Vec<ChunkInfo>,
+}

 #[allow(clippy::len_without_is_empty)]
 impl DataMap {
@@ -25,19 +29,26 @@ impl DataMap {
     /// Sorts on instantiation.
     /// The algorithm requires this to be a sorted list to allow get_pad_iv_key to obtain the
     /// correct pre-encryption hashes for decryption/encryption.
-    pub fn new(mut keys: Vec<ChunkInfo>) -> Self {
+    pub fn new(max_chunk_size: usize, mut keys: Vec<ChunkInfo>) -> Self {
         keys.sort_by(|a, b| a.index.cmp(&b.index));
-        Self(keys)
+        Self {
+            max_chunk_size,
+            chunks: keys,
+        }
     }

     /// Original (pre-encryption) size of the file.
     pub fn file_size(&self) -> usize {
-        DataMap::total_size(&self.0)
+        DataMap::total_size(&self.chunks)
     }

+    /// Returns the maximum chunk size used during encryption.
+    pub fn max_chunk_size(&self) -> usize {
+        self.max_chunk_size
+    }
     /// Returns the list of chunks pre and post encryption hashes if present.
     pub fn infos(&self) -> Vec<ChunkInfo> {
-        self.0.to_vec()
+        self.chunks.to_vec()
     }

     /// Iterates through the keys to figure out the total size of the data, i.e. the file size.
@@ -48,9 +59,9 @@ impl DataMap {

 impl Debug for DataMap {
     fn fmt(&self, formatter: &mut Formatter) -> Result<(), std::fmt::Error> {
-        writeln!(formatter, "DataMap:")?;
-        let len = self.0.len();
-        for (index, chunk) in self.0.iter().enumerate() {
+        writeln!(formatter, "DataMap max_chunk {:?}:", self.max_chunk_size)?;
+        let len = self.chunks.len();
+        for (index, chunk) in self.chunks.iter().enumerate() {
             if index + 1 == len {
                 write!(formatter, " {:?}", chunk)?
             } else {
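Since DataMap derives Serialize and Deserialize, the added field is also a wire-format change: maps persisted by earlier releases carry no max_chunk_size and will generally fail to deserialize against this branch. A quick round-trip check of the new shape (a hypothetical test; bincode is an arbitrary serializer choice, not one the crate mandates):

use self_encryption::{ChunkInfo, DataMap};

fn main() {
    // An empty chunk list is enough to exercise the new field.
    let map = DataMap::new(1024 * 1024, Vec::<ChunkInfo>::new());
    let serialised = bincode::serialize(&map).unwrap();
    let restored: DataMap = bincode::deserialize(&serialised).unwrap();
    assert_eq!(restored.max_chunk_size(), 1024 * 1024);
}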
7 changes: 5 additions & 2 deletions src/encrypt.rs
@@ -22,7 +22,10 @@ use std::sync::Arc;
 use xor_name::XorName;

 /// Encrypt the chunks
-pub(crate) fn encrypt(batches: Vec<EncryptionBatch>) -> (DataMap, Vec<EncryptedChunk>) {
+pub(crate) fn encrypt(
+    max_chunk_size: usize,
+    batches: Vec<EncryptionBatch>,
+) -> (DataMap, Vec<EncryptedChunk>) {
     let src_hashes = Arc::new(
         batches
             .iter()
@@ -84,7 +87,7 @@ pub(crate) fn encrypt(batches: Vec<EncryptionBatch>) -> (DataMap, Vec<EncryptedChunk>) {
         },
     );

-    (DataMap::new(keys), chunks)
+    (DataMap::new(max_chunk_size, keys), chunks)
 }

 /// Encrypt the chunk
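The public wrapper in src/lib.rs is not among the hunks shown here. Piecing the call sites together, the plumbing presumably ends up along these lines (a sketch of the internal flow under that assumption, not the PR's actual lib.rs; the Result alias and the Error::Generic message are illustrative):

// Sketch of how the pieces above compose inside the crate. batch_chunks and
// the inner encrypt are pub(crate), so this is a reading aid, not user code.
use bytes::Bytes;

use crate::{chunk, encrypt as encrypt_mod, DataMap, EncryptedChunk, Error, Result};

pub fn encrypt(
    bytes: Bytes,
    min_chunk_size: usize,
    max_chunk_size: usize,
) -> Result<(DataMap, Vec<EncryptedChunk>)> {
    // Presumably rejects inputs below the three-chunk minimum, now expressed
    // in terms of the caller-supplied min_chunk_size.
    if bytes.len() < 3 * min_chunk_size {
        return Err(Error::Generic(format!(
            "Too small for self-encryption! Required size at least {}",
            3 * min_chunk_size
        )));
    }
    let (_num_chunks, batches) = chunk::batch_chunks(bytes, max_chunk_size);
    let (data_map, encrypted_chunks) = encrypt_mod::encrypt(max_chunk_size, batches);
    Ok((data_map, encrypted_chunks))
}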