From 4ca19492592d5c1c81e7c35803d810449618effb Mon Sep 17 00:00:00 2001
From: i1i1
Date: Tue, 10 Dec 2024 17:09:54 +0300
Subject: [PATCH] Use bincode for merged block hashes file

Block hashes used to be stored as one block_hashes.json per block under
data/blocks/<block_number>/. Merge them all into a single
data/block_hashes.bincode file instead, mirroring the merged
data/bytecodes.bincode.gz already used for bytecodes.
---
 bins/fetch/src/main.rs                 | 35 ++++++++++++++-----------
 crates/pevm/tests/common/mod.rs        | 24 +++++++----------
 data/block_hashes.bincode              | Bin 0 -> 1400 bytes
 data/blocks/11114732/block_hashes.json |  1 -
 data/blocks/12047794/block_hashes.json |  1 -
 data/blocks/12159808/block_hashes.json |  1 -
 data/blocks/12459406/block_hashes.json |  1 -
 data/blocks/12964999/block_hashes.json |  1 -
 data/blocks/13217637/block_hashes.json |  1 -
 data/blocks/14029313/block_hashes.json |  1 -
 data/blocks/14334629/block_hashes.json |  1 -
 data/blocks/14383540/block_hashes.json |  1 -
 data/blocks/15199017/block_hashes.json |  1 -
 data/blocks/15537393/block_hashes.json |  1 -
 data/blocks/19444337/block_hashes.json |  1 -
 data/blocks/19606599/block_hashes.json |  1 -
 data/blocks/19807137/block_hashes.json |  1 -
 data/blocks/19860366/block_hashes.json |  1 -
 data/blocks/19923400/block_hashes.json |  1 -
 data/blocks/19932703/block_hashes.json |  1 -
 data/blocks/19933597/block_hashes.json |  1 -
 data/blocks/2462997/block_hashes.json  |  1 -
 data/blocks/4864590/block_hashes.json  |  1 -
 data/blocks/5283152/block_hashes.json  |  1 -
 data/blocks/5526571/block_hashes.json  |  1 -
 data/blocks/7279999/block_hashes.json  |  1 -
 data/blocks/7280000/block_hashes.json  |  1 -
 data/blocks/8889776/block_hashes.json  |  1 -
 data/blocks/9069000/block_hashes.json  |  1 -
 29 files changed, 30 insertions(+), 55 deletions(-)
 create mode 100644 data/block_hashes.bincode
 delete mode 100644 data/blocks/11114732/block_hashes.json
 delete mode 100644 data/blocks/12047794/block_hashes.json
 delete mode 100644 data/blocks/12159808/block_hashes.json
 delete mode 100644 data/blocks/12459406/block_hashes.json
 delete mode 100644 data/blocks/12964999/block_hashes.json
 delete mode 100644 data/blocks/13217637/block_hashes.json
 delete mode 100644 data/blocks/14029313/block_hashes.json
 delete mode 100644 data/blocks/14334629/block_hashes.json
 delete mode 100644 data/blocks/14383540/block_hashes.json
 delete mode 100644 data/blocks/15199017/block_hashes.json
 delete mode 100644 data/blocks/15537393/block_hashes.json
 delete mode 100644 data/blocks/19444337/block_hashes.json
 delete mode 100644 data/blocks/19606599/block_hashes.json
 delete mode 100644 data/blocks/19807137/block_hashes.json
 delete mode 100644 data/blocks/19860366/block_hashes.json
 delete mode 100644 data/blocks/19923400/block_hashes.json
 delete mode 100644 data/blocks/19932703/block_hashes.json
 delete mode 100644 data/blocks/19933597/block_hashes.json
 delete mode 100644 data/blocks/2462997/block_hashes.json
 delete mode 100644 data/blocks/4864590/block_hashes.json
 delete mode 100644 data/blocks/5283152/block_hashes.json
 delete mode 100644 data/blocks/5526571/block_hashes.json
 delete mode 100644 data/blocks/7279999/block_hashes.json
 delete mode 100644 data/blocks/7280000/block_hashes.json
 delete mode 100644 data/blocks/8889776/block_hashes.json
 delete mode 100644 data/blocks/9069000/block_hashes.json

diff --git a/bins/fetch/src/main.rs b/bins/fetch/src/main.rs
index ddae30bb..de81abae 100644
--- a/bins/fetch/src/main.rs
+++ b/bins/fetch/src/main.rs
@@ -3,7 +3,7 @@ use std::{
     collections::BTreeMap,
     error::Error,
     fs::{self, File},
-    io::{BufReader, Write},
+    io::BufReader,
     num::NonZeroUsize,
 };
 
@@ -72,8 +72,6 @@ async fn main() -> Result<(), Box<dyn Error>> {
         serde_json::to_writer(block_file, &block)
             .map_err(|err| format!("Failed to write block to file: {err}"))?;
{err}"))?; - // Populate bytecodes and state from RPC storage. - let mut state = BTreeMap::::new(); // TODO: Deduplicate logic with [for_each_block_from_disk] when there is more usage let mut bytecodes: BTreeMap = match File::open("data/bytecodes.bincode.gz") { Ok(compressed_file) => { @@ -83,6 +81,9 @@ async fn main() -> Result<(), Box> { Err(_) => BTreeMap::new(), }; bytecodes.extend(storage.get_cache_bytecodes()); + + // Populate bytecodes and state from RPC storage. + let mut state = BTreeMap::::new(); for (address, mut account) in storage.get_cache_accounts() { if let Some(code) = account.code.take() { let code_hash = account @@ -95,28 +96,32 @@ async fn main() -> Result<(), Box> { } // Write compressed bytecodes to disk. - let file_bytecodes = File::create("data/bytecodes.bincode.gz") + let writer_bytecodes = File::create("data/bytecodes.bincode.gz") + .map(|f| GzEncoder::new(f, Compression::default())) .map_err(|err| format!("Failed to create compressed bytecodes file: {err}"))?; - let serialized_bytecodes = bincode::serialize(&bytecodes) - .map_err(|err| format!("Failed to serialize bytecodes to bincode: {err}"))?; - GzEncoder::new(file_bytecodes, Compression::default()) - .write_all(&serialized_bytecodes) + bincode::serialize_into(writer_bytecodes, &bytecodes) .map_err(|err| format!("Failed to write bytecodes to file: {err}"))?; // Write pre-state to disk. let file_state = File::create(format!("{block_dir}/pre_state.json")) .map_err(|err| format!("Failed to create pre-state file: {err}"))?; - let json_state = serde_json::to_value(&state) - .map_err(|err| format!("Failed to serialize pre-state to JSON: {err}"))?; - serde_json::to_writer(file_state, &json_state) + serde_json::to_writer(file_state, &state) .map_err(|err| format!("Failed to write pre-state to file: {err}"))?; - // Write block hashes to disk. 
-        let block_hashes: BTreeMap<u64, B256> = storage.get_cache_block_hashes().into_iter().collect();
+        // TODO: Deduplicate logic with [for_each_block_from_disk] when there is more usage
+        let mut block_hashes = match File::open("data/block_hashes.bincode") {
+            Ok(file) => bincode::deserialize_from(file)
+                .map_err(|err| format!("Failed to deserialize block hashes from file: {err}"))?,
+            Err(_) => BTreeMap::<u64, B256>::new(),
+        };
+
+        block_hashes.extend(storage.get_cache_block_hashes());
+
         if !block_hashes.is_empty() {
-            let file = File::create(format!("{block_dir}/block_hashes.json"))
+            // Write merged block hashes to disk.
+            let file = File::create("data/block_hashes.bincode")
                 .map_err(|err| format!("Failed to create block hashes file: {err}"))?;
-            serde_json::to_writer(file, &block_hashes)
+            bincode::serialize_into(file, &block_hashes)
                 .map_err(|err| format!("Failed to write block hashes to file: {err}"))?;
         }

diff --git a/crates/pevm/tests/common/mod.rs b/crates/pevm/tests/common/mod.rs
index c885cd97..6266d8d5 100644
--- a/crates/pevm/tests/common/mod.rs
+++ b/crates/pevm/tests/common/mod.rs
@@ -10,9 +10,7 @@ use alloy_primitives::{Address, Bytes, PrimitiveSignature, TxKind, B256, U256};
 use alloy_rpc_types_eth::{Block, BlockTransactions, Header};
 use flate2::bufread::GzDecoder;
 use hashbrown::HashMap;
-use pevm::{
-    chain::PevmChain, BlockHashes, BuildSuffixHasher, Bytecodes, EvmAccount, InMemoryStorage,
-};
+use pevm::{chain::PevmChain, BlockHashes, BuildSuffixHasher, EvmAccount, InMemoryStorage};
 
 /// runner module
 pub mod runner;
@@ -32,19 +30,22 @@ pub fn for_each_block_from_disk(mut handler: impl FnMut(Block, InMemoryStorage<'
     let data_dir = std::path::PathBuf::from("../../data");
 
     // TODO: Deduplicate logic with [bin/fetch.rs] when there is more usage
-    let bytecodes: Bytecodes = bincode::deserialize_from(GzDecoder::new(BufReader::new(
+    let bytecodes = bincode::deserialize_from(GzDecoder::new(BufReader::new(
         File::open(data_dir.join("bytecodes.bincode.gz")).unwrap(),
     )))
     .unwrap();
 
+    let block_hashes = bincode::deserialize_from::<_, BlockHashes>(BufReader::new(
+        File::open(data_dir.join("block_hashes.bincode")).unwrap(),
+    ))
+    .unwrap();
+
     for block_path in fs::read_dir(data_dir.join("blocks")).unwrap() {
         let block_path = block_path.unwrap().path();
-        let block_number = block_path.file_name().unwrap().to_str().unwrap();
-
-        let block_dir = data_dir.join("blocks").join(block_number);
+        let block_dir = block_path;
 
         // Parse block
-        let block: Block = serde_json::from_reader(BufReader::new(
+        let block = serde_json::from_reader(BufReader::new(
             File::open(block_dir.join("block.json")).unwrap(),
         ))
         .unwrap();
@@ -55,14 +56,9 @@ pub fn for_each_block_from_disk(mut handler: impl FnMut(Block, InMemoryStorage<'
         )
         .unwrap();
 
-        // Parse block hashes
-        let block_hashes: BlockHashes = File::open(block_dir.join("block_hashes.json"))
-            .map(|file| serde_json::from_reader::<_, BlockHashes>(BufReader::new(file)).unwrap())
-            .unwrap_or_default();
-
         handler(
             block,
-            InMemoryStorage::new(accounts, Some(&bytecodes), block_hashes),
+            InMemoryStorage::new(accounts, Some(&bytecodes), block_hashes.clone()),
         );
     }
 }

diff --git a/data/block_hashes.bincode b/data/block_hashes.bincode
new file mode 100644
index 0000000000000000000000000000000000000000..bc03e4d22c44f0d88be7309c5ff7c45b398f2784
GIT binary patch
literal 1400
[1400 bytes of base85-encoded binary patch data omitted]
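
For reference, below is a minimal, self-contained sketch of the round trip
over the new merged file. It is illustrative rather than code from the patch:
it assumes bincode 1.x and alloy-primitives with serde support, models
BlockHashes as the BTreeMap<u64, B256> map used in bins/fetch/src/main.rs,
and the block number and B256::ZERO below are placeholder data.

use std::{collections::BTreeMap, fs::File, io::BufReader};

use alloy_primitives::B256;

/// Block number -> block hash, as serialized into data/block_hashes.bincode.
type BlockHashes = BTreeMap<u64, B256>;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Writer side (as in bins/fetch): load the existing merged map if the
    // file exists, merge new entries, and rewrite the whole file.
    let mut block_hashes: BlockHashes = match File::open("data/block_hashes.bincode") {
        Ok(file) => bincode::deserialize_from(BufReader::new(file))?,
        Err(_) => BTreeMap::new(),
    };
    block_hashes.insert(19_933_597, B256::ZERO); // placeholder entry

    // bincode streams directly into the writer, no intermediate Vec<u8>.
    bincode::serialize_into(File::create("data/block_hashes.bincode")?, &block_hashes)?;

    // Reader side (as in the pevm tests): deserialize the merged map once up
    // front, then share it across all blocks.
    let loaded: BlockHashes = bincode::deserialize_from(BufReader::new(File::open(
        "data/block_hashes.bincode",
    )?))?;
    assert_eq!(loaded.get(&19_933_597), Some(&B256::ZERO));
    Ok(())
}

Unlike data/bytecodes.bincode.gz, the merged block hashes file is only 1400
bytes, which is presumably why the patch writes plain bincode without gzip
compression.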