Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add OnDiskStorage #246

Draft
wants to merge 18 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
287 changes: 287 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ name = "pevm"
version = "0.1.0"
edition = "2021"

[workspace.package]
version = "1.0.1"
edition = "2021"
rust-version = "1.79"

[dependencies]
# Put this behind a feature flag if there are use cases & users
# that need more security guarantees even for internally hashing
Expand Down Expand Up @@ -33,8 +38,13 @@ alloy-transport-http = "0.2.1"
reqwest = "0.12.5"
tokio = { version = "1.39.2", features = ["rt-multi-thread"] }

[dev-dependencies]
# On-Disk Storage dependencies
# TODO: Mark these deps as optional under a feature.
# https://doc.rust-lang.org/cargo/reference/features.html#optional-dependencies
reth-libmdbx = { path = "crates/libmdbx-rs" }
bincode = "1.3.3"

[dev-dependencies]
criterion = "0.5.1"
rand = "0.8.5"
rayon = "1.10.0"
Expand Down
60 changes: 52 additions & 8 deletions benches/mainnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@

#![allow(missing_docs)]

use std::{num::NonZeroUsize, thread};
use std::{
num::NonZeroUsize,
thread,
time::{Duration, Instant},
};

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pevm::chain::PevmEthereum;
use pevm::{chain::PevmEthereum, OnDiskStorage};

// Better project structure
#[path = "../tests/common/mod.rs"]
Expand All @@ -23,37 +27,77 @@ pub fn criterion_benchmark(c: &mut Criterion) {
// 8 seems to be the sweet max for Ethereum blocks. Any more
// will yield many overheads and hurt execution on (small) blocks
// with many dependencies.
.min(NonZeroUsize::new(8).unwrap());
.min(NonZeroUsize::new(12).unwrap());

common::for_each_block_from_disk(|block, storage| {
common::for_each_block_from_disk(|block, in_memory_storage, mdbx_dir| {
let mut group = c.benchmark_group(format!(
"Block {}({} txs, {} gas)",
block.header.number.unwrap(),
block.transactions.len(),
block.header.gas_used
));
group.bench_function("Sequential", |b| {
group.bench_function("Sequential/In Memory", |b| {
b.iter(|| {
pevm::execute(
black_box(&storage),
black_box(&in_memory_storage),
black_box(&chain),
black_box(block.clone()),
black_box(concurrency_level),
black_box(true),
)
})
});
group.bench_function("Parallel", |b| {
group.bench_function("Parallel/In Memory", |b| {
b.iter(|| {
pevm::execute(
black_box(&storage),
black_box(&in_memory_storage),
black_box(&chain),
black_box(block.clone()),
black_box(concurrency_level),
black_box(false),
)
})
});
group.bench_function("Sequential/On Disk", |b| {
b.iter_custom(|iters| {
let mut total_duration = Duration::ZERO;
for _i in 0..iters {
let on_disk_storage = OnDiskStorage::open(mdbx_dir).unwrap();
let start = Instant::now();
pevm::execute(
black_box(&on_disk_storage),
black_box(&chain),
black_box(block.clone()),
black_box(concurrency_level),
black_box(true),
)
.unwrap();
total_duration += start.elapsed();
}
total_duration
})
});

group.bench_function("Parallel/On Disk", |b| {
b.iter_custom(|iters| {
let mut total_duration = Duration::ZERO;
for _i in 0..iters {
let on_disk_storage = OnDiskStorage::open(mdbx_dir).unwrap();
let start = Instant::now();
pevm::execute(
black_box(&on_disk_storage),
black_box(&chain),
black_box(block.clone()),
black_box(concurrency_level),
black_box(false),
)
.unwrap();
total_duration += start.elapsed();
}
total_duration
})
});

group.finish();
});
}
Expand Down
26 changes: 5 additions & 21 deletions benches/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,13 @@ def read_estimate(block, exec_type):
return (estimates["slope"] or estimates["mean"])["point_estimate"]


total_sequential = 0
total_parallel = 0
max_speed_up = 0
min_speed_up = float("inf")

for path in os.listdir(CRITERION_PATH):
if path.startswith("Block"):
estimate_sequential = read_estimate(path, "Sequential")
total_sequential += estimate_sequential

estimate_parallel = read_estimate(path, "Parallel")
total_parallel += estimate_parallel

speed_up = round(estimate_sequential / estimate_parallel, 2)
max_speed_up = max(max_speed_up, speed_up)
min_speed_up = min(min_speed_up, speed_up)
seq_ims = read_estimate(path, "Sequential_In Memory")
par_ims = read_estimate(path, "Parallel_In Memory")
seq_ods = read_estimate(path, "Sequential_On Disk")
par_ods = read_estimate(path, "Parallel_On Disk")

print(f"{path}")
print(
f"{format_ms(estimate_sequential)} {format_ms(estimate_parallel)} {speed_up}\n"
f"{path: <40}\t:{format_ms(seq_ims)}\t{format_ms(par_ims)}\t{format_ms(seq_ods)}\t{format_ms(par_ods)}"
)


print(f"Average: x{round(total_sequential / total_parallel, 2)}")
print(f"Max: x{max_speed_up}")
print(f"Min: x{min_speed_up}")
41 changes: 41 additions & 0 deletions crates/libmdbx-rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
[package]
name = "reth-libmdbx"
description = "Idiomatic and safe MDBX wrapper"
version.workspace = true
edition.workspace = true
rust-version.workspace = true

[dependencies]
reth-mdbx-sys = { path = "./mdbx-sys" }

bitflags = "2"
byteorder = "1"
derive_more = "0.99"
indexmap = "2"
thiserror = "1"
dashmap = { version = "6.0.1", features = ["inline"], optional = true }
tracing = "0.1.0"

[features]
default = []
return-borrowed = []
read-tx-timeouts = ["dashmap", "dashmap/inline"]

[dev-dependencies]
pprof = { version = "0.13", features = [
"flamegraph",
"frame-pointer",
"criterion",
] }
criterion = "0.5.1"
rand = "0.8"
rand_xorshift = "0.3"
tempfile = "3"

[[bench]]
name = "cursor"
harness = false

[[bench]]
name = "transaction"
harness = false
25 changes: 25 additions & 0 deletions crates/libmdbx-rs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# libmdbx-rs


Rust bindings for [libmdbx](https://libmdbx.dqdkfa.ru).

Forked from an earlier Apache licenced version of the `libmdbx-rs` crate, before it changed licence to GPL.
NOTE: Most of the repo came from [lmdb-rs bindings](https://github.com/mozilla/lmdb-rs).

## Updating the libmdbx Version

To update the libmdbx version you must clone it and copy the `dist/` folder into `mdbx-sys/`.
Make sure to follow the [building steps](https://libmdbx.dqdkfa.ru/usage.html#getting).

```bash
# clone libmdbx into a directory outside this repository, at a specific tag
git clone https://gitflic.ru/project/erthink/libmdbx.git ../libmdbx --branch v0.7.0
make -C ../libmdbx dist

# copy the `libmdbx/dist/` folder just created into `mdbx-sys/libmdbx`
rm -rf mdbx-sys/libmdbx
cp -R ../libmdbx/dist mdbx-sys/libmdbx

# add the changes to the next commit you will make
git add mdbx-sys/libmdbx
```
112 changes: 112 additions & 0 deletions crates/libmdbx-rs/benches/cursor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#![allow(missing_docs)]
mod utils;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pprof::criterion::{Output, PProfProfiler};
use reth_libmdbx::{ffi::*, *};
use std::ptr;
use utils::*;

/// Measures sequential reads performed through the cursor's iterator API.
///
/// Each timed iteration opens a new cursor on a shared read-only transaction
/// and drives `Cursor::iter` three times in a row: once unwrapping every
/// entry, once discarding `Err` entries, and once propagating errors with `?`.
/// The summed key/value lengths are handed to `black_box`, and the number of
/// entries tallied by the first two passes is asserted to equal `entry_count`,
/// the value given to `setup_bench_db`.
fn bench_get_seq_iter(c: &mut Criterion) {
    /// Walks the cursor's iterator to the end, returning the first error hit.
    fn iterate<K: TransactionKind>(cursor: &mut Cursor<K>) -> Result<()> {
        let mut total_len = 0;
        for entry in cursor.iter::<ObjectLength, ObjectLength>() {
            let (key_len, data_len) = entry?;
            total_len = total_len + *key_len + *data_len;
        }
        Ok(())
    }

    let entry_count = 100;
    let (_dir, env) = setup_bench_db(entry_count);
    let txn = env.begin_ro_txn().unwrap();
    let db = txn.open_db(None).unwrap();

    c.bench_function("bench_get_seq_iter", |b| {
        b.iter(|| {
            let mut cursor = txn.cursor(&db).unwrap();
            let mut total_len = 0;
            let mut seen = 0u32;

            // Shared accumulator for the first two passes.
            let mut tally = |(key_len, data_len): (ObjectLength, ObjectLength)| {
                total_len += *key_len + *data_len;
                seen += 1;
            };

            // Pass 1: panic on the first `Err` entry.
            cursor
                .iter::<ObjectLength, ObjectLength>()
                .map(|entry| entry.unwrap())
                .for_each(&mut tally);

            // Pass 2: same cursor, silently dropping `Err` entries.
            cursor
                .iter::<ObjectLength, ObjectLength>()
                .filter_map(|entry| entry.ok())
                .for_each(&mut tally);

            // Pass 3: error-propagating variant; its sum is local to `iterate`.
            iterate(&mut cursor).unwrap();

            black_box(total_len);
            assert_eq!(seen, entry_count);
        })
    });
}

/// Measures one sequential pass over the database through a fresh cursor.
///
/// Each timed iteration opens a cursor on a shared read-only transaction,
/// unwraps every entry yielded by its iterator, and accumulates both the
/// combined key/value lengths and the entry count. The length sum is passed
/// to `black_box`; the count is asserted to equal `entry_count`, the value
/// given to `setup_bench_db`.
fn bench_get_seq_cursor(c: &mut Criterion) {
    let entry_count = 100;
    let (_dir, env) = setup_bench_db(entry_count);
    let txn = env.begin_ro_txn().unwrap();
    let db = txn.open_db(None).unwrap();

    c.bench_function("bench_get_seq_cursor", |b| {
        b.iter(|| {
            let mut cursor = txn.cursor(&db).unwrap();
            let mut total_len = 0;
            let mut seen = 0;

            for entry in cursor.iter::<ObjectLength, ObjectLength>() {
                let (key, val) = entry.unwrap();
                total_len += *key + *val;
                seen += 1;
            }

            black_box(total_len);
            assert_eq!(seen, entry_count);
        })
    });
}

/// Benchmark of raw MDBX sequential read performance (control).
///
/// Calls the FFI functions `mdbx_cursor_open`, `mdbx_cursor_get` and
/// `mdbx_cursor_close` directly instead of going through the safe cursor
/// wrapper. Each timed iteration opens a cursor, steps through it with
/// `MDBX_NEXT` while summing the key and value lengths, asserts that `n`
/// entries were visited, and closes the cursor again.
fn bench_get_seq_raw(c: &mut Criterion) {
let n = 100;
let (_dir, env) = setup_bench_db(n);

// The database handle is obtained through a temporary read-only transaction;
// the transaction used for the timed reads is opened separately on the next line.
let dbi = env.begin_ro_txn().unwrap().open_db(None).unwrap().dbi();
let txn = env.begin_ro_txn().unwrap();

// Out-parameters for the FFI calls. They are declared once here and reused
// (overwritten) by every benchmark iteration.
let mut key = MDBX_val { iov_len: 0, iov_base: ptr::null_mut() };
let mut data = MDBX_val { iov_len: 0, iov_base: ptr::null_mut() };
let mut cursor: *mut MDBX_cursor = ptr::null_mut();

c.bench_function("bench_get_seq_raw", |b| {
// SAFETY (review note): the cursor is opened at the start of each iteration
// and closed at the end of the same iteration, so it is never used after
// `mdbx_cursor_close`. Only the `iov_len` fields of `key`/`data` are read;
// `iov_base` is never dereferenced. The raw transaction pointer is the one
// handed to the closure by `txn_execute` — presumably valid for the duration
// of the closure; confirm against `txn_execute`.
b.iter(|| unsafe {
txn.txn_execute(|txn| {
// NOTE(review): the return code of `mdbx_cursor_open` is ignored; if it
// failed, the only visible symptom would be the `count` assertion below.
mdbx_cursor_open(txn, dbi, &mut cursor);
let mut i = 0;
let mut count = 0u32;

// Keep stepping until `mdbx_cursor_get` returns non-zero
// (presumably 0 is the MDBX success code — confirm).
while mdbx_cursor_get(cursor, &mut key, &mut data, MDBX_NEXT) == 0 {
i += key.iov_len + data.iov_len;
count += 1;
}

// Keep the accumulated length observable so the loop is not optimized away.
black_box(i);
assert_eq!(count, n);
mdbx_cursor_close(cursor);
})
.unwrap();
})
});
}

// Groups the three sequential-read benchmarks under the name `benches` and
// attaches a pprof profiler configured for flamegraph output.
// NOTE(review): the `100` passed to `PProfProfiler::new` is presumably the
// sampling frequency — confirm against the pprof crate documentation.
criterion_group! {
name = benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets = bench_get_seq_iter, bench_get_seq_cursor, bench_get_seq_raw
}
// Entry point for the benchmark binary, running the `benches` group above.
criterion_main!(benches);
Loading
Loading