diff --git a/.github/codespell/words.txt b/.github/codespell/words.txt
index 3ebd95862..7f5a67ef9 100644
--- a/.github/codespell/words.txt
+++ b/.github/codespell/words.txt
@@ -3,3 +3,4 @@ manuel
 numer
 ser
 shs
+widder
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 2632f2e49..51487ac90 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -1,4 +1,4 @@
-name: Codespell
+name: Spelling
 on:
   pull_request:
   push:
@@ -15,8 +15,4 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: codespell-project/actions-codespell@v2
-        with:
-          skip: './code/target'
-          ignore_words_file: .github/codespell/words.txt
-
+      - uses: crate-ci/typos@master
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index be853e20f..b83fe289a 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -27,6 +27,8 @@ jobs:
         uses: actions/checkout@v4
       - name: Install Protoc
         uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
       - name: Setup Node
         uses: actions/setup-node@v3
         with:
@@ -47,7 +49,7 @@
           cargo llvm-cov nextest \
             --workspace \
             --exclude malachite-itf \
-            --ignore-filename-regex node/bin \
+            --ignore-filename-regex crates/cli \
             --all-features \
             --ignore-run-fail \
             --lcov \
@@ -75,6 +77,8 @@ jobs:
         uses: actions/checkout@v4
       - name: Install Protoc
         uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
       - name: Setup Node
         uses: actions/setup-node@v3
         with:
diff --git a/.github/workflows/mbt.yml b/.github/workflows/mbt.yml
index 9af99db13..dfc6fc14b 100644
--- a/.github/workflows/mbt.yml
+++ b/.github/workflows/mbt.yml
@@ -29,6 +29,8 @@ jobs:
         uses: actions/checkout@v4
       - name: Install Protoc
         uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
       - name: Setup Node
         uses: actions/setup-node@v3
         with:
@@ -40,10 +42,10 @@ jobs:
       - name: Install cargo-nextest
         uses: taiki-e/install-action@cargo-nextest
       - name: Build code
-        working-directory: code/itf
+        working-directory: code/crates/itf
         run: cargo nextest run --workspace --all-features --no-run
       - name: Current time as random seed for Quint
         run: echo "QUINT_SEED=$(date +%s)" >> $GITHUB_ENV
       - name: Run tests from traces (with random seed)
-        working-directory: code/itf
+        working-directory: code/crates/itf
         run: cargo nextest run -p malachite-itf --all-features
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 0c5d66700..28555e602 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -33,6 +33,8 @@ jobs:
         uses: actions/checkout@v4
       - name: Install Protoc
         uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
       - name: Setup Node
         uses: actions/setup-node@v3
         with:
@@ -56,6 +58,8 @@ jobs:
         uses: actions/checkout@v4
       - name: Install Protoc
         uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
       - name: Setup Rust toolchain
         uses: actions-rust-lang/setup-rust-toolchain@v1
         with:
@@ -94,4 +98,4 @@ jobs:
       - name: Install cargo-msrv
         run: cargo binstall --no-confirm --force cargo-msrv@0.16.0-beta.20
       - name: Check MSRV
-        run: cargo msrv verify --output-format minimal --manifest-path code/driver/Cargo.toml -- 'cargo check --all-features'
+        run: cargo msrv verify --output-format minimal --manifest-path code/crates/driver/Cargo.toml -- 'cargo check --all-features'
diff --git a/code/.dockerignore b/code/.dockerignore
new file mode 100644
index 000000000..39a8ce060
--- /dev/null
+++ b/code/.dockerignore
@@
-0,0 +1,2 @@ +target +scripts diff --git a/code/.gitignore b/code/.gitignore new file mode 100644 index 000000000..8d206c8fa --- /dev/null +++ b/code/.gitignore @@ -0,0 +1 @@ +/x diff --git a/code/Cargo.toml b/code/Cargo.toml index 29243d39b..bd795cfdf 100644 --- a/code/Cargo.toml +++ b/code/Cargo.toml @@ -2,18 +2,7 @@ resolver = "2" members = [ - "actors", - "cli", - "common", - "driver", - "gossip", - "itf", - "network", - "node", - "proto", - "round", - "test", - "vote", + "crates/*" ] [workspace.package] @@ -24,35 +13,51 @@ license = "Apache-2.0" publish = false rust-version = "1.77" +[profile.release] +overflow-checks = true + +[profile.profiling] +inherits = "release" +debug = true + [workspace.lints.rust] -# unused_crate_dependencies = "warn" +unused_crate_dependencies = "warn" [workspace.dependencies] -malachite-actors = { version = "0.1.0", path = "actors" } -malachite-cli = { version = "0.1.0", path = "cli" } -malachite-common = { version = "0.1.0", path = "common" } -malachite-driver = { version = "0.1.0", path = "driver" } -malachite-gossip = { version = "0.1.0", path = "gossip" } -malachite-network = { version = "0.1.0", path = "network" } -malachite-itf = { version = "0.1.0", path = "itf" } -malachite-node = { version = "0.1.0", path = "node" } -malachite-proto = { version = "0.1.0", path = "proto" } -malachite-round = { version = "0.1.0", path = "round" } -malachite-test = { version = "0.1.0", path = "test" } -malachite-vote = { version = "0.1.0", path = "vote" } +malachite-actors = { version = "0.1.0", path = "crates/actors" } +malachite-cli = { version = "0.1.0", path = "crates/cli" } +malachite-common = { version = "0.1.0", path = "crates/common" } +malachite-driver = { version = "0.1.0", path = "crates/driver" } +malachite-gossip-consensus = { version = "0.1.0", path = "crates/gossip-consensus" } +malachite-gossip-mempool = { version = "0.1.0", path = "crates/gossip-mempool" } +malachite-itf = { version = "0.1.0", path = "crates/itf" } +malachite-metrics = { version = "0.1.0", path = "crates/metrics" } +malachite-node = { version = "0.1.0", path = "crates/node" } +malachite-proto = { version = "0.1.0", path = "crates/proto" } +malachite-round = { version = "0.1.0", path = "crates/round" } +malachite-test = { version = "0.1.0", path = "crates/test" } +malachite-test-app = { version = "0.1.0", path = "crates/test-app" } +malachite-vote = { version = "0.1.0", path = "crates/vote" } async-trait = "0.1.77" -clap = { version = "4.5.4", features = ["derive"] } +axum = "0.7" +base64 = "0.22.0" +bytesize = "1.3" +clap = "4.5.4" color-eyre = "0.6" +config = { version = "0.14", features = ["toml"], default-features = false } +eyre = "0.6" derive-where = "1.2.7" +directories = "5.0.1" ed25519-consensus = "2.1.0" futures = "0.3" glob = "0.3.0" +hex = { version = "0.4.3", features = ["serde"] } +humantime = "2.1.0" humantime-serde = "1.1.1" -itertools = "0.12" +itertools = "0.13" itf = "0.2.3" -libp2p = { version = "0.53.2", features = ["macros", "mdns", "identify", "tokio", "ed25519", "quic", "tls", "gossipsub"] } -libp2p-gossipsub = { version = "0.46.1" } +libp2p = { version = "0.53.2", features = ["macros", "mdns", "identify", "tokio", "ed25519", "quic", "tls", "gossipsub", "dns", "metrics"] } multiaddr = "0.18.1" num-bigint = "0.4.4" num-traits = "0.2.17" @@ -60,7 +65,8 @@ pretty_assertions = "1.4" prost = "0.12.3" prost-build = "0.12.3" prost-types = "0.12.3" -ractor = "0.9.6" +prometheus-client = "0.22" +ractor = "0.10.0" ractor_actors = { version = "0.4.0", default-features= 
false } rand = { version = "0.8.5", features = ["std_rng"] } rand_chacha = "0.3.1" @@ -75,3 +81,4 @@ tokio-stream = "0.1" toml = "0.8.10" tracing = "0.1.40" tracing-subscriber = "0.3.18" +tempfile = "3.10.1" diff --git a/code/actors/src/cal.rs b/code/actors/src/cal.rs deleted file mode 100644 index 2946e51f8..000000000 --- a/code/actors/src/cal.rs +++ /dev/null @@ -1,64 +0,0 @@ -use malachite_common::Context; -use ractor::{async_trait, Actor, ActorProcessingErr, ActorRef, RpcReplyPort}; - -pub enum Msg { - GetValidatorSet { - height: Ctx::Height, - reply: RpcReplyPort, - }, -} - -pub struct CAL { - #[allow(dead_code)] - ctx: Ctx, - validator_set: Ctx::ValidatorSet, -} - -impl CAL { - pub async fn spawn( - ctx: Ctx, - validator_set: Ctx::ValidatorSet, - ) -> Result>, ActorProcessingErr> { - let (actor_ref, _) = Actor::spawn(None, Self { ctx, validator_set }, ()).await?; - - Ok(actor_ref) - } - - async fn get_validator_set( - &self, - _height: Ctx::Height, - ) -> Result { - Ok(self.validator_set.clone()) - } -} - -#[async_trait] -impl Actor for CAL { - type Msg = Msg; - type State = (); - type Arguments = (); - - async fn pre_start( - &self, - _myself: ActorRef, - _: Self::Arguments, - ) -> Result { - Ok(()) - } - - async fn handle( - &self, - _myself: ActorRef, - msg: Self::Msg, - _state: &mut Self::State, - ) -> Result<(), ActorProcessingErr> { - match msg { - Msg::GetValidatorSet { height, reply } => { - let validators = self.get_validator_set(height).await?; - reply.send(validators)?; - } - } - - Ok(()) - } -} diff --git a/code/actors/src/lib.rs b/code/actors/src/lib.rs deleted file mode 100644 index a6948ed8a..000000000 --- a/code/actors/src/lib.rs +++ /dev/null @@ -1,8 +0,0 @@ -pub mod cal; -pub mod consensus; -pub mod gossip; -pub mod node; -pub mod prelude; -pub mod proposal_builder; -pub mod timers; -pub mod util; diff --git a/code/actors/src/node.rs b/code/actors/src/node.rs deleted file mode 100644 index fb219a3e3..000000000 --- a/code/actors/src/node.rs +++ /dev/null @@ -1,162 +0,0 @@ -use async_trait::async_trait; -use ractor::{Actor, ActorRef}; -use tokio::sync::mpsc; -use tokio::task::JoinHandle; - -use malachite_common::{Context, Round}; -use malachite_proto::Protobuf; -use malachite_vote::ThresholdParams; - -use crate::cal::Msg as CALMsg; -use crate::cal::CAL; -use crate::consensus::{Consensus, Msg as ConsensusMsg, Params as ConsensusParams}; -use crate::gossip::{Gossip, Msg as GossipMsg}; -use crate::proposal_builder::Msg as ProposalBuilderMsg; -use crate::proposal_builder::ProposalBuilder; -use crate::timers::Config as TimersConfig; -use crate::util::ValueBuilder; - -pub struct Params { - pub address: Ctx::Address, - pub initial_validator_set: Ctx::ValidatorSet, - pub keypair: malachite_gossip::Keypair, - pub start_height: Ctx::Height, - pub threshold_params: ThresholdParams, - pub timers_config: TimersConfig, - pub value_builder: Box>, - pub tx_decision: mpsc::Sender<(Ctx::Height, Round, Ctx::Value)>, -} - -pub async fn spawn( - ctx: Ctx, - params: Params, -) -> Result<(ActorRef, JoinHandle<()>), ractor::ActorProcessingErr> -where - Ctx: Context, - Ctx::Vote: Protobuf, - Ctx::Proposal: Protobuf, -{ - let cal = CAL::spawn(ctx.clone(), params.initial_validator_set.clone()).await?; - - let proposal_builder = ProposalBuilder::spawn(ctx.clone(), params.value_builder).await?; - - let consensus_params = ConsensusParams { - start_height: params.start_height, - initial_validator_set: params.initial_validator_set, - address: params.address, - threshold_params: 
params.threshold_params, - }; - - let addr = "/ip4/0.0.0.0/udp/0/quic-v1".parse().unwrap(); - let config = malachite_gossip::Config::default(); - let gossip = Gossip::spawn(params.keypair, addr, config, None) - .await - .unwrap(); - - let consensus = Consensus::spawn( - ctx.clone(), - consensus_params, - params.timers_config, - gossip.clone(), - cal.clone(), - proposal_builder.clone(), - params.tx_decision, - None, - ) - .await?; - - let node = Node::new( - ctx, - cal, - gossip, - consensus, - proposal_builder, - params.start_height, - ); - let actor = node.spawn().await?; - Ok(actor) -} - -pub struct Node { - #[allow(dead_code)] - ctx: Ctx, - cal: ActorRef>, - gossip: ActorRef, - consensus: ActorRef>, - proposal_builder: ActorRef>, - start_height: Ctx::Height, -} - -impl Node -where - Ctx: Context, - Ctx::Vote: Protobuf, - Ctx::Proposal: Protobuf, -{ - pub fn new( - ctx: Ctx, - cal: ActorRef>, - gossip: ActorRef, - consensus: ActorRef>, - proposal_builder: ActorRef>, - start_height: Ctx::Height, - ) -> Self { - Self { - ctx, - cal, - gossip, - consensus, - proposal_builder, - start_height, - } - } - - pub async fn spawn(self) -> Result<(ActorRef, JoinHandle<()>), ractor::SpawnErr> { - Actor::spawn(None, self, ()).await - } -} - -pub enum Msg { - Start, -} - -#[async_trait] -impl Actor for Node -where - Ctx: Context, - Ctx::Vote: Protobuf, - Ctx::Proposal: Protobuf, -{ - type Msg = Msg; - type State = (); - type Arguments = (); - - async fn pre_start( - &self, - myself: ActorRef, - _args: (), - ) -> Result<(), ractor::ActorProcessingErr> { - // Set ourselves as the supervisor of the other actors - self.cal.link(myself.get_cell()); - self.gossip.link(myself.get_cell()); - self.consensus.link(myself.get_cell()); - self.proposal_builder.link(myself.get_cell()); - - Ok(()) - } - - async fn handle( - &self, - _myself: ActorRef, - msg: Self::Msg, - _state: &mut (), - ) -> Result<(), ractor::ActorProcessingErr> { - match msg { - Msg::Start => self - .consensus - .cast(crate::consensus::Msg::StartHeight(self.start_height))?, - } - - Ok(()) - } -} diff --git a/code/actors/src/proposal_builder.rs b/code/actors/src/proposal_builder.rs deleted file mode 100644 index 10ef66fe6..000000000 --- a/code/actors/src/proposal_builder.rs +++ /dev/null @@ -1,92 +0,0 @@ -use std::time::Duration; - -use malachite_common::{Context, Round}; -use ractor::{async_trait, Actor, ActorProcessingErr, ActorRef, RpcReplyPort}; - -use crate::util::ValueBuilder; - -pub struct ProposedValue { - pub height: Ctx::Height, - pub round: Round, - pub value: Option, -} - -pub enum Msg { - GetValue { - height: Ctx::Height, - round: Round, - timeout_duration: Duration, - reply: RpcReplyPort>, - }, -} - -pub struct ProposalBuilder { - #[allow(dead_code)] - ctx: Ctx, - value_builder: Box>, -} - -impl ProposalBuilder { - pub async fn spawn( - ctx: Ctx, - value_builder: Box>, - ) -> Result>, ActorProcessingErr> { - let (actor_ref, _) = Actor::spawn(None, Self { ctx, value_builder }, ()).await?; - - Ok(actor_ref) - } - - async fn get_value( - &self, - height: Ctx::Height, - round: Round, - timeout_duration: Duration, - ) -> Result, ActorProcessingErr> { - let value = self - .value_builder - .build_value(height, timeout_duration) - .await; - - Ok(ProposedValue { - height, - round, - value, - }) - } -} - -#[async_trait] -impl Actor for ProposalBuilder { - type Msg = Msg; - type State = (); - type Arguments = (); - - async fn pre_start( - &self, - _myself: ActorRef, - _: Self::Arguments, - ) -> Result { - Ok(()) - } - - async fn handle( - 
&self, - _myself: ActorRef, - msg: Self::Msg, - _state: &mut Self::State, - ) -> Result<(), ActorProcessingErr> { - match msg { - Msg::GetValue { - height, - round, - timeout_duration, - reply, - } => { - let value = self.get_value(height, round, timeout_duration).await?; - reply.send(value)?; - } - } - - Ok(()) - } -} diff --git a/code/actors/src/util/make_actor.rs b/code/actors/src/util/make_actor.rs deleted file mode 100644 index 37c658a7e..000000000 --- a/code/actors/src/util/make_actor.rs +++ /dev/null @@ -1,39 +0,0 @@ -use ractor::ActorRef; -use tokio::sync::mpsc; - -use malachite_common::Round; -use malachite_gossip::Keypair; -use malachite_test::{Address, Height, PrivateKey, TestContext, ValidatorSet, Value}; -use tokio::task::JoinHandle; - -use crate::node::{Msg as NodeMsg, Params as NodeParams}; -use crate::timers::Config as TimersConfig; -use crate::util::TestValueBuilder; - -pub async fn make_node_actor( - initial_validator_set: ValidatorSet, - private_key: PrivateKey, - address: Address, - tx_decision: mpsc::Sender<(Height, Round, Value)>, -) -> (ActorRef, JoinHandle<()>) { - let keypair = Keypair::ed25519_from_bytes(private_key.inner().to_bytes()).unwrap(); - let start_height = Height::new(1); - let ctx = TestContext::new(private_key); - - let value_builder = Box::>::default(); - - let timers_config = TimersConfig::default(); - - let params = NodeParams { - address, - initial_validator_set, - keypair, - start_height, - threshold_params: Default::default(), - timers_config, - tx_decision, - value_builder, - }; - - crate::node::spawn(ctx, params).await.unwrap() -} diff --git a/code/actors/src/util/mod.rs b/code/actors/src/util/mod.rs deleted file mode 100644 index bbb930f85..000000000 --- a/code/actors/src/util/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod forward; -mod make_actor; -mod value_builder; - -pub use forward::{forward, Forward}; -pub use make_actor::make_node_actor; -pub use value_builder::{test::TestValueBuilder, ValueBuilder}; diff --git a/code/actors/src/util/value_builder.rs b/code/actors/src/util/value_builder.rs deleted file mode 100644 index cd4341567..000000000 --- a/code/actors/src/util/value_builder.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::marker::PhantomData; -use std::time::Duration; - -use async_trait::async_trait; -use derive_where::derive_where; - -use malachite_common::Context; - -#[async_trait] -pub trait ValueBuilder: Send + Sync + 'static { - async fn build_value( - &self, - height: Ctx::Height, - timeout_duration: Duration, - ) -> Option; -} - -pub mod test { - use super::*; - - use malachite_test::{Height, TestContext, Value}; - - #[derive_where(Default)] - pub struct TestValueBuilder { - _phantom: PhantomData, - } - - #[async_trait] - impl ValueBuilder for TestValueBuilder { - async fn build_value(&self, height: Height, timeout_duration: Duration) -> Option { - tokio::time::sleep(timeout_duration / 2).await; - - Some(Value::new(40 + height.as_u64())) - } - } -} diff --git a/code/actors/tests/actor_gossip_n3f1.rs b/code/actors/tests/actor_gossip_n3f1.rs deleted file mode 100644 index 9b0fcb067..000000000 --- a/code/actors/tests/actor_gossip_n3f1.rs +++ /dev/null @@ -1,33 +0,0 @@ -#![allow(unused_crate_dependencies)] - -#[path = "util.rs"] -mod util; -use util::*; - -#[tokio::test] -pub async fn one_node_fails_to_start() { - let nodes = Test::new( - [ - TestNode::faulty(5, vec![Fault::NoStart]), - TestNode::correct(15), - TestNode::correct(10), - ], - 4, - ); - - run_test(nodes).await -} - -#[tokio::test] -pub async fn one_node_crashes() { - let 
nodes = Test::new( - [ - TestNode::faulty(5, vec![Fault::Crash(2)]), - TestNode::correct(15), - TestNode::correct(10), - ], - 7, - ); - - run_test(nodes).await -} diff --git a/code/actors/tests/util.rs b/code/actors/tests/util.rs deleted file mode 100644 index 996411358..000000000 --- a/code/actors/tests/util.rs +++ /dev/null @@ -1,174 +0,0 @@ -#![allow(dead_code)] - -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; - -use tokio::sync::mpsc; -use tokio::time::{sleep, Duration}; -use tracing::{error, info}; - -use malachite_common::{Round, VotingPower}; -use malachite_test::utils::make_validators; -use malachite_test::{Height, PrivateKey, Validator, ValidatorSet, Value}; - -use malachite_actors::node::Msg; -use malachite_actors::util::make_node_actor; - -pub const SEED: u64 = 42; -pub const HEIGHTS: u64 = 3; -pub const START_HEIGHT: Height = Height::new(1); -pub const END_HEIGHT: Height = Height::new(START_HEIGHT.as_u64() + HEIGHTS - 1); -pub const TEST_TIMEOUT: Duration = Duration::from_secs(20); - -pub struct Test { - pub nodes: [TestNode; N], - pub validator_set: ValidatorSet, - pub vals_and_keys: [(Validator, PrivateKey); N], - pub expected_decisions: usize, -} - -impl Test { - pub fn new(nodes: [TestNode; N], expected_decisions: usize) -> Self { - let vals_and_keys = make_validators(Self::voting_powers(&nodes)); - let validators = vals_and_keys.iter().map(|(v, _)| v).cloned(); - let validator_set = ValidatorSet::new(validators); - - Self { - nodes, - validator_set, - vals_and_keys, - expected_decisions, - } - } - - pub fn voting_powers(nodes: &[TestNode; N]) -> [VotingPower; N] { - let mut voting_powers = [0; N]; - for (i, node) in nodes.iter().enumerate() { - voting_powers[i] = node.voting_power; - } - voting_powers - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Fault { - NoStart, - Crash(u64), -} - -#[derive(Clone)] -pub struct TestNode { - pub voting_power: VotingPower, - pub faults: Vec, -} - -impl TestNode { - pub fn correct(voting_power: VotingPower) -> Self { - Self { - voting_power, - faults: vec![], - } - } - - pub fn faulty(voting_power: VotingPower, faults: Vec) -> Self { - Self { - voting_power, - faults, - } - } - - fn start_node(&self) -> bool { - !self.faults.contains(&Fault::NoStart) - } - - fn crashes_at(&self, height: u64) -> bool { - self.faults.iter().any(|f| match f { - Fault::NoStart => false, - Fault::Crash(h) => *h == height, - }) - } -} - -pub async fn run_test(test: Test) { - tracing_subscriber::fmt::init(); - - let mut handles = Vec::with_capacity(N); - - for (v, sk) in &test.vals_and_keys { - let (tx_decision, rx_decision) = mpsc::channel(HEIGHTS as usize); - - let node = tokio::spawn(make_node_actor( - test.validator_set.clone(), - sk.clone(), - v.address, - tx_decision, - )); - - handles.push((node, rx_decision)); - } - - sleep(Duration::from_secs(5)).await; - - let mut nodes = Vec::with_capacity(handles.len()); - for (i, (handle, rx)) in handles.into_iter().enumerate() { - let (actor_ref, _) = handle.await.expect("Error: node failed to start"); - let test = test.nodes[i].clone(); - nodes.push((actor_ref, test, rx)); - } - - let mut actors = Vec::with_capacity(nodes.len()); - let mut rxs = Vec::with_capacity(nodes.len()); - - for (actor, node_test, rx) in nodes { - if node_test.start_node() { - actor.cast(Msg::Start).unwrap(); - } - - actors.push(actor); - rxs.push(rx); - } - - let correct_decisions = Arc::new(AtomicUsize::new(0)); - - for (i, mut rx_decision) in rxs.into_iter().enumerate() { - let correct_decisions = 
Arc::clone(&correct_decisions); - - let node_test = test.nodes[i].clone(); - let actor_ref = actors[i].clone(); - - tokio::spawn(async move { - for height in START_HEIGHT.as_u64()..=END_HEIGHT.as_u64() { - if node_test.crashes_at(height) { - info!("[{i}] Faulty node {i} has crashed"); - actor_ref.kill(); - break; - } - - let decision = rx_decision.recv().await; - let expected = Some((Height::new(height), Round::new(0), Value::new(40 + height))); - - if decision == expected { - info!("[{i}] {height}/{HEIGHTS} correct decision"); - correct_decisions.fetch_add(1, Ordering::Relaxed); - } else { - error!("[{i}] {height}/{HEIGHTS} incorrect decision: expected {expected:?}, got {decision:?}"); - } - } - }); - } - - tokio::time::sleep(TEST_TIMEOUT).await; - - let correct_decisions = correct_decisions.load(Ordering::Relaxed); - - if correct_decisions != test.expected_decisions { - panic!( - "Not all nodes made correct decisions: got {}, expected {}", - correct_decisions, test.expected_decisions - ); - } - - for actor in actors { - let _ = actor.stop_and_wait(None, None).await; - } -} diff --git a/code/cli/Cargo.toml b/code/cli/Cargo.toml deleted file mode 100644 index af728f4ff..000000000 --- a/code/cli/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "malachite-cli" -version.workspace = true -edition.workspace = true -repository.workspace = true -license.workspace = true -publish.workspace = true - -[lints] -workspace = true - -[dependencies] -malachite-actors.workspace = true -malachite-node.workspace = true -malachite-test.workspace = true - -clap = { workspace = true } -color-eyre = { workspace = true } -itertools = { workspace = true } -tokio = { workspace = true, features = ["full"] } -tracing = { workspace = true } -tracing-subscriber = { workspace = true, features = ["fmt", "env-filter"] } diff --git a/code/cli/src/main.rs b/code/cli/src/main.rs deleted file mode 100644 index 012f7f5ef..000000000 --- a/code/cli/src/main.rs +++ /dev/null @@ -1,75 +0,0 @@ -use std::time::Duration; - -use clap::Parser; -use logging::DebugSection; -use malachite_actors::node::Msg; -use malachite_actors::util::make_node_actor; -use malachite_test::utils::make_validators; -use malachite_test::ValidatorSet; - -use tracing::info; - -use crate::logging::LogLevel; - -#[derive(clap::Parser)] -pub struct Args { - #[clap( - short, - long, - help = "Index of this node in the validator set (0, 1, or 2)" - )] - pub index: usize, - - #[clap( - short, - long = "debug", - help = "Enable debug output for the given comma-separated sections", - value_enum, - value_delimiter = ',' - )] - debug: Vec, -} - -const VOTING_POWERS: [u64; 3] = [11, 10, 10]; - -mod logging; - -#[tokio::main(flavor = "current_thread")] -pub async fn main() -> Result<(), Box> { - let args = Args::parse(); - let index = args.index; - - logging::init(LogLevel::Debug, &args.debug); - - let vs = make_validators(VOTING_POWERS); - - let (val, sk) = vs[index].clone(); - let (vs, _): (Vec<_>, Vec<_>) = vs.into_iter().unzip(); - let vs = ValidatorSet::new(vs); - - info!("[{index}] Starting..."); - - let (tx_decision, mut rx_decision) = tokio::sync::mpsc::channel(32); - let (actor, handle) = make_node_actor(vs, sk, val.address, tx_decision).await; - - tokio::spawn({ - let actor = actor.clone(); - async move { - tokio::signal::ctrl_c().await.unwrap(); - info!("[{index}] Shutting down..."); - actor.stop(None); - } - }); - - tokio::time::sleep(Duration::from_secs(1)).await; - - actor.cast(Msg::Start)?; - - while let Some((height, round, value)) = 
rx_decision.recv().await { - info!("[{index}] Decision at height {height} and round {round}: {value:?}",); - } - - handle.await?; - - Ok(()) -} diff --git a/code/config.toml b/code/config.toml index 475340ff3..d7c427bec 100644 --- a/code/config.toml +++ b/code/config.toml @@ -5,34 +5,104 @@ # A custom human readable name for this node moniker = "malachite" -####################################################### -### P2P Configuration Options ### -####################################################### -[p2p] - -# Address to listen for incoming connections -listen_addr = "/ip4/0.0.0.0/udp/0/quic-v1" -# List of nodes to keep persistent connections to -persistent_peers = [] - ####################################################### ### Consensus Configuration Options ### ####################################################### [consensus] +# Maximum block size +# Override with MALACHITE__CONSENSUS__MAX_BLOCK_SIZE env variable +max_block_size = "1 MiB" +## Timeouts # How long we wait for a proposal block before prevoting nil +# Override with MALACHITE__CONSENSUS__TIMEOUT_PROPOSE env variable timeout_propose = "3s" + # How much timeout_propose increases with each round timeout_propose_delta = "500ms" + # How long we wait after receiving +2/3 prevotes for “anything” (ie. not a single block or nil) +# Override with MALACHITE__CONSENSUS__TIMEOUT_PREVOTE env variable timeout_prevote = "1s" + # How much the timeout_prevote increases with each round timeout_prevote_delta = "500ms" + # How long we wait after receiving +2/3 precommits for “anything” (ie. not a single block or nil) +# Override with MALACHITE__CONSENSUS__TIMEOUT_PRECOMMIT env variable timeout_precommit = "1s" + # How much the timeout_precommit increases with each round timeout_precommit_delta = "500ms" + # How long we wait after committing a block, before starting on the new # height (this gives us a chance to receive some more precommits, even # though we already have +2/3). +# Override with MALACHITE__CONSENSUS__TIMEOUT_COMMIT env variable timeout_commit = "1s" + +####################################################### +### Consensus P2P Configuration Options ### +####################################################### +[consensus.p2p] +# Address to listen for incoming connections +listen_addr = "/ip4/0.0.0.0/udp/0/quic-v1" +# List of nodes to keep persistent connections to +persistent_peers = [] + + +####################################################### +### Mempool Configuration Options ### +####################################################### +[mempool] +# Maximum number of transactions in the mempool +# Override with MALACHITE__MEMPOOL__MAX_TX_COUNT env variable +max_tx_count = 10000 + +# Maximum number of transactions to gossip at once in a batch. If set to 0, mempool does not gossip the transactions. 
+# Override with MALACHITE__MEMPOOL__GOSSIP_BATCH_SIZE +gossip_batch_size = 100 + +####################################################### +### Mempool P2P Configuration Options ### +####################################################### +[mempool.p2p] +# Address to listen for incoming connections +listen_addr = "/ip4/0.0.0.0/udp/0/quic-v1" +# List of nodes to keep persistent connections to +persistent_peers = [] + +####################################################### +### Metrics Configuration Options ### +####################################################### +[metrics] +# Enable the metrics server +enabled = true + +# Metrics are exported at `http://127.0.0.1:9000/metrics` +listen_addr = "127.0.0.1:9000" + +####################################################### +### Runtime Configuration Options ### +####################################################### +[runtime] +# The flavor of Tokio runtime to use. +# Possible values: +# - "single_threaded": A single threaded runtime (default) +# - "multi_threaded": A multi-threaded runtime +flavor = "single_threaded" + +# For the multi-threaded runtime only. +# Sets the number of worker threads the Runtime will use. +# If set to 0, defaults to the number of cores available to the system. +# worker_threads = 4 + +[test] +# Override with MALACHITE__TEST__TX_SIZE env variable +tx_size = "256 B" +# Override with MALACHITE__TEST__TXS_PER_PART env variable +txs_per_part = 200 +# Override with MALACHITE__TEST__TIME_ALLOWANCE_FACTOR env variable +time_allowance_factor = 0.7 +# Override with MALACHITE__TEST__EXEC_TIME_PER_TX env variable +exec_time_per_tx = "1ms" diff --git a/code/actors/Cargo.toml b/code/crates/actors/Cargo.toml similarity index 54% rename from code/actors/Cargo.toml rename to code/crates/actors/Cargo.toml index 531664693..c70e9444c 100644 --- a/code/actors/Cargo.toml +++ b/code/crates/actors/Cargo.toml @@ -10,25 +10,22 @@ publish.workspace = true workspace = true [dependencies] -malachite-common.workspace = true -malachite-driver.workspace = true -malachite-gossip.workspace = true -malachite-network.workspace = true -malachite-node.workspace = true -malachite-proto.workspace = true -malachite-test.workspace = true -malachite-vote.workspace = true +malachite-common.workspace = true +malachite-driver.workspace = true +malachite-gossip-consensus.workspace = true +malachite-gossip-mempool.workspace = true +malachite-metrics.workspace = true +malachite-node.workspace = true +malachite-proto.workspace = true +malachite-vote.workspace = true async-trait = { workspace = true } +eyre = { workspace = true } derive-where = { workspace = true } libp2p = { workspace = true } +prost = { workspace = true } +prost-types = { workspace = true } ractor = { workspace = true, features = ["async-trait"] } rand = { workspace = true } tokio = { workspace = true, features = ["full"] } tracing = { workspace = true } - -[dev-dependencies] -malachite-test = { workspace = true } -rand_chacha = { workspace = true } -tracing-subscriber = { workspace = true, features = ["fmt"] } - diff --git a/code/actors/src/consensus.rs b/code/crates/actors/src/consensus.rs similarity index 54% rename from code/actors/src/consensus.rs rename to code/crates/actors/src/consensus.rs index c1cd2ef94..013523467 100644 --- a/code/actors/src/consensus.rs +++ b/code/crates/actors/src/consensus.rs @@ -1,4 +1,5 @@ -use std::collections::VecDeque; +use std::collections::btree_map::Entry; +use std::collections::{BTreeMap, BTreeSet, VecDeque}; use std::fmt::Display; use std::sync::Arc; @@ 
-6,54 +7,58 @@ use async_trait::async_trait; use ractor::rpc::{call_and_forward, CallResult}; use ractor::{Actor, ActorCell, ActorProcessingErr, ActorRef}; use tokio::sync::mpsc; -use tracing::{debug, info, warn}; +use tracing::{debug, error, info, trace, warn}; +use malachite_common::NilOrVal; use malachite_common::{ - Context, Height, NilOrVal, Proposal, Round, SignedProposal, SignedVote, Timeout, TimeoutStep, - Validator, ValidatorSet, ValueId, Vote, VoteType, + Context, Height, Proposal, Round, SignedBlockPart, SignedProposal, SignedVote, Timeout, + TimeoutStep, Validator, ValidatorSet, Value, Vote, VoteType, }; use malachite_driver::Driver; use malachite_driver::Input as DriverInput; use malachite_driver::Output as DriverOutput; use malachite_driver::Validity; -use malachite_gossip::{Channel, Event as GossipEvent}; -use malachite_network::Msg as NetworkMsg; -use malachite_network::PeerId; +use malachite_gossip_consensus::{Channel, Event as GossipEvent, PeerId}; use malachite_proto as proto; use malachite_proto::Protobuf; -use malachite_vote::{Threshold, ThresholdParams}; +use malachite_vote::ThresholdParams; -use crate::cal::Msg as CALMsg; -use crate::gossip::Msg as GossipMsg; -use crate::proposal_builder::{Msg as ProposalBuilderMsg, ProposedValue}; -use crate::timers::{Config as TimersConfig, Msg as TimersMsg, TimeoutElapsed, Timers}; +use crate::gossip_consensus::{GossipConsensusRef, Msg as GossipConsensusMsg}; +use crate::host::{HostRef, LocallyProposedValue, Msg as HostMsg, ReceivedProposedValue}; +use crate::timers::{Config as TimersConfig, Msg as TimersMsg, TimeoutElapsed, Timers, TimersRef}; use crate::util::forward; +mod network; +use network::NetworkMsg; + +mod metrics; +pub use metrics::Metrics; + pub enum Next { None, Input(DriverInput), Decided(Round, Ctx::Value), } -pub struct Params { +pub struct ConsensusParams { pub start_height: Ctx::Height, pub initial_validator_set: Ctx::ValidatorSet, pub address: Ctx::Address, pub threshold_params: ThresholdParams, } -// type Ref = ActorRef<::Msg>; +pub type ConsensusRef = ActorRef>; pub struct Consensus where Ctx: Context, { ctx: Ctx, - params: Params, + params: ConsensusParams, timers_config: TimersConfig, - gossip: ActorRef, - cal: ActorRef>, - proposal_builder: ActorRef>, + gossip_consensus: GossipConsensusRef, + host: HostRef, + metrics: Metrics, tx_decision: mpsc::Sender<(Ctx::Height, Round, Ctx::Value)>, } @@ -62,13 +67,14 @@ pub enum Msg { MoveToHeight(Ctx::Height), GossipEvent(Arc), TimeoutElapsed(Timeout), - ProposeValue(Ctx::Height, Round, Option), SendDriverInput(DriverInput), Decided(Ctx::Height, Round, Ctx::Value), - ProcessDriverOutputs( - Vec>, - Option<(VoteType, Round, NilOrVal>)>, - ), + ProcessDriverOutputs(Vec>), + // The proposal builder has built a value and can be used in a new proposal consensus message + ProposeValue(Ctx::Height, Round, Ctx::Value), + // The proposal builder has build a new block part, needs to be signed and gossiped by consensus + BuilderBlockPart(Ctx::BlockPart), + BlockReceived(ReceivedProposedValue), } impl From for Msg { @@ -82,9 +88,61 @@ where Ctx: Context, { driver: Driver, - timers: ActorRef, + timers: TimersRef, msg_queue: VecDeque>, validator_set: Ctx::ValidatorSet, + connected_peers: BTreeSet, + + /// The Value and validity of received blocks. 
+ pub received_blocks: Vec<(Ctx::Height, Round, Ctx::Value, Validity)>, + + /// Store Precommit Votes to be sent along the decision to the host + pub signed_precommits: BTreeMap<(Ctx::Height, Round), Vec>>, +} + +impl State +where + Ctx: Context, +{ + pub fn remove_received_block(&mut self, height: Ctx::Height, round: Round) { + self.received_blocks + .retain(|&(h, r, ..)| h != height && r != round); + } + + pub fn store_signed_precommit(&mut self, precommit: &SignedVote) { + assert_eq!(precommit.vote.vote_type(), VoteType::Precommit); + + let height = precommit.vote.height(); + let round = precommit.vote.round(); + + match self.signed_precommits.entry((height, round)) { + Entry::Vacant(e) => { + e.insert(vec![precommit.clone()]); + } + Entry::Occupied(mut e) => { + e.get_mut().push(precommit.clone()); + } + } + } + + pub fn restore_precommits( + &mut self, + height: Ctx::Height, + round: Round, + value: &Ctx::Value, + ) -> Vec> { + // Get the commits for the height and round. + let mut commits_for_height_and_round = self + .signed_precommits + .remove(&(height, round)) + .unwrap_or_default(); + + // Keep the commits for the specified value. + // For now we ignore equivocating votes if present. + commits_for_height_and_round.retain(|c| c.vote.value() == &NilOrVal::Val(value.id())); + + commits_for_height_and_round + } } impl Consensus @@ -92,23 +150,24 @@ where Ctx: Context, Ctx::Vote: Protobuf, Ctx::Proposal: Protobuf, + Ctx::BlockPart: Protobuf, { pub fn new( ctx: Ctx, - params: Params, + params: ConsensusParams, timers_config: TimersConfig, - gossip: ActorRef, - cal: ActorRef>, - proposal_builder: ActorRef>, + gossip_consensus: GossipConsensusRef, + host: HostRef, + metrics: Metrics, tx_decision: mpsc::Sender<(Ctx::Height, Round, Ctx::Value)>, ) -> Self { Self { ctx, params, timers_config, - gossip, - cal, - proposal_builder, + gossip_consensus, + host, + metrics, tx_decision, } } @@ -116,11 +175,11 @@ where #[allow(clippy::too_many_arguments)] pub async fn spawn( ctx: Ctx, - params: Params, + params: ConsensusParams, timers_config: TimersConfig, - gossip: ActorRef, - cal: ActorRef>, - proposal_builder: ActorRef>, + gossip_consensus: GossipConsensusRef, + host: HostRef, + metrics: Metrics, tx_decision: mpsc::Sender<(Ctx::Height, Round, Ctx::Value)>, supervisor: Option, ) -> Result>, ractor::SpawnErr> { @@ -128,9 +187,9 @@ where ctx, params, timers_config, - gossip, - cal, - proposal_builder, + gossip_consensus, + host, + metrics, tx_decision, ); @@ -149,24 +208,10 @@ where myself: ActorRef>, state: &mut State, ) -> Result<(), ractor::ActorProcessingErr> { - match event { - GossipEvent::Listening(addr) => { - info!("Listening on {addr}"); - } - GossipEvent::PeerConnected(peer_id) => { - info!("Connected to peer {peer_id}"); - } - GossipEvent::PeerDisconnected(peer_id) => { - info!("Disconnected from peer {peer_id}"); - } - GossipEvent::Message(from, Channel::Consensus, data) => { - let from = PeerId::new(from.to_string()); - let msg = NetworkMsg::from_network_bytes(data).unwrap(); + if let GossipEvent::Message(from, _, data) = event { + let msg = NetworkMsg::from_network_bytes(data).unwrap(); - info!("Received message from peer {from}: {msg:?}"); - - self.handle_network_msg(from, msg, myself, state).await?; - } + self.handle_network_msg(from, msg, myself, state).await?; } Ok(()) @@ -174,7 +219,7 @@ where pub async fn handle_network_msg( &self, - from: PeerId, + from: &PeerId, msg: NetworkMsg, myself: ActorRef>, state: &mut State, @@ -200,23 +245,14 @@ where } let vote_height = 
signed_vote.vote.height(); + assert!(vote_height == state.driver.height()); - if vote_height > state.driver.height() { - warn!( - %from, %validator_address, - "Received vote for height {0} greater than current height {1}, moving to height {0}", - vote_height, state.driver.height(), - ); - - // FIXME: We lose the vote here. We should instead buffer it - // and process it once we moved to the correct height. - // NOTE: We cannot just send the vote via `SendDriverInput` because otherwise - // the vote will reach the driver before it has started the new height. - myself.cast(Msg::MoveToHeight(vote_height))?; - - return Ok(()); + // Store the non-nil Precommits. + if signed_vote.vote.vote_type() == VoteType::Precommit + && signed_vote.vote.value().is_val() + { + state.store_signed_precommit(&signed_vote); } - myself.cast(Msg::SendDriverInput(DriverInput::Vote(signed_vote.vote)))?; } @@ -224,39 +260,77 @@ where let signed_proposal = SignedProposal::::from_proto(proposal).unwrap(); let validator_address = signed_proposal.proposal.validator_address(); - info!(%from, %validator_address, "Received proposal: {:?}", signed_proposal.proposal); + info!(%from, %validator_address, "Received proposal: (h: {}, r: {}, id: {:?})", + signed_proposal.proposal.height(), signed_proposal.proposal.round(), signed_proposal.proposal.value().id()); let Some(validator) = state.validator_set.get_by_address(validator_address) else { warn!(%from, %validator_address, "Received proposal from unknown validator"); return Ok(()); }; - let valid = self - .ctx - .verify_signed_proposal(&signed_proposal, validator.public_key()); + // TODO - verify that the proposal was signed by the proposer for the height and round, drop otherwise. + let proposal = &signed_proposal.proposal; + let proposal_height = proposal.height(); + let proposal_round = proposal.round(); - let proposal_height = signed_proposal.proposal.height(); - - if proposal_height > state.driver.height() { - warn!( - %from, %validator_address, - "Received proposal for height {0} greater than current height {1}, moving to height {0}", - proposal_height, state.driver.height(), + if !self + .ctx + .verify_signed_proposal(&signed_proposal, validator.public_key()) + { + error!( + "Received invalid signature for proposal ({}, {}, {:?}", + proposal_height, + proposal_round, + proposal.value() ); + return Ok(()); + } + assert!(proposal_height == state.driver.height()); + + let received_block = state + .received_blocks + .iter() + .find(|&x| x.0 == proposal_height && x.1 == proposal_round); + + match received_block { + Some((_height, _round, _value, valid)) => { + myself.cast(Msg::SendDriverInput(DriverInput::Proposal( + proposal.clone(), + *valid, + )))?; + } + None => { + // Store the proposal and wait for all block parts + // TODO - or maybe integrate with receive-proposal() here? will this block until all parts are received? + info!("Received proposal before all block parts, storing it: {proposal:?}",); + + state.driver.proposal = Some(proposal.clone()); + } + } + } - // FIXME: We lose the proposal here. We should instead buffer it - // and process it once we moved to the correct height. - // NOTE: We cannot just send the proposal via `SendDriverInput` because otherwise - // the proposal will reach the driver before it has started the new height. 
- myself.cast(Msg::MoveToHeight(proposal_height))?; + NetworkMsg::BlockPart(block_part) => { + let signed_block_part = SignedBlockPart::::from_proto(block_part).unwrap(); + let validator_address = signed_block_part.validator_address(); + let Some(validator) = state.validator_set.get_by_address(validator_address) else { + warn!(%from, %validator_address, "Received block part from unknown validator"); + return Ok(()); + }; + + if !self + .ctx + .verify_signed_block_part(&signed_block_part, validator.public_key()) + { + warn!(%from, %validator_address, "Received invalid block part: {signed_block_part:?}"); return Ok(()); } - myself.cast(Msg::SendDriverInput(DriverInput::Proposal( - signed_proposal.proposal, - Validity::from_valid(valid), - )))?; + // TODO - verify that the proposal was signed by the proposer for the height and round, drop otherwise. + self.host.cast(HostMsg::BlockPart { + block_part: signed_block_part.block_part, + reply_to: myself.clone(), + })? } } @@ -300,7 +374,7 @@ where ) -> Result<(), ractor::ActorProcessingErr> { match &input { DriverInput::NewRound(_, _, _) => { - state.timers.cast(TimersMsg::Reset)?; + state.timers.cast(TimersMsg::CancelAllTimeouts)?; } DriverInput::ProposeValue(round, _) => state @@ -318,21 +392,12 @@ where DriverInput::TimeoutElapsed(_) => (), } - let check_threshold = if let DriverInput::Vote(vote) = &input { - let round = Vote::::round(vote); - let value = Vote::::value(vote); - - Some((vote.vote_type(), round, value.clone())) - } else { - None - }; - let outputs = state .driver .process(input) .map_err(|e| format!("Driver failed to process input: {e}"))?; - myself.cast(Msg::ProcessDriverOutputs(outputs, check_threshold))?; + myself.cast(Msg::ProcessDriverOutputs(outputs))?; Ok(()) } @@ -340,31 +405,9 @@ where async fn process_driver_outputs( &self, outputs: Vec>, - check_threshold: Option<(VoteType, Round, NilOrVal>)>, myself: ActorRef>, state: &mut State, ) -> Result<(), ActorProcessingErr> { - // When we receive a vote, check if we've gotten +2/3 votes for the value we just received a vote for, - // if so then cancel the corresponding timeout. 
- if let Some((vote_type, round, value)) = check_threshold { - let threshold = match value { - NilOrVal::Nil => Threshold::Nil, - NilOrVal::Val(value) => Threshold::Value(value), - }; - - let votes = state.driver.votes(); - - if votes.is_threshold_met(&round, vote_type, threshold.clone()) { - let timeout = match vote_type { - VoteType::Prevote => Timeout::prevote(round), - VoteType::Precommit => Timeout::precommit(round), - }; - - info!("Threshold met for {threshold:?} at round {round}, cancelling {timeout}"); - state.timers.cast(TimersMsg::CancelTimeout(timeout))?; - } - } - for output in outputs { let next = self .handle_driver_output(output, myself.clone(), state) @@ -411,8 +454,8 @@ where DriverOutput::Propose(proposal) => { info!( - "Proposing value {:?} at round {}", - proposal.value(), + "Proposing value with id: {:?}, at round {}", + proposal.value().id(), proposal.round() ); @@ -422,8 +465,8 @@ where let proto = signed_proposal.to_proto().unwrap(); // FIXME let msg = NetworkMsg::Proposal(proto); let bytes = msg.to_network_bytes().unwrap(); // FIXME - self.gossip - .cast(GossipMsg::Broadcast(Channel::Consensus, bytes))?; + self.gossip_consensus + .cast(GossipConsensusMsg::Broadcast(Channel::Consensus, bytes))?; Ok(Next::Input(DriverInput::Proposal( signed_proposal.proposal, @@ -433,7 +476,8 @@ where DriverOutput::Vote(vote) => { info!( - "Voting for value {:?} at round {}", + "Voting {:?} for value {:?} at round {}", + vote.vote_type(), vote.value(), vote.round() ); @@ -444,14 +488,15 @@ where let proto = signed_vote.to_proto().unwrap(); // FIXME let msg = NetworkMsg::Vote(proto); let bytes = msg.to_network_bytes().unwrap(); // FIXME - self.gossip - .cast(GossipMsg::Broadcast(Channel::Consensus, bytes))?; + self.gossip_consensus + .cast(GossipConsensusMsg::Broadcast(Channel::Consensus, bytes))?; Ok(Next::Input(DriverInput::Vote(signed_vote.vote))) } DriverOutput::Decide(round, value) => { - info!("Decided on value {value:?} at round {round}"); + // TODO - remove proposal, votes, block for the round + info!("Decided on value {:?} at round {round}", value.id()); let _ = self .tx_decision @@ -489,15 +534,17 @@ where // Call `GetValue` on the CAL actor, and forward the reply to the current actor, // wrapping it in `Msg::ProposeValue`. 
call_and_forward( - &self.proposal_builder.get_cell(), - |reply| ProposalBuilderMsg::GetValue { + &self.host.get_cell(), + |reply| HostMsg::GetValue { height, round, timeout_duration, + address: self.params.address.clone(), + consensus: myself.clone(), reply, }, myself.get_cell(), - |proposed: ProposedValue| { + |proposed: LocallyProposedValue| { Msg::::ProposeValue(proposed.height, proposed.round, proposed.value) }, None, @@ -529,8 +576,11 @@ where height: Ctx::Height, ) -> Result { let result = self - .cal - .call(|reply| CALMsg::GetValidatorSet { height, reply }, None) + .host + .call( + |reply_to| HostMsg::GetValidatorSet { height, reply_to }, + None, + ) .await?; // TODO: Figure out better way to handle this: @@ -553,6 +603,7 @@ where Ctx::Height: Display, Ctx::Vote: Protobuf, Ctx::Proposal: Protobuf, + Ctx::BlockPart: Protobuf, { type Msg = Msg; type State = State; @@ -567,7 +618,8 @@ where Timers::spawn_linked(self.timers_config, myself.clone(), myself.get_cell()).await?; let forward = forward(myself.clone(), Some(myself.get_cell()), Msg::GossipEvent).await?; - self.gossip.cast(GossipMsg::Subscribe(forward))?; + self.gossip_consensus + .cast(GossipConsensusMsg::Subscribe(forward))?; let driver = Driver::new( self.ctx.clone(), @@ -582,11 +634,14 @@ where timers, msg_queue: VecDeque::new(), validator_set: self.params.initial_validator_set.clone(), + connected_peers: BTreeSet::new(), + received_blocks: vec![], + signed_precommits: Default::default(), }) } #[tracing::instrument( - name = "node", + name = "consensus", skip(self, myself, msg, state), fields( height = %state.driver.height(), @@ -601,6 +656,8 @@ where ) -> Result<(), ractor::ActorProcessingErr> { match msg { Msg::StartHeight(height) => { + self.metrics.block_start(); + let round = Round::new(0); info!("Starting height {height} at round {round}"); @@ -615,13 +672,15 @@ where // Drain the pending message queue to process any gossip events that were received // before the driver started the new height and was still at round Nil. let pending_msgs = std::mem::take(&mut state.msg_queue); + debug!("Replaying {} messages", pending_msgs.len()); for msg in pending_msgs { myself.cast(msg)?; } } Msg::MoveToHeight(height) => { - state.timers.cast(TimersMsg::Reset)?; + state.timers.cast(TimersMsg::CancelAllTimeouts)?; + state.timers.cast(TimersMsg::ResetTimeouts)?; let validator_set = self.get_validator_set(height).await?; state.driver.move_to_height(height, validator_set); @@ -651,29 +710,99 @@ where return Ok(()); } - match value { - Some(value) => myself.cast(Msg::SendDriverInput(DriverInput::ProposeValue( - round, value, - )))?, - - None => warn!( - %height, %round, - "Proposal builder failed to build a value within the deadline" - ), - } + myself.cast(Msg::SendDriverInput(DriverInput::ProposeValue( + round, value, + )))? } Msg::Decided(height, round, value) => { - info!("Decided on value {value:?} at height {height} and round {round}"); + info!( + "Decided on value {:?} at height {height} and round {round}", + value.id() + ); + + // Remove the block information as it is not needed anymore + state.remove_received_block(height, round); + + // Restore the commits. Note that they will be removed from `state` + let commits = state.restore_precommits(height, round, &value); + + self.host.cast(HostMsg::DecidedOnValue { + height, + round, + value, + commits, + })?; + + // Reinitialize to remove any previous round or equivocating precommits. + // TODO - revise when evidence module is added. 
+ state.signed_precommits = Default::default(); + + self.metrics.block_end(); + self.metrics.finalized_blocks.inc(); + self.metrics + .rounds_per_block + .observe((round.as_i64() + 1) as f64); } Msg::GossipEvent(event) => { - if state.driver.round() == Round::Nil { - debug!("Received gossip event at round -1, queuing for later"); - state.msg_queue.push_back(Msg::GossipEvent(event)); - } else { - self.handle_gossip_event(event.as_ref(), myself, state) - .await?; + match event.as_ref() { + GossipEvent::Listening(addr) => { + info!("Listening on {addr}"); + } + + GossipEvent::PeerConnected(peer_id) => { + info!("Connected to peer {peer_id}"); + + if !state.connected_peers.insert(*peer_id) { + // We already saw that peer, ignoring... + return Ok(()); + } + + self.metrics.connected_peers.inc(); + + if state.connected_peers.len() == state.validator_set.count() - 1 { + info!( + "Enough peers ({}) connected to start consensus", + state.connected_peers.len() + ); + + myself.cast(Msg::StartHeight(state.driver.height()))?; + } + } + + GossipEvent::PeerDisconnected(peer_id) => { + info!("Disconnected from peer {peer_id}"); + + if state.connected_peers.remove(peer_id) { + self.metrics.connected_peers.dec(); + + // TODO: pause/stop consensus, if necessary + } + } + + GossipEvent::Message(_, _, data) => { + let msg = NetworkMsg::from_network_bytes(data).unwrap(); // FIXME + + let Some(msg_height) = msg.msg_height() else { + trace!("Received message without height, dropping"); + return Ok(()); + }; + + // Queue messages if driver is not initialized, or if they are for higher height. + // Process messages received for the current height. + // Drop all others. + if state.driver.round() == Round::Nil { + debug!("Received gossip event at round -1, queuing for later"); + state.msg_queue.push_back(Msg::GossipEvent(event)); + } else if state.driver.height().as_u64() < msg_height { + debug!("Received gossip event for higher height, queuing for later"); + state.msg_queue.push_back(Msg::GossipEvent(event)); + } else if state.driver.height().as_u64() == msg_height { + self.handle_gossip_event(event.as_ref(), myself, state) + .await?; + } + } } } @@ -685,9 +814,44 @@ where self.send_driver_input(input, myself, state).await?; } - Msg::ProcessDriverOutputs(outputs, check_threshold) => { - self.process_driver_outputs(outputs, check_threshold, myself, state) - .await?; + Msg::ProcessDriverOutputs(outputs) => { + self.process_driver_outputs(outputs, myself, state).await?; + } + + Msg::BuilderBlockPart(block_part) => { + let signed_block_part = self.ctx.sign_block_part(block_part); + let proto = signed_block_part.to_proto().unwrap(); // FIXME + let msg = NetworkMsg::BlockPart(proto); + let bytes = msg.to_network_bytes().unwrap(); // FIXME + self.gossip_consensus + .cast(GossipConsensusMsg::Broadcast(Channel::BlockParts, bytes))?; + } + + Msg::BlockReceived(block) => { + let ReceivedProposedValue { + height, + round, + value, + valid, + .. + } = block; + + info!("Received block: {value:?}"); + + // Store the block and validity information. It will be removed when a decision is reached for that height. 
+ state + .received_blocks + .push((height, round, value.clone(), valid)); + + if let Some(proposal) = state.driver.proposal.clone() { + if height == proposal.height() && round == proposal.round() { + let validity = value == *proposal.value() && valid.is_valid(); + myself.cast(Msg::SendDriverInput(DriverInput::Proposal( + proposal, + Validity::from_valid(validity), + )))?; + } + } } } diff --git a/code/crates/actors/src/consensus/metrics.rs b/code/crates/actors/src/consensus/metrics.rs new file mode 100644 index 000000000..0f49bbf14 --- /dev/null +++ b/code/crates/actors/src/consensus/metrics.rs @@ -0,0 +1,176 @@ +use std::ops::Deref; +use std::sync::Arc; + +use malachite_metrics::{linear_buckets, Counter, Gauge, Histogram, SharedRegistry}; + +#[derive(Clone, Debug)] +pub struct Metrics(Arc); + +impl Deref for Metrics { + type Target = Inner; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Clone, Debug)] +pub struct Inner { + /// Number of blocks finalized + pub finalized_blocks: Counter, + + /// Number of transactions finalized + pub finalized_txes: Counter, + + /// Time taken to finalize a block, in seconds + pub time_per_block: Histogram, + + /// Block size in terms of # of transactions + pub block_tx_count: Histogram, + + /// Size of each block in bytes + pub block_size_bytes: Histogram, + + /// Consensus rounds, ie. how many rounds did each block need to reach finalization + pub rounds_per_block: Histogram, + + /// Number of connected peers, ie. for each consensus node, how many peers is it connected to) + pub connected_peers: Gauge, + + /// Internal state for measuring time taken to finalize a block + instant_block_started: Arc, +} + +impl Metrics { + pub fn new() -> Self { + Self(Arc::new(Inner { + finalized_blocks: Counter::default(), + finalized_txes: Counter::default(), + time_per_block: Histogram::new(linear_buckets(0.0, 1.0, 20)), + block_tx_count: Histogram::new(linear_buckets(0.0, 32.0, 128)), + block_size_bytes: Histogram::new(linear_buckets(0.0, 64.0 * 1024.0, 128)), + rounds_per_block: Histogram::new(linear_buckets(0.0, 1.0, 20)), + connected_peers: Gauge::default(), + instant_block_started: Arc::new(AtomicInstant::empty()), + })) + } + + pub fn register(registry: &SharedRegistry) -> Self { + let metrics = Self::new(); + + registry.with_prefix("malachite_consensus", |registry| { + registry.register( + "finalized_blocks", + "Number of blocks finalized", + metrics.finalized_blocks.clone(), + ); + + registry.register( + "finalized_txes", + "Number of transactions finalized", + metrics.finalized_txes.clone(), + ); + + registry.register( + "time_per_block", + "Time taken to finalize a block, in seconds", + metrics.time_per_block.clone(), + ); + + registry.register( + "block_tx_count", + "Block size in terms of # of transactions", + metrics.block_tx_count.clone(), + ); + + registry.register( + "block_size_bytes", + "Size of each block in bytes", + metrics.block_size_bytes.clone(), + ); + + registry.register( + "rounds_per_block", + "Consensus rounds, ie. how many rounds did each block need to reach finalization", + metrics.rounds_per_block.clone(), + ); + + registry.register( + "connected_peers", + "Number of connected peers, ie. 
for each consensus node, how many peers is it connected to", + metrics.connected_peers.clone(), + ); + }); + + metrics + } + + pub fn block_start(&self) { + self.instant_block_started.set_now(); + } + + pub fn block_end(&self) { + if !self.instant_block_started.is_empty() { + let elapsed = self.instant_block_started.elapsed().as_secs_f64(); + self.time_per_block.observe(elapsed); + + self.instant_block_started.set_millis(0); + } + } +} + +impl Default for Metrics { + fn default() -> Self { + Self::new() + } +} + +use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::{Duration, SystemTime}, +}; + +#[derive(Default, Debug)] +struct AtomicInstant(AtomicU64); + +#[allow(dead_code)] +impl AtomicInstant { + pub fn now() -> Self { + Self(AtomicU64::new(Self::now_millis())) + } + + pub fn empty() -> Self { + Self(AtomicU64::new(0)) + } + + pub const fn from_millis(millis: u64) -> Self { + Self(AtomicU64::new(millis)) + } + + pub fn elapsed(&self) -> Duration { + Duration::from_millis(Self::now_millis() - self.as_millis()) + } + + pub fn as_millis(&self) -> u64 { + self.0.load(Ordering::SeqCst) + } + + pub fn set_now(&self) { + self.set_millis(Self::now_millis()); + } + + pub fn set_millis(&self, millis: u64) { + self.0.store(millis, Ordering::SeqCst); + } + + pub fn is_empty(&self) -> bool { + self.as_millis() == 0 + } + + fn now_millis() -> u64 { + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis() as u64 + } +} diff --git a/code/network/src/msg.rs b/code/crates/actors/src/consensus/network.rs similarity index 53% rename from code/network/src/msg.rs rename to code/crates/actors/src/consensus/network.rs index 7a016ba01..0fba130d8 100644 --- a/code/network/src/msg.rs +++ b/code/crates/actors/src/consensus/network.rs @@ -3,15 +3,16 @@ use prost_types::Any; use malachite_proto::Error as ProtoError; use malachite_proto::Protobuf; -use malachite_proto::{SignedProposal, SignedVote}; +use malachite_proto::{SignedBlockPart, SignedProposal, SignedVote}; #[derive(Clone, Debug, PartialEq)] -pub enum Msg { +pub enum NetworkMsg { Vote(SignedVote), Proposal(SignedProposal), + BlockPart(SignedBlockPart), } -impl Msg { +impl NetworkMsg { pub fn from_network_bytes(bytes: &[u8]) -> Result { Protobuf::from_bytes(bytes) } @@ -19,18 +20,29 @@ impl Msg { pub fn to_network_bytes(&self) -> Result, ProtoError> { Protobuf::to_bytes(self) } + + pub fn msg_height(&self) -> Option { + match self { + NetworkMsg::Vote(msg) => Some(msg.vote.as_ref()?.height.as_ref()?.value), + NetworkMsg::Proposal(msg) => Some(msg.proposal.as_ref()?.height.as_ref()?.value), + NetworkMsg::BlockPart(msg) => Some(msg.block_part.as_ref()?.height.as_ref()?.value), + } + } } -impl Protobuf for Msg { +impl Protobuf for NetworkMsg { type Proto = Any; fn from_proto(proto: Self::Proto) -> Result { if proto.type_url == SignedVote::type_url() { let vote = SignedVote::decode(proto.value.as_slice())?; - Ok(Msg::Vote(vote)) + Ok(NetworkMsg::Vote(vote)) } else if proto.type_url == SignedProposal::type_url() { let proposal = SignedProposal::decode(proto.value.as_slice())?; - Ok(Msg::Proposal(proposal)) + Ok(NetworkMsg::Proposal(proposal)) + } else if proto.type_url == SignedBlockPart::type_url() { + let block_part = SignedBlockPart::decode(proto.value.as_slice())?; + Ok(NetworkMsg::BlockPart(block_part)) } else { Err(ProtoError::UnknownMessageType { type_url: proto.type_url, @@ -40,14 +52,18 @@ impl Protobuf for Msg { fn to_proto(&self) -> Result { Ok(match self { - Msg::Vote(vote) => Any { + NetworkMsg::Vote(vote) => Any 
{ type_url: SignedVote::type_url(), value: vote.encode_to_vec(), }, - Msg::Proposal(proposal) => Any { + NetworkMsg::Proposal(proposal) => Any { type_url: SignedProposal::type_url(), value: proposal.encode_to_vec(), }, + NetworkMsg::BlockPart(block_part) => Any { + type_url: SignedBlockPart::type_url(), + value: block_part.encode_to_vec(), + }, }) } } diff --git a/code/actors/src/gossip.rs b/code/crates/actors/src/gossip_consensus.rs similarity index 68% rename from code/actors/src/gossip.rs rename to code/crates/actors/src/gossip_consensus.rs index 28098049e..ce0b7d21b 100644 --- a/code/actors/src/gossip.rs +++ b/code/crates/actors/src/gossip_consensus.rs @@ -1,32 +1,33 @@ +use std::collections::BTreeSet; use std::sync::Arc; use async_trait::async_trait; use libp2p::identity::Keypair; -use libp2p::Multiaddr; -use malachite_gossip::Channel; -use ractor::Actor; use ractor::ActorCell; use ractor::ActorProcessingErr; use ractor::ActorRef; +use ractor::{Actor, RpcReplyPort}; use tokio::task::JoinHandle; -use malachite_gossip::handle::CtrlHandle; -use malachite_gossip::Config; -use malachite_gossip::Event; +use malachite_gossip_consensus::handle::CtrlHandle; +use malachite_gossip_consensus::{Channel, Config, Event, PeerId}; +use malachite_metrics::SharedRegistry; -pub struct Gossip; +pub type GossipConsensusRef = ActorRef; -impl Gossip { +pub struct GossipConsensus; + +impl GossipConsensus { pub async fn spawn( keypair: Keypair, - addr: Multiaddr, config: Config, + metrics: SharedRegistry, supervisor: Option, ) -> Result, ractor::SpawnErr> { let args = Args { keypair, - addr, config, + metrics, }; let (actor_ref, _) = if let Some(supervisor) = supervisor { @@ -41,13 +42,14 @@ impl Gossip { pub struct Args { pub keypair: Keypair, - pub addr: Multiaddr, pub config: Config, + pub metrics: SharedRegistry, } pub enum State { Stopped, Running { + peers: BTreeSet, subscribers: Vec>>, ctrl_handle: CtrlHandle, recv_task: JoinHandle<()>, @@ -61,10 +63,14 @@ pub enum Msg { // Internal message #[doc(hidden)] NewEvent(Event), + // Request for number of peers from gossip + GetState { + reply: RpcReplyPort, + }, } #[async_trait] -impl Actor for Gossip { +impl Actor for GossipConsensus { type Msg = Msg; type State = State; type Arguments = Args; @@ -74,7 +80,9 @@ impl Actor for Gossip { myself: ActorRef, args: Args, ) -> Result { - let handle = malachite_gossip::spawn(args.keypair, args.addr, args.config).await?; + let handle = + malachite_gossip_consensus::spawn(args.keypair, args.config, args.metrics).await?; + let (mut recv_handle, ctrl_handle) = handle.split(); let recv_task = tokio::spawn({ @@ -86,6 +94,7 @@ impl Actor for Gossip { }); Ok(State::Running { + peers: BTreeSet::new(), subscribers: Vec::new(), ctrl_handle, recv_task, @@ -100,6 +109,7 @@ impl Actor for Gossip { Ok(()) } + #[tracing::instrument(name = "gossip.consensus", skip(self, _myself, msg, state))] async fn handle( &self, _myself: ActorRef, @@ -107,6 +117,7 @@ impl Actor for Gossip { state: &mut State, ) -> Result<(), ActorProcessingErr> { let State::Running { + peers, subscribers, ctrl_handle, .. 
@@ -119,11 +130,28 @@ impl Actor for Gossip { Msg::Subscribe(subscriber) => subscribers.push(subscriber), Msg::Broadcast(channel, data) => ctrl_handle.broadcast(channel, data).await?, Msg::NewEvent(event) => { + match event { + Event::PeerConnected(peer_id) => { + peers.insert(peer_id); + } + Event::PeerDisconnected(peer_id) => { + peers.remove(&peer_id); + } + _ => {} + } + let event = Arc::new(event); for subscriber in subscribers { subscriber.cast(Arc::clone(&event))?; } } + Msg::GetState { reply } => { + let number_peers = match state { + State::Stopped => 0, + State::Running { peers, .. } => peers.len(), + }; + reply.send(number_peers)?; + } } Ok(()) diff --git a/code/crates/actors/src/gossip_mempool.rs b/code/crates/actors/src/gossip_mempool.rs new file mode 100644 index 000000000..9325146aa --- /dev/null +++ b/code/crates/actors/src/gossip_mempool.rs @@ -0,0 +1,185 @@ +use std::collections::BTreeSet; +use std::sync::Arc; + +use async_trait::async_trait; +use libp2p::identity::Keypair; +use ractor::ActorCell; +use ractor::ActorProcessingErr; +use ractor::ActorRef; +use ractor::{Actor, RpcReplyPort}; +use tokio::task::JoinHandle; + +use malachite_common::MempoolTransactionBatch; +use malachite_gossip_mempool::handle::CtrlHandle; +use malachite_gossip_mempool::{Channel, Config, Event, PeerId}; +use malachite_metrics::SharedRegistry; +use malachite_proto::Protobuf; + +pub type GossipMempoolRef = ActorRef; + +pub struct GossipMempool; + +impl GossipMempool { + pub async fn spawn( + keypair: Keypair, + config: Config, + metrics: SharedRegistry, + supervisor: Option, + ) -> Result, ractor::SpawnErr> { + let args = Args { + keypair, + config, + metrics, + }; + + let (actor_ref, _) = if let Some(supervisor) = supervisor { + Actor::spawn_linked(None, Self, args, supervisor).await? + } else { + Actor::spawn(None, Self, args).await? + }; + + Ok(actor_ref) + } +} + +pub struct Args { + pub keypair: Keypair, + pub config: Config, + pub metrics: SharedRegistry, +} + +pub enum State { + Stopped, + Running { + peers: BTreeSet, + subscribers: Vec>>, + ctrl_handle: CtrlHandle, + recv_task: JoinHandle<()>, + }, +} + +pub enum Msg { + /// Subscribe to gossip events + Subscribe(ActorRef>), + + /// Broadcast a message to all peers + Broadcast(Channel, MempoolTransactionBatch), + + /// Request the number of connected peers + GetState { reply: RpcReplyPort }, + + // Internal message + #[doc(hidden)] + NewEvent(Event), +} + +#[async_trait] +impl Actor for GossipMempool { + type Msg = Msg; + type State = State; + type Arguments = Args; + + async fn pre_start( + &self, + myself: ActorRef, + args: Args, + ) -> Result { + let handle = + malachite_gossip_mempool::spawn(args.keypair, args.config, args.metrics).await?; + let (mut recv_handle, ctrl_handle) = handle.split(); + + let recv_task = tokio::spawn({ + async move { + while let Some(event) = recv_handle.recv().await { + myself.cast(Msg::NewEvent(event)).unwrap(); // FIXME + } + } + }); + + Ok(State::Running { + peers: BTreeSet::new(), + subscribers: Vec::new(), + ctrl_handle, + recv_task, + }) + } + + async fn post_start( + &self, + _myself: ActorRef, + _state: &mut State, + ) -> Result<(), ActorProcessingErr> { + Ok(()) + } + + #[tracing::instrument(name = "gossip.mempool", skip(self, _myself, msg, state))] + async fn handle( + &self, + _myself: ActorRef, + msg: Msg, + state: &mut State, + ) -> Result<(), ActorProcessingErr> { + let State::Running { + peers, + subscribers, + ctrl_handle, + .. 
+ } = state + else { + return Ok(()); + }; + + match msg { + Msg::Subscribe(subscriber) => subscribers.push(subscriber), + Msg::Broadcast(channel, batch) => { + let bytes = batch.to_bytes().unwrap(); + ctrl_handle.broadcast(channel, bytes).await? + } + Msg::NewEvent(event) => { + match event { + Event::PeerConnected(peer_id) => { + peers.insert(peer_id); + } + Event::PeerDisconnected(peer_id) => { + peers.remove(&peer_id); + } + _ => {} + } + + let event = Arc::new(event); + for subscriber in subscribers { + subscriber.cast(Arc::clone(&event))?; + } + } + Msg::GetState { reply } => { + let number_peers = match state { + State::Stopped => 0, + State::Running { peers, .. } => peers.len(), + }; + reply.send(number_peers)?; + } + } + + Ok(()) + } + + async fn post_stop( + &self, + _myself: ActorRef, + state: &mut State, + ) -> Result<(), ActorProcessingErr> { + let state = std::mem::replace(state, State::Stopped); + + if let State::Running { + ctrl_handle, + recv_task, + .. + } = state + { + ctrl_handle.wait_shutdown().await?; + recv_task.await?; + } + + Ok(()) + } +} diff --git a/code/crates/actors/src/host.rs b/code/crates/actors/src/host.rs new file mode 100644 index 000000000..2d7afcce3 --- /dev/null +++ b/code/crates/actors/src/host.rs @@ -0,0 +1,240 @@ +use std::marker::PhantomData; +use std::time::Duration; + +use derive_where::derive_where; +use ractor::{async_trait, Actor, ActorProcessingErr, ActorRef, RpcReplyPort}; +use tracing::info; + +use malachite_common::{Context, Round, SignedVote}; +use malachite_driver::Validity; + +use crate::consensus::{ConsensusRef, Msg as ConsensusMsg}; +use crate::value_builder::ValueBuilder; + +#[derive_where(Clone, Debug, PartialEq, Eq)] +pub struct LocallyProposedValue { + pub height: Ctx::Height, + pub round: Round, + pub value: Ctx::Value, +} + +/// Input to the round state machine. 
+#[derive_where(Clone, Debug, PartialEq, Eq)] +pub struct ReceivedProposedValue { + pub validator_address: Ctx::Address, + pub height: Ctx::Height, + pub round: Round, + pub value: Ctx::Value, + pub valid: Validity, +} + +pub type HostRef = ActorRef>; + +pub enum Msg { + // Request to build a local block/ value from Driver + GetValue { + height: Ctx::Height, + round: Round, + timeout_duration: Duration, + consensus: ConsensusRef, + address: Ctx::Address, + reply: RpcReplyPort>, + }, + + // BlockPart received <-- consensus <-- gossip + BlockPart { + block_part: Ctx::BlockPart, + reply_to: ConsensusRef, + }, + + // Retrieve a block/ value for which all parts have been received + GetReceivedValue { + height: Ctx::Height, + round: Round, + reply_to: RpcReplyPort>>, + }, + + GetValidatorSet { + height: Ctx::Height, + reply_to: RpcReplyPort, + }, + + // Decided value + DecidedOnValue { + height: Ctx::Height, + round: Round, + value: Ctx::Value, + commits: Vec>, + }, +} + +pub struct State { + validator_set: Ctx::ValidatorSet, + value_builder: Box>, +} + +pub struct Args { + validator_set: Ctx::ValidatorSet, + value_builder: Box>, +} + +pub struct Host { + marker: PhantomData, +} + +impl Host +where + Ctx: Context, +{ + pub async fn spawn( + value_builder: Box>, + validator_set: Ctx::ValidatorSet, + ) -> Result>, ActorProcessingErr> { + let (actor_ref, _) = Actor::spawn( + None, + Self { + marker: PhantomData, + }, + Args { + validator_set, + value_builder, + }, + ) + .await?; + + Ok(actor_ref) + } + + async fn get_value( + &self, + height: Ctx::Height, + round: Round, + timeout_duration: Duration, + address: Ctx::Address, + consensus: ConsensusRef, + value_builder: &mut dyn ValueBuilder, + ) -> Result, ActorProcessingErr> { + let value = value_builder + .build_value_locally(height, round, timeout_duration, address, consensus) + .await; + + match value { + Some(value) => Ok(value), + None => Err(eyre::eyre!("Value Builder failed to produce a value").into()), + } + } + + async fn build_value( + &self, + block_part: Ctx::BlockPart, + value_builder: &mut dyn ValueBuilder, + ) -> Result>, ActorProcessingErr> { + let value = value_builder.build_value_from_block_parts(block_part).await; + + if let Some(value) = &value { + info!("Value Builder received all parts, produced value for proposal: {value:?}",); + } + + Ok(value) + } +} + +#[async_trait] +impl Actor for Host { + type Msg = Msg; + type State = State; + type Arguments = Args; + + async fn pre_start( + &self, + _myself: ActorRef, + args: Self::Arguments, + ) -> Result { + Ok(State { + validator_set: args.validator_set, + value_builder: args.value_builder, + }) + } + + #[tracing::instrument(name = "host", skip(self, _myself, msg, state))] + async fn handle( + &self, + _myself: ActorRef, + msg: Self::Msg, + state: &mut Self::State, + ) -> Result<(), ActorProcessingErr> { + match msg { + Msg::GetValue { + height, + round, + timeout_duration, + consensus, + reply, + address, + } => { + let value = self + .get_value( + height, + round, + timeout_duration, + address, + consensus, + state.value_builder.as_mut(), + ) + .await?; + + reply.send(value)?; + } + + Msg::BlockPart { + block_part, + reply_to, + } => { + let maybe_block = self + .build_value(block_part, state.value_builder.as_mut()) + .await?; + + // Send the proposed value (from blockparts) to consensus/ Driver + if let Some(value_assembled) = maybe_block { + reply_to.cast(ConsensusMsg::BlockReceived(value_assembled))?; + } + } + + Msg::GetReceivedValue { + height, + round, + reply_to, + } => { 
+ let value = state + .value_builder + .maybe_received_value(height, round) + .await; + + reply_to.send(value)?; + } + + Msg::DecidedOnValue { + height, + round, + value, + commits, + } => { + let _v = state + .value_builder + .decided_on_value(height, round, value, commits) + .await; + } + + Msg::GetValidatorSet { + height: _, + reply_to, + } => { + // FIXME: This is just a stub + let validator_set = state.validator_set.clone(); + reply_to.send(validator_set)?; + } + } + + Ok(()) + } +} diff --git a/code/crates/actors/src/lib.rs b/code/crates/actors/src/lib.rs new file mode 100644 index 000000000..f5b5acde6 --- /dev/null +++ b/code/crates/actors/src/lib.rs @@ -0,0 +1,10 @@ +pub mod consensus; +pub mod gossip_consensus; +pub mod gossip_mempool; +pub mod host; +pub mod mempool; +pub mod node; +pub mod prelude; +pub mod timers; +pub mod util; +pub mod value_builder; diff --git a/code/crates/actors/src/mempool.rs b/code/crates/actors/src/mempool.rs new file mode 100644 index 000000000..7d4972d5e --- /dev/null +++ b/code/crates/actors/src/mempool.rs @@ -0,0 +1,275 @@ +use std::collections::{BTreeMap, VecDeque}; +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::sync::Arc; + +use async_trait::async_trait; +use ractor::{Actor, ActorCell, ActorProcessingErr, ActorRef, RpcReplyPort}; +use rand::distributions::Uniform; +use rand::Rng; +use tracing::{info, trace}; + +use malachite_common::{MempoolTransactionBatch, Transaction, TransactionBatch}; +use malachite_gossip_mempool::{Channel, Event as GossipEvent, PeerId}; +use malachite_node::config::{MempoolConfig, TestConfig}; +use malachite_proto::Protobuf; + +use crate::gossip_mempool::{GossipMempoolRef, Msg as GossipMempoolMsg}; +use crate::util::forward; + +#[derive(Clone, Debug, PartialEq)] +pub enum NetworkMsg { + TransactionBatch(MempoolTransactionBatch), +} + +impl NetworkMsg { + pub fn from_network_bytes(bytes: &[u8]) -> Self { + let batch = Protobuf::from_bytes(bytes).unwrap(); // FIXME: Error handling + NetworkMsg::TransactionBatch(batch) + } + + pub fn to_network_bytes(&self) -> malachite_proto::MempoolTransactionBatch { + match self { + NetworkMsg::TransactionBatch(batch) => batch.to_proto().unwrap(), // FXME: Error handling + } + } +} + +pub type MempoolRef = ActorRef; + +pub struct Mempool { + gossip_mempool: GossipMempoolRef, + mempool_config: MempoolConfig, // todo - pick only what's needed + test_config: TestConfig, // todo - pick only the mempool related +} + +pub enum Msg { + GossipEvent(Arc), + Input(Transaction), + TxStream { + height: u64, + num_txes: usize, + reply: RpcReplyPort>, + }, + Update { + tx_hashes: Vec, + }, +} + +#[allow(dead_code)] +pub struct State { + msg_queue: VecDeque, + pub transactions: BTreeMap, +} + +impl State { + pub fn new() -> Self { + Self { + msg_queue: VecDeque::new(), + transactions: BTreeMap::new(), + } + } + + pub fn add_tx(&mut self, tx: &Transaction) { + let mut hash = DefaultHasher::new(); + tx.0.hash(&mut hash); + let key = hash.finish(); + self.transactions.entry(key).or_insert(tx.clone()); + } + + pub fn remove_tx(&mut self, hash: &u64) { + self.transactions.remove_entry(hash); + } +} + +impl Default for State { + fn default() -> Self { + Self::new() + } +} + +impl Mempool { + pub fn new( + gossip_mempool: GossipMempoolRef, + mempool_config: MempoolConfig, + test_config: TestConfig, + ) -> Self { + Self { + gossip_mempool, + mempool_config, + test_config, + } + } + + pub async fn spawn( + gossip_mempool: GossipMempoolRef, + mempool_config: &MempoolConfig, + test_config: &TestConfig, + 
supervisor: Option, + ) -> Result, ractor::SpawnErr> { + let node = Self::new(gossip_mempool, mempool_config.clone(), *test_config); + + let (actor_ref, _) = if let Some(supervisor) = supervisor { + Actor::spawn_linked(None, node, (), supervisor).await? + } else { + Actor::spawn(None, node, ()).await? + }; + + Ok(actor_ref) + } + + pub async fn handle_gossip_event( + &self, + event: &GossipEvent, + myself: ActorRef, + state: &mut State, + ) -> Result<(), ractor::ActorProcessingErr> { + match event { + GossipEvent::Listening(addr) => { + info!("Listening on {addr}"); + } + GossipEvent::PeerConnected(peer_id) => { + info!("Connected to peer {peer_id}"); + } + GossipEvent::PeerDisconnected(peer_id) => { + info!("Disconnected from peer {peer_id}"); + } + GossipEvent::Message(from, Channel::Mempool, data) => { + trace!(%from, "Received message of size {} bytes", data.len()); + + let msg = NetworkMsg::from_network_bytes(data); + self.handle_network_msg(from, msg, myself, state).await?; + } + } + + Ok(()) + } + + pub async fn handle_network_msg( + &self, + from: &PeerId, + msg: NetworkMsg, + myself: ActorRef, + _state: &mut State, + ) -> Result<(), ractor::ActorProcessingErr> { + match msg { + NetworkMsg::TransactionBatch(batch) => { + trace!(%from, "Received batch with {} transactions", batch.len()); + + for tx in batch.transaction_batch.into_transactions() { + myself.cast(Msg::Input(tx))?; + } + } + } + + Ok(()) + } +} + +#[async_trait] +impl Actor for Mempool { + type Msg = Msg; + type State = State; + type Arguments = (); + + async fn pre_start( + &self, + myself: ActorRef, + _args: (), + ) -> Result { + let forward = forward(myself.clone(), Some(myself.get_cell()), Msg::GossipEvent).await?; + self.gossip_mempool + .cast(GossipMempoolMsg::Subscribe(forward))?; + + Ok(State::new()) + } + + #[tracing::instrument(name = "mempool", skip(self, myself, msg, state))] + async fn handle( + &self, + myself: ActorRef, + msg: Msg, + state: &mut State, + ) -> Result<(), ractor::ActorProcessingErr> { + match msg { + Msg::GossipEvent(event) => { + self.handle_gossip_event(&event, myself, state).await?; + } + + Msg::Input(tx) => { + if state.transactions.len() < self.mempool_config.max_tx_count { + state.add_tx(&tx); + } else { + trace!("Mempool is full, dropping transaction"); + } + } + + Msg::TxStream { + reply, num_txes, .. + } => { + let txes = generate_and_broadcast_txes( + num_txes, + self.test_config.tx_size.as_u64(), + &self.mempool_config, + state, + &self.gossip_mempool, + )?; + + reply.send(txes)?; + } + + Msg::Update { .. 
} => { + //tx_hashes.iter().for_each(|hash| state.remove_tx(hash)); + // TODO - reset the mempool for now + state.transactions = BTreeMap::new(); + info!("Mempool after Update has size {}", state.transactions.len()); + } + } + + Ok(()) + } + + async fn post_stop( + &self, + _myself: ActorRef, + _state: &mut State, + ) -> Result<(), ActorProcessingErr> { + info!("Stopping..."); + + Ok(()) + } +} + +fn generate_and_broadcast_txes( + count: usize, + size: u64, + config: &MempoolConfig, + state: &mut State, + gossip_mempool: &GossipMempoolRef, +) -> Result, ActorProcessingErr> { + let mut transactions = vec![]; + let mut tx_batch = TransactionBatch::default(); + let mut rng = rand::thread_rng(); + + for _ in 0..count { + // Generate transaction + let range = Uniform::new(32, 64); + let tx_bytes: Vec = (0..size).map(|_| rng.sample(range)).collect(); + let tx = Transaction::new(tx_bytes); + + // Add transaction to state + if state.transactions.len() < config.max_tx_count { + state.add_tx(&tx); + } + tx_batch.push(tx.clone()); + + // Gossip tx-es to peers in batches + if config.gossip_batch_size > 0 && tx_batch.len() >= config.gossip_batch_size { + let mempool_batch = MempoolTransactionBatch::new(std::mem::take(&mut tx_batch)); + gossip_mempool.cast(GossipMempoolMsg::Broadcast(Channel::Mempool, mempool_batch))?; + } + + transactions.push(tx); + } + + Ok(transactions) +} diff --git a/code/crates/actors/src/node.rs b/code/crates/actors/src/node.rs new file mode 100644 index 000000000..52b5440e0 --- /dev/null +++ b/code/crates/actors/src/node.rs @@ -0,0 +1,109 @@ +use async_trait::async_trait; +use ractor::{Actor, ActorRef}; +use tokio::sync::mpsc; +use tokio::task::JoinHandle; + +use malachite_common::{Context, Round}; +use malachite_proto::Protobuf; +use malachite_vote::ThresholdParams; + +use crate::consensus::ConsensusRef; +use crate::gossip_consensus::GossipConsensusRef; +use crate::gossip_mempool::GossipMempoolRef; +use crate::host::HostRef; +use crate::mempool::MempoolRef; +use crate::timers::Config as TimersConfig; + +pub type NodeRef = ActorRef<()>; + +pub struct Params { + pub address: Ctx::Address, + pub initial_validator_set: Ctx::ValidatorSet, + pub keypair: malachite_gossip_consensus::Keypair, + pub start_height: Ctx::Height, + pub threshold_params: ThresholdParams, + pub timers_config: TimersConfig, + pub gossip_mempool: GossipMempoolRef, + pub mempool: MempoolRef, + pub tx_decision: mpsc::Sender<(Ctx::Height, Round, Ctx::Value)>, +} + +#[allow(dead_code)] +pub struct Node { + ctx: Ctx, + gossip_consensus: GossipConsensusRef, + consensus: ConsensusRef, + gossip_mempool: GossipMempoolRef, + mempool: MempoolRef, + host: HostRef, + start_height: Ctx::Height, +} + +impl Node +where + Ctx: Context, + Ctx::Vote: Protobuf, + Ctx::Proposal: Protobuf, +{ + #[allow(clippy::too_many_arguments)] + pub fn new( + ctx: Ctx, + gossip_consensus: GossipConsensusRef, + consensus: ConsensusRef, + gossip_mempool: GossipMempoolRef, + mempool: MempoolRef, + host: HostRef, + start_height: Ctx::Height, + ) -> Self { + Self { + ctx, + gossip_consensus, + consensus, + gossip_mempool, + mempool, + host, + start_height, + } + } + + pub async fn spawn(self) -> Result<(ActorRef<()>, JoinHandle<()>), ractor::SpawnErr> { + Actor::spawn(None, self, ()).await + } +} + +#[async_trait] +impl Actor for Node +where + Ctx: Context, + Ctx::Vote: Protobuf, + Ctx::Proposal: Protobuf, +{ + type Msg = (); + type State = (); + type Arguments = (); + + async fn pre_start( + &self, + myself: ActorRef, + _args: (), + ) -> Result<(), 
ractor::ActorProcessingErr> { + // Set ourselves as the supervisor of the other actors + self.gossip_consensus.link(myself.get_cell()); + self.consensus.link(myself.get_cell()); + self.gossip_mempool.link(myself.get_cell()); + self.mempool.link(myself.get_cell()); + self.host.link(myself.get_cell()); + + Ok(()) + } + + #[tracing::instrument(name = "node", skip(self, _myself, _msg, _state))] + async fn handle( + &self, + _myself: ActorRef, + _msg: Self::Msg, + _state: &mut (), + ) -> Result<(), ractor::ActorProcessingErr> { + Ok(()) + } +} diff --git a/code/actors/src/prelude.rs b/code/crates/actors/src/prelude.rs similarity index 100% rename from code/actors/src/prelude.rs rename to code/crates/actors/src/prelude.rs diff --git a/code/actors/src/timers.rs b/code/crates/actors/src/timers.rs similarity index 74% rename from code/actors/src/timers.rs rename to code/crates/actors/src/timers.rs index 3cdcb484a..7be2ea3b5 100644 --- a/code/actors/src/timers.rs +++ b/code/crates/actors/src/timers.rs @@ -18,9 +18,11 @@ impl TimeoutElapsed { } } +pub type TimersRef = ActorRef; + pub struct Timers { - config: Config, listener: ActorRef, + initial_config: Config, } impl Timers @@ -28,34 +30,50 @@ where M: From + ractor::Message, { pub async fn spawn( - config: Config, + initial_config: Config, listener: ActorRef, ) -> Result<(ActorRef, JoinHandle<()>), ractor::SpawnErr> { - Actor::spawn(None, Self { config, listener }, ()).await + Actor::spawn( + None, + Self { + listener, + initial_config, + }, + initial_config, + ) + .await } pub async fn spawn_linked( - config: Config, + initial_config: Config, listener: ActorRef, supervisor: ActorCell, ) -> Result<(ActorRef, JoinHandle<()>), ractor::SpawnErr> { - Actor::spawn_linked(None, Self { config, listener }, (), supervisor).await - } - - pub fn timeout_duration(&self, step: &TimeoutStep) -> Duration { - match step { - TimeoutStep::Propose => self.config.timeout_propose, - TimeoutStep::Prevote => self.config.timeout_prevote, - TimeoutStep::Precommit => self.config.timeout_precommit, - TimeoutStep::Commit => self.config.timeout_commit, - } + Actor::spawn_linked( + None, + Self { + listener, + initial_config, + }, + initial_config, + supervisor, + ) + .await } } pub enum Msg { + /// Schedule the given timeout ScheduleTimeout(Timeout), + + /// Cancel the given timeout CancelTimeout(Timeout), - Reset, + + /// Cancel all the timeouts + CancelAllTimeouts, + + /// Reset all timeouts values to their original values + ResetTimeouts, // Internal messages #[doc(hidden)] @@ -66,9 +84,41 @@ type TimerTask = JoinHandle>>; #[derive(Default)] pub struct State { + config: Config, timers: HashMap, } +impl State { + pub fn timeout_elapsed(&mut self, timeout: &Timeout) { + self.timers.remove(timeout); + self.increase_timeout(&timeout.step); + } + + pub fn increase_timeout(&mut self, step: &TimeoutStep) { + match step { + TimeoutStep::Propose => { + self.config.timeout_propose += self.config.timeout_propose_delta + } + TimeoutStep::Prevote => { + self.config.timeout_prevote += self.config.timeout_prevote_delta + } + TimeoutStep::Precommit => { + self.config.timeout_precommit += self.config.timeout_precommit_delta + } + TimeoutStep::Commit => (), + } + } + + pub fn timeout_duration(&self, step: &TimeoutStep) -> Duration { + match step { + TimeoutStep::Propose => self.config.timeout_propose, + TimeoutStep::Prevote => self.config.timeout_prevote, + TimeoutStep::Precommit => self.config.timeout_precommit, + TimeoutStep::Commit => self.config.timeout_commit, + } + } +} + 
#[async_trait] impl Actor for Timers where @@ -76,16 +126,20 @@ where { type Msg = Msg; type State = State; - type Arguments = (); + type Arguments = Config; async fn pre_start( &self, _myself: ActorRef, - _args: (), + config: Config, ) -> Result { - Ok(State::default()) + Ok(State { + config, + ..Default::default() + }) } + #[tracing::instrument(name = "timers", skip(self, myself, msg, state))] async fn handle( &self, myself: ActorRef, @@ -94,7 +148,7 @@ where ) -> Result<(), ActorProcessingErr> { match msg { Msg::ScheduleTimeout(timeout) => { - let duration = self.timeout_duration(&timeout.step); + let duration = state.timeout_duration(&timeout.step); let task = send_after(duration, myself.get_cell(), move || { Msg::TimeoutElapsed(timeout) }); @@ -108,15 +162,21 @@ where } } - Msg::Reset => { + Msg::TimeoutElapsed(timeout) => { + state.timeout_elapsed(&timeout); + self.listener.cast(TimeoutElapsed(timeout).into())?; + } + + Msg::CancelAllTimeouts => { + // Cancel all the timers for (_, task) in state.timers.drain() { task.abort(); } } - Msg::TimeoutElapsed(timeout) => { - state.timers.remove(&timeout); - self.listener.cast(TimeoutElapsed(timeout).into())?; + Msg::ResetTimeouts => { + // Reset the timeouts to their original values + state.config = self.initial_config; } } diff --git a/code/actors/src/util/forward.rs b/code/crates/actors/src/util/forward.rs similarity index 100% rename from code/actors/src/util/forward.rs rename to code/crates/actors/src/util/forward.rs diff --git a/code/crates/actors/src/util/mod.rs b/code/crates/actors/src/util/mod.rs new file mode 100644 index 000000000..99a32f5d2 --- /dev/null +++ b/code/crates/actors/src/util/mod.rs @@ -0,0 +1,3 @@ +mod forward; + +pub use forward::{forward, Forward}; diff --git a/code/crates/actors/src/value_builder.rs b/code/crates/actors/src/value_builder.rs new file mode 100644 index 000000000..1bf8cf639 --- /dev/null +++ b/code/crates/actors/src/value_builder.rs @@ -0,0 +1,39 @@ +use std::time::Duration; + +use async_trait::async_trait; + +use malachite_common::{Context, Round, SignedVote}; + +use crate::consensus::ConsensusRef; +use crate::host::{LocallyProposedValue, ReceivedProposedValue}; + +#[async_trait] +pub trait ValueBuilder: Send + Sync + 'static { + async fn build_value_locally( + &mut self, + height: Ctx::Height, + round: Round, + timeout_duration: Duration, + address: Ctx::Address, + consensus: ConsensusRef, + ) -> Option>; + + async fn build_value_from_block_parts( + &mut self, + block_part: Ctx::BlockPart, + ) -> Option>; + + async fn maybe_received_value( + &mut self, + height: Ctx::Height, + round: Round, + ) -> Option>; + + async fn decided_on_value( + &mut self, + height: Ctx::Height, + round: Round, + value: Ctx::Value, + commits: Vec>, + ); +} diff --git a/code/crates/cli/Cargo.toml b/code/crates/cli/Cargo.toml new file mode 100644 index 000000000..aa77c4755 --- /dev/null +++ b/code/crates/cli/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "malachite-cli" +version.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true + +[lints] +workspace = true + +[dependencies] +malachite-metrics.workspace = true +malachite-node.workspace = true +malachite-test.workspace = true +malachite-test-app.workspace = true + +axum = { workspace = true } +bytesize = { workspace = true } +clap = { workspace = true, features = ["derive", "env"] } +color-eyre = { workspace = true } +config = { workspace = true } +directories = { workspace = true } +itertools = { 
workspace = true } +tokio = { workspace = true, features = ["full"] } +tracing = { workspace = true } +tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +rand = { workspace = true } +toml = { workspace = true } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/code/crates/cli/src/args.rs b/code/crates/cli/src/args.rs new file mode 100644 index 000000000..6b9863313 --- /dev/null +++ b/code/crates/cli/src/args.rs @@ -0,0 +1,182 @@ +//! Node command-line interface configuration +//! +//! The node CLI reads configuration from the configuration files found in the directory +//! provided with the `--home` global parameter. +//! +//! The command-line parameters are stored in the `Args` structure. +//! `clap` parses the command-line parameters into this structure. + +use std::path::{Path, PathBuf}; + +use clap::{Parser, Subcommand}; +use color_eyre::eyre::{eyre, Context, Result}; +use directories::BaseDirs; +use tracing::info; + +use malachite_node::config::Config; +use malachite_test::{PrivateKey, ValidatorSet}; + +use crate::cmd::keys::KeysCmd; +use crate::cmd::testnet::TestnetCmd; +use crate::logging::DebugSection; +use crate::priv_key::PrivValidatorKey; + +const APP_FOLDER: &str = ".malachite"; +const CONFIG_FILE: &str = "config.toml"; +const GENESIS_FILE: &str = "genesis.json"; +const PRIV_VALIDATOR_KEY_FILE: &str = "priv_validator_key.json"; + +#[derive(Parser, Clone, Debug, Default)] +#[command(version, about, long_about = None)] +pub struct Args { + /// Home directory for Malachite (default: `~/.malachite`) + #[arg(long, global = true, value_name = "HOME_DIR")] + pub home: Option, + + #[clap( + long, + global = true, + help = "Enable debug output for the given comma-separated sections", + value_enum, + value_delimiter = ',' + )] + pub debug: Vec, + + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand, Clone, Debug, Default)] +pub enum Commands { + /// Start node + #[default] + Start, + + /// Initialize configuration + Init, + + /// Manage keys + #[command(subcommand)] + Keys(KeysCmd), + + /// Generate testnet configuration + Testnet(TestnetCmd), +} + +impl Args { + /// new returns a new instance of the configuration. + pub fn new() -> Args { + Args::parse() + } + + /// get_home_dir returns the application home folder. + /// Typically, `$HOME/.malachite`, dependent on the operating system. + pub fn get_home_dir(&self) -> Result { + match self.home { + Some(ref path) => Ok(path.clone()), + None => Ok(BaseDirs::new() + .ok_or_else(|| eyre!("could not determine home directory path"))? + .home_dir() + .join(APP_FOLDER)), + } + } + + /// get_config_dir returns the configuration folder based on the home folder. + pub fn get_config_dir(&self) -> Result { + Ok(self.get_home_dir()?.join("config")) + } + + /// get_config_file_path returns the configuration file path based on the command-line arguments + /// and the configuration folder. + pub fn get_config_file_path(&self) -> Result { + Ok(self.get_config_dir()?.join(CONFIG_FILE)) + } + + /// get_genesis_file_path returns the genesis file path based on the command-line arguments and + /// the configuration folder. + pub fn get_genesis_file_path(&self) -> Result { + Ok(self.get_config_dir()?.join(GENESIS_FILE)) + } + + /// get_priv_validator_key_file_path returns the private validator key file path based on the + /// configuration folder. 
+ pub fn get_priv_validator_key_file_path(&self) -> Result { + Ok(self.get_config_dir()?.join(PRIV_VALIDATOR_KEY_FILE)) + } + + /// load_config returns a configuration compiled from the input parameters + pub fn load_config(&self) -> Result { + let config_file = self.get_config_file_path()?; + info!("Loading configuration from {:?}", config_file.display()); + + let config = config::Config::builder() + .add_source(config::File::from(config_file)) + .add_source(config::Environment::with_prefix("MALACHITE").separator("__")) + .build()? + .try_deserialize()?; + + Ok(config) + } + + /// load_genesis returns the validator set from the genesis file + pub fn load_genesis(&self) -> Result { + let genesis_file = self.get_genesis_file_path()?; + info!("Loading genesis from {:?}", genesis_file.display()); + load_json_file(&genesis_file) + } + + /// load_private_key returns the private key either from the command-line parameter or + /// from the priv_validator_key.json file. + pub fn load_private_key(&self) -> Result { + let priv_key_file = self.get_priv_validator_key_file_path()?; + info!("Loading private key from {:?}", priv_key_file.display()); + let priv_validator_key: PrivValidatorKey = load_json_file(&priv_key_file)?; + Ok(priv_validator_key.private_key) + } +} + +fn load_json_file(file: &Path) -> Result +where + T: for<'de> serde::Deserialize<'de>, +{ + let content = std::fs::read_to_string(file) + .wrap_err_with(|| eyre!("Failed to read configuration file at {}", file.display()))?; + + serde_json::from_str(&content) + .wrap_err_with(|| eyre!("Failed to load configuration at {}", file.display(),)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn args_struct() { + let args = Args::parse_from(["test", "--debug", "ractor", "init"]); + assert_eq!(args.debug, vec![DebugSection::Ractor]); + assert!(matches!(args.command, Commands::Init)); + + let args = Args::parse_from(["test", "start"]); + assert_eq!(args.debug, vec![]); + assert!(matches!(args.command, Commands::Start)); + } + + #[test] + fn args_methods() { + use std::io::Write; + use tempfile::NamedTempFile; + + #[derive(serde::Deserialize)] + struct TestStruct {} + + let args = Args::parse_from(["test", "start"]); + assert!(args.get_config_file_path().is_ok()); + assert!(args.get_genesis_file_path().is_ok()); + assert!(load_json_file::(&PathBuf::from("nonexistent.json")).is_err()); + + let tmpfile = NamedTempFile::new().unwrap(); + let mut file = tmpfile.as_file(); + writeln!(file, "{{}}").unwrap(); + assert!(load_json_file::(&PathBuf::from(tmpfile.path())).is_ok()); + } +} diff --git a/code/crates/cli/src/cmd/init.rs b/code/crates/cli/src/cmd/init.rs new file mode 100644 index 000000000..1a362e2ef --- /dev/null +++ b/code/crates/cli/src/cmd/init.rs @@ -0,0 +1,102 @@ +//! 
Init command + +use std::fs; +use std::path::Path; + +use color_eyre::eyre::{eyre, Context, Result}; +use tracing::{info, warn}; + +use malachite_node::config::Config; +use malachite_test::ValidatorSet as Genesis; + +use crate::cmd::testnet::{generate_config, generate_genesis, generate_private_keys}; +use crate::priv_key::PrivValidatorKey; + +/// Execute the init command +pub fn run(config_file: &Path, genesis_file: &Path, priv_validator_key_file: &Path) -> Result<()> { + // Save default configuration + if config_file.exists() { + warn!( + "Configuration file already exists at {:?}, skipping", + config_file.display() + ) + } else { + info!("Saving configuration to {:?}", config_file); + save_config(config_file, &generate_config(0, 1))?; + } + + // Save default genesis + if genesis_file.exists() { + warn!( + "Genesis file already exists at {:?}, skipping", + genesis_file.display() + ) + } else { + let private_keys = generate_private_keys(1, true); + let public_keys = private_keys.iter().map(|pk| pk.public_key()).collect(); + let genesis = generate_genesis(public_keys, true); + info!("Saving test genesis to {:?}.", genesis_file); + save_genesis(genesis_file, &genesis)?; + } + + // Save default priv_validator_key + if priv_validator_key_file.exists() { + warn!( + "Private key file already exists at {:?}, skipping", + priv_validator_key_file.display() + ) + } else { + info!("Saving private key to {:?}", priv_validator_key_file); + let private_keys = generate_private_keys(1, false); + let priv_validator_key = PrivValidatorKey::from(private_keys[0].clone()); + save_priv_validator_key(priv_validator_key_file, &priv_validator_key)?; + } + + Ok(()) +} + +/// Save configuration to file +pub fn save_config(config_file: &Path, config: &Config) -> Result<()> { + save(config_file, &toml::to_string_pretty(config)?) +} + +/// Save genesis to file +pub fn save_genesis(genesis_file: &Path, genesis: &Genesis) -> Result<()> { + save(genesis_file, &serde_json::to_string_pretty(genesis)?) 
+}
+
+/// Save the private validator key to file
+pub fn save_priv_validator_key(
+    priv_validator_key_file: &Path,
+    priv_validator_key: &PrivValidatorKey,
+) -> Result<()> {
+    save(
+        priv_validator_key_file,
+        &serde_json::to_string_pretty(priv_validator_key)?,
+    )
+}
+
+fn save(path: &Path, data: &str) -> Result<()> {
+    use std::io::Write;
+
+    if let Some(parent_dir) = path.parent() {
+        fs::create_dir_all(parent_dir).wrap_err_with(|| {
+            eyre!(
+                "Failed to create parent directory {:?}",
+                parent_dir.display()
+            )
+        })?;
+    }
+
+    let mut f = fs::OpenOptions::new()
+        .write(true)
+        .create(true)
+        .truncate(true)
+        .open(path)
+        .wrap_err_with(|| eyre!("Failed to create configuration file at {:?}", path.display()))?;
+
+    f.write_all(data.as_bytes())
+        .wrap_err_with(|| eyre!("Failed to write configuration to {:?}", path.display()))?;
+
+    Ok(())
+}
diff --git a/code/crates/cli/src/cmd/keys/generate.rs b/code/crates/cli/src/cmd/keys/generate.rs
new file mode 100644
index 000000000..c776c2afb
--- /dev/null
+++ b/code/crates/cli/src/cmd/keys/generate.rs
@@ -0,0 +1,31 @@
+use std::path::PathBuf;
+
+use color_eyre::eyre::Result;
+use malachite_test::{Address, PrivateKey};
+use tracing::info;
+
+use crate::args::Args;
+
+#[derive(clap::Args, Clone, Debug)]
+pub struct GenerateCmd {
+    #[clap(short, long, value_name = "OUTPUT_FILE")]
+    output: PathBuf,
+}
+
+impl GenerateCmd {
+    pub fn run(&self, _args: &Args) -> Result<()> {
+        let rng = rand::thread_rng();
+        let pk = PrivateKey::generate(rng);
+
+        let address = Address::from_public_key(&pk.public_key());
+        info!("Generated key with address: {address}");
+
+        let public_key = pk.public_key();
+        info!("Public key: {}", serde_json::to_string_pretty(&public_key)?);
+
+        info!("Saving private key to {:?}", self.output);
+        std::fs::write(&self.output, serde_json::to_vec(&pk)?)?;
+
+        Ok(())
+    }
+}
diff --git a/code/crates/cli/src/cmd/keys/mod.rs b/code/crates/cli/src/cmd/keys/mod.rs
new file mode 100644
index 000000000..0595e328a
--- /dev/null
+++ b/code/crates/cli/src/cmd/keys/mod.rs
@@ -0,0 +1,21 @@
+use clap::Subcommand;
+use color_eyre::eyre::Result;
+
+use crate::args::Args;
+
+pub mod generate;
+
+/// Manage keys
+#[derive(Subcommand, Clone, Debug)]
+pub enum KeysCmd {
+    /// Generate a new key
+    Generate(generate::GenerateCmd),
+}
+
+impl KeysCmd {
+    pub fn run(&self, args: &Args) -> Result<()> {
+        match self {
+            KeysCmd::Generate(cmd) => cmd.run(args),
+        }
+    }
+}
diff --git a/code/crates/cli/src/cmd/mod.rs b/code/crates/cli/src/cmd/mod.rs
new file mode 100644
index 000000000..bc66285fd
--- /dev/null
+++ b/code/crates/cli/src/cmd/mod.rs
@@ -0,0 +1,4 @@
+pub mod init;
+pub mod keys;
+pub mod start;
+pub mod testnet;
diff --git a/code/crates/cli/src/cmd/start.rs b/code/crates/cli/src/cmd/start.rs
new file mode 100644
index 000000000..7ac127f6a
--- /dev/null
+++ b/code/crates/cli/src/cmd/start.rs
@@ -0,0 +1,50 @@
+use color_eyre::eyre::Result;
+
+use tokio::sync::mpsc;
+use tracing::{info, Instrument};
+
+use malachite_node::config::Config;
+use malachite_test::{Address, PrivateKey, ValidatorSet};
+use malachite_test_app::spawn::spawn_node_actor;
+
+use crate::metrics;
+
+pub async fn run(sk: PrivateKey, cfg: Config, vs: ValidatorSet) -> Result<()> {
+    let val_address = Address::from_public_key(&sk.public_key());
+    let moniker = cfg.moniker.clone();
+
+    let span = tracing::error_span!("node", %moniker);
+    let _enter = span.enter();
+
+    if cfg.metrics.enabled {
+        tokio::spawn(metrics::serve(cfg.metrics.clone()).instrument(span.clone()));
+    }
+
info!("Node is starting..."); + + let (tx_decision, mut rx_decision) = mpsc::channel(32); + let (actor, handle) = spawn_node_actor(cfg, vs, sk.clone(), sk, val_address, tx_decision).await; + + tokio::spawn({ + let actor = actor.clone(); + { + async move { + tokio::signal::ctrl_c().await.unwrap(); + info!("Shutting down..."); + actor.stop(None); + } + } + .instrument(span.clone()) + }); + + while let Some((height, round, value)) = rx_decision.recv().await { + info!( + "Decision at height {height} and round {round}: {:?}", + value.id() + ); + } + + handle.await?; + + Ok(()) +} diff --git a/code/crates/cli/src/cmd/testnet.rs b/code/crates/cli/src/cmd/testnet.rs new file mode 100644 index 000000000..12a27b2cc --- /dev/null +++ b/code/crates/cli/src/cmd/testnet.rs @@ -0,0 +1,166 @@ +//! Testnet command + +use std::path::Path; + +use bytesize::ByteSize; +use clap::Parser; +use color_eyre::eyre::Result; +use rand::prelude::StdRng; +use rand::rngs::OsRng; +use rand::{Rng, SeedableRng}; +use tracing::info; + +use malachite_node::config::{ + Config, ConsensusConfig, MempoolConfig, MetricsConfig, P2pConfig, RuntimeConfig, TimeoutConfig, +}; +use malachite_test::ValidatorSet as Genesis; +use malachite_test::{PrivateKey, PublicKey, Validator}; + +use crate::args::Args; +use crate::cmd::init::{save_config, save_genesis, save_priv_validator_key}; +use crate::priv_key::PrivValidatorKey; + +const MIN_VOTING_POWER: u64 = 8; +const MAX_VOTING_POWER: u64 = 15; + +#[derive(Parser, Debug, Clone, PartialEq)] +pub struct TestnetCmd { + /// Number of validator nodes in the testnet + #[clap(short, long)] + pub nodes: usize, + + /// Generate deterministic private keys for reproducibility + #[clap(short, long)] + pub deterministic: bool, +} + +impl TestnetCmd { + /// Execute the testnet command + pub fn run(&self, home_dir: &Path) -> Result<()> { + let private_keys = generate_private_keys(self.nodes, self.deterministic); + let public_keys = private_keys.iter().map(|pk| pk.public_key()).collect(); + let genesis = generate_genesis(public_keys, self.deterministic); + + for (i, private_key) in private_keys.iter().enumerate().take(self.nodes) { + // Use home directory `home_dir/` + let node_home_dir = home_dir.join(i.to_string()); + + info!( + "Generating configuration for node {i} at `{}`...", + node_home_dir.display() + ); + + // Set the destination folder + let args = Args { + home: Some(node_home_dir), + ..Args::default() + }; + + // Save private key + let priv_validator_key = PrivValidatorKey::from(private_key.clone()); + save_priv_validator_key( + &args.get_priv_validator_key_file_path()?, + &priv_validator_key, + )?; + + // Save genesis + save_genesis(&args.get_genesis_file_path()?, &genesis)?; + + // Save config + save_config( + &args.get_config_file_path()?, + &generate_config(i, self.nodes), + )?; + } + Ok(()) + } +} + +/// Generate private keys. Random or deterministic for different use-cases. +pub fn generate_private_keys(size: usize, deterministic: bool) -> Vec { + if deterministic { + let mut rng = StdRng::seed_from_u64(0x42); + (0..size).map(|_| PrivateKey::generate(&mut rng)).collect() + } else { + (0..size).map(|_| PrivateKey::generate(OsRng)).collect() + } +} + +/// Generate a Genesis file from the public keys and voting power. +/// Voting power can be random or deterministically pseudo-random. 
+pub fn generate_genesis(pks: Vec, deterministic: bool) -> Genesis { + let size = pks.len(); + let voting_powers: Vec = if deterministic { + let mut rng = StdRng::seed_from_u64(0x42); + (0..size) + .map(|_| rng.gen_range(MIN_VOTING_POWER..=MAX_VOTING_POWER)) + .collect() + } else { + (0..size) + .map(|_| OsRng.gen_range(MIN_VOTING_POWER..=MAX_VOTING_POWER)) + .collect() + }; + + let mut validators = Vec::with_capacity(size); + + for i in 0..size { + validators.push(Validator::new(pks[i], voting_powers[i])); + } + + Genesis { validators } +} + +const CONSENSUS_BASE_PORT: usize = 27000; +const MEMPOOL_BASE_PORT: usize = 28000; +const METRICS_BASE_PORT: usize = 29000; + +/// Generate configuration for node "index" out of "total" number of nodes. +pub fn generate_config(index: usize, total: usize) -> Config { + let consensus_port = CONSENSUS_BASE_PORT + index; + let mempool_port = MEMPOOL_BASE_PORT + index; + let metrics_port = METRICS_BASE_PORT + index; + + Config { + moniker: format!("test-{}", index), + consensus: ConsensusConfig { + max_block_size: ByteSize::mib(1), + timeouts: TimeoutConfig::default(), + p2p: P2pConfig { + listen_addr: format!("/ip4/127.0.0.1/udp/{consensus_port}/quic-v1") + .parse() + .unwrap(), + persistent_peers: (0..total) + .filter(|j| *j != index) + .map(|j| { + format!("/ip4/127.0.0.1/udp/{}/quic-v1", CONSENSUS_BASE_PORT + j) + .parse() + .unwrap() + }) + .collect(), + }, + }, + mempool: MempoolConfig { + p2p: P2pConfig { + listen_addr: format!("/ip4/127.0.0.1/udp/{mempool_port}/quic-v1") + .parse() + .unwrap(), + persistent_peers: (0..total) + .filter(|j| *j != index) + .map(|j| { + format!("/ip4/127.0.0.1/udp/{}/quic-v1", MEMPOOL_BASE_PORT + j) + .parse() + .unwrap() + }) + .collect(), + }, + max_tx_count: 10000, + gossip_batch_size: 100, + }, + metrics: MetricsConfig { + enabled: true, + listen_addr: format!("127.0.0.1:{metrics_port}").parse().unwrap(), + }, + runtime: RuntimeConfig::single_threaded(), + test: Default::default(), + } +} diff --git a/code/cli/src/logging.rs b/code/crates/cli/src/logging.rs similarity index 100% rename from code/cli/src/logging.rs rename to code/crates/cli/src/logging.rs diff --git a/code/crates/cli/src/main.rs b/code/crates/cli/src/main.rs new file mode 100644 index 000000000..0760a045e --- /dev/null +++ b/code/crates/cli/src/main.rs @@ -0,0 +1,146 @@ +use color_eyre::eyre::Result; +use tracing::debug; + +use malachite_node::config::{Config, RuntimeConfig}; +use malachite_test::{PrivateKey, ValidatorSet}; + +use crate::args::{Args, Commands}; +use crate::cmd::keys::KeysCmd; +use crate::cmd::testnet::TestnetCmd; +use crate::logging::LogLevel; + +mod args; +mod cmd; +mod logging; +mod metrics; +mod priv_key; + +pub fn main() -> Result<()> { + let args = Args::new(); + + logging::init(LogLevel::Debug, &args.debug); + + debug!("Command-line parameters: {args:?}"); + + match &args.command { + Commands::Start => start(&args), + Commands::Init => init(&args), + Commands::Keys(cmd) => keys(&args, cmd), + Commands::Testnet(cmd) => testnet(&args, cmd), + } +} + +fn start(args: &Args) -> Result<()> { + use tokio::runtime::Builder as RtBuilder; + + let cfg: Config = args.load_config()?; + let sk: PrivateKey = args.load_private_key()?; + let vs: ValidatorSet = args.load_genesis()?; + + let mut builder = match cfg.runtime { + RuntimeConfig::SingleThreaded => RtBuilder::new_current_thread(), + RuntimeConfig::MultiThreaded { worker_threads } => { + let mut builder = RtBuilder::new_multi_thread(); + if worker_threads > 0 { + 
builder.worker_threads(worker_threads); + } + builder + } + }; + + let rt = builder.enable_all().build()?; + rt.block_on(cmd::start::run(sk, cfg, vs)) +} + +fn init(args: &Args) -> Result<()> { + cmd::init::run( + &args.get_config_file_path()?, + &args.get_genesis_file_path()?, + &args.get_priv_validator_key_file_path()?, + ) +} + +fn keys(args: &Args, cmd: &KeysCmd) -> Result<()> { + cmd.run(args) +} + +fn testnet(args: &Args, cmd: &TestnetCmd) -> Result<()> { + cmd.run(&args.get_home_dir()?) +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::path::PathBuf; + + use clap::Parser; + use color_eyre::eyre; + + use super::*; + + #[test] + fn running_init_creates_config_files() -> eyre::Result<()> { + let tmp = tempfile::tempdir()?; + let config_dir = tmp.path().join("config"); + + let args = Args::parse_from(["test", "--home", tmp.path().to_str().unwrap(), "init"]); + + init(&args)?; + + let files = fs::read_dir(&config_dir)?.flatten().collect::>(); + + dbg!(&files); + + assert!(has_file(&files, &config_dir.join("config.toml"))); + assert!(has_file(&files, &config_dir.join("genesis.json"))); + assert!(has_file( + &files, + &config_dir.join("priv_validator_key.json") + )); + + Ok(()) + } + + #[test] + fn running_testnet_creates_all_configs() -> eyre::Result<()> { + let tmp = tempfile::tempdir()?; + + let args = Args::parse_from([ + "test", + "--home", + tmp.path().to_str().unwrap(), + "testnet", + "--nodes", + "3", + ]); + + let Commands::Testnet(ref testnet_args) = args.command else { + panic!("not testnet command"); + }; + + testnet(&args, testnet_args)?; + + let files = fs::read_dir(&tmp)?.flatten().collect::>(); + + assert_eq!(files.len(), 3); + + assert!(has_file(&files, &tmp.path().join("0"))); + assert!(has_file(&files, &tmp.path().join("1"))); + assert!(has_file(&files, &tmp.path().join("2"))); + + for node in 0..3 { + let node_dir = tmp.path().join(node.to_string()).join("config"); + let files = fs::read_dir(&node_dir)?.flatten().collect::>(); + + assert!(has_file(&files, &node_dir.join("config.toml"))); + assert!(has_file(&files, &node_dir.join("genesis.json"))); + assert!(has_file(&files, &node_dir.join("priv_validator_key.json"))); + } + + Ok(()) + } + + fn has_file(files: &[fs::DirEntry], path: &PathBuf) -> bool { + files.iter().any(|f| &f.path() == path) + } +} diff --git a/code/crates/cli/src/metrics.rs b/code/crates/cli/src/metrics.rs new file mode 100644 index 000000000..c334c0bd3 --- /dev/null +++ b/code/crates/cli/src/metrics.rs @@ -0,0 +1,21 @@ +use axum::routing::get; +use axum::Router; +use tokio::net::TcpListener; +use tracing::info; + +use malachite_node::config::MetricsConfig; + +#[tracing::instrument(name = "metrics", skip_all)] +pub async fn serve(config: MetricsConfig) { + let app = Router::new().route("/metrics", get(get_metrics)); + let listener = TcpListener::bind(config.listen_addr).await.unwrap(); + + info!("Serving metrics at http://{}", config.listen_addr); + axum::serve(listener, app).await.unwrap(); +} + +async fn get_metrics() -> String { + let mut buf = String::new(); + malachite_metrics::export(&mut buf); + buf +} diff --git a/code/crates/cli/src/priv_key.rs b/code/crates/cli/src/priv_key.rs new file mode 100644 index 000000000..9115025de --- /dev/null +++ b/code/crates/cli/src/priv_key.rs @@ -0,0 +1,23 @@ +use serde::{Deserialize, Serialize}; + +use malachite_test::{Address, PrivateKey, PublicKey}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PrivValidatorKey { + pub address: Address, + pub public_key: PublicKey, + pub 
private_key: PrivateKey, +} + +impl From for PrivValidatorKey { + fn from(private_key: PrivateKey) -> Self { + let public_key = private_key.public_key(); + let address = Address::from_public_key(&public_key); + + Self { + address, + public_key, + private_key, + } + } +} diff --git a/code/common/Cargo.toml b/code/crates/common/Cargo.toml similarity index 100% rename from code/common/Cargo.toml rename to code/crates/common/Cargo.toml diff --git a/code/crates/common/src/block_part.rs b/code/crates/common/src/block_part.rs new file mode 100644 index 000000000..5bc02fe78 --- /dev/null +++ b/code/crates/common/src/block_part.rs @@ -0,0 +1,23 @@ +use core::fmt::Debug; + +use crate::{Context, Round}; + +/// Defines the requirements for a block part type. + +pub trait BlockPart +where + Self: Clone + Debug + Eq + Send + Sync + 'static, + Ctx: Context, +{ + /// The part height + fn height(&self) -> Ctx::Height; + + /// The part round + fn round(&self) -> Round; + + /// The part sequence + fn sequence(&self) -> u64; + + /// Address of the validator who created this block part + fn validator_address(&self) -> &Ctx::Address; +} diff --git a/code/common/src/context.rs b/code/crates/common/src/context.rs similarity index 79% rename from code/common/src/context.rs rename to code/crates/common/src/context.rs index 51e3241d7..251b44c43 100644 --- a/code/common/src/context.rs +++ b/code/crates/common/src/context.rs @@ -1,6 +1,6 @@ use crate::{ - Address, Height, NilOrVal, Proposal, PublicKey, Round, SignedProposal, SignedVote, - SigningScheme, Validator, ValidatorSet, Value, ValueId, Vote, + Address, BlockPart, Height, NilOrVal, Proposal, PublicKey, Round, SignedBlockPart, + SignedProposal, SignedVote, SigningScheme, Validator, ValidatorSet, Value, ValueId, Vote, }; /// This trait allows to abstract over the various datatypes @@ -15,6 +15,9 @@ where /// The type of the height of a block. type Height: Height; + /// The type of block part + type BlockPart: BlockPart; + /// The interface provided by the proposal type. type Proposal: Proposal; @@ -36,6 +39,16 @@ where /// Sign the given vote with our private key. fn sign_vote(&self, vote: Self::Vote) -> SignedVote; + /// Sign the given proposal with our private key. + fn sign_proposal(&self, proposal: Self::Proposal) -> SignedProposal; + + /// Verify the given proposal's signature using the given public key. + fn verify_signed_proposal( + &self, + signed_proposal: &SignedProposal, + public_key: &PublicKey, + ) -> bool; + /// Verify the given vote's signature using the given public key. fn verify_signed_vote( &self, @@ -43,13 +56,13 @@ where public_key: &PublicKey, ) -> bool; - /// Sign the given proposal with our private key. - fn sign_proposal(&self, proposal: Self::Proposal) -> SignedProposal; + /// Sign the block part with our private key. + fn sign_block_part(&self, block_part: Self::BlockPart) -> SignedBlockPart; - /// Verify the given proposal's signature using the given public key. - fn verify_signed_proposal( + /// Verify the given block part signature using the given public key. 
+ fn verify_signed_block_part( &self, - signed_proposal: &SignedProposal, + signed_block_part: &SignedBlockPart, public_key: &PublicKey, ) -> bool; diff --git a/code/common/src/height.rs b/code/crates/common/src/height.rs similarity index 100% rename from code/common/src/height.rs rename to code/crates/common/src/height.rs diff --git a/code/common/src/lib.rs b/code/crates/common/src/lib.rs similarity index 85% rename from code/common/src/lib.rs rename to code/crates/common/src/lib.rs index 184f10e5c..d688911ea 100644 --- a/code/common/src/lib.rs +++ b/code/crates/common/src/lib.rs @@ -9,19 +9,24 @@ rustdoc::private_intra_doc_links, variant_size_differences )] +// For coverage on nightly +#![allow(unexpected_cfgs)] #![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::panic))] #![cfg_attr(coverage_nightly, feature(coverage_attribute))] extern crate alloc; - +mod block_part; mod context; mod height; mod proposal; mod round; +mod signed_block_part; mod signed_proposal; mod signed_vote; mod signing; mod timeout; + +mod transaction; mod validator_set; mod value; mod vote; @@ -41,14 +46,19 @@ pub type PrivateKey = <::SigningScheme as SigningScheme>::P /// Type alias to make it easier to refer the `Signature` type of a given `Consensus` engine. pub type Signature = <::SigningScheme as SigningScheme>::Signature; +pub use block_part::BlockPart; pub use context::Context; pub use height::Height; pub use proposal::Proposal; pub use round::Round; +pub use signed_block_part::SignedBlockPart; pub use signed_proposal::SignedProposal; pub use signed_vote::SignedVote; pub use signing::SigningScheme; pub use timeout::{Timeout, TimeoutStep}; +pub use transaction::MempoolTransactionBatch; +pub use transaction::Transaction; +pub use transaction::TransactionBatch; pub use validator_set::{Address, Validator, ValidatorSet, VotingPower}; pub use value::{NilOrVal, Value}; pub use vote::{Vote, VoteType}; diff --git a/code/common/src/proposal.rs b/code/crates/common/src/proposal.rs similarity index 100% rename from code/common/src/proposal.rs rename to code/crates/common/src/proposal.rs diff --git a/code/common/src/round.rs b/code/crates/common/src/round.rs similarity index 100% rename from code/common/src/round.rs rename to code/crates/common/src/round.rs diff --git a/code/crates/common/src/signed_block_part.rs b/code/crates/common/src/signed_block_part.rs new file mode 100644 index 000000000..a24e102e3 --- /dev/null +++ b/code/crates/common/src/signed_block_part.rs @@ -0,0 +1,34 @@ +use derive_where::derive_where; + +use crate::{BlockPart, Context, Signature}; + +/// Defines the requirements for a signed block part type. + +#[derive_where(Debug, PartialEq, Eq)] +pub struct SignedBlockPart +where + Ctx: Context, +{ + /// The block part. + pub block_part: Ctx::BlockPart, + + /// The signature of the block part. + pub signature: Signature, +} + +impl SignedBlockPart +where + Ctx: Context, +{ + /// Create a new signed block part from the given part and signature. + pub fn new(block_part: Ctx::BlockPart, signature: Signature) -> Self { + Self { + block_part, + signature, + } + } + /// Return the address of the validator that emitted this block part. 
+ pub fn validator_address(&self) -> &Ctx::Address { + self.block_part.validator_address() + } +} diff --git a/code/common/src/signed_proposal.rs b/code/crates/common/src/signed_proposal.rs similarity index 100% rename from code/common/src/signed_proposal.rs rename to code/crates/common/src/signed_proposal.rs diff --git a/code/common/src/signed_vote.rs b/code/crates/common/src/signed_vote.rs similarity index 100% rename from code/common/src/signed_vote.rs rename to code/crates/common/src/signed_vote.rs diff --git a/code/common/src/signing.rs b/code/crates/common/src/signing.rs similarity index 100% rename from code/common/src/signing.rs rename to code/crates/common/src/signing.rs diff --git a/code/common/src/timeout.rs b/code/crates/common/src/timeout.rs similarity index 100% rename from code/common/src/timeout.rs rename to code/crates/common/src/timeout.rs diff --git a/code/crates/common/src/transaction.rs b/code/crates/common/src/transaction.rs new file mode 100644 index 000000000..dc68cad89 --- /dev/null +++ b/code/crates/common/src/transaction.rs @@ -0,0 +1,93 @@ +use alloc::vec::Vec; +use core::fmt::Debug; + +/// Transaction +#[derive(Clone, Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct Transaction(pub Vec); + +impl Transaction { + /// Create a new transaction from bytes + pub const fn new(transaction: Vec) -> Self { + Self(transaction) + } + + /// Get bytes from a transaction + pub fn to_bytes(&self) -> Vec { + self.0.to_vec() + } + + /// Size of this transaction in bytes + pub fn size_bytes(&self) -> usize { + self.0.len() + } +} + +/// Transaction batch (used by mempool and block part) +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct TransactionBatch(Vec); + +impl TransactionBatch { + /// Create a new transaction batch + pub fn new(transactions: Vec) -> Self { + TransactionBatch(transactions) + } + + /// Add a transaction to the batch + pub fn push(&mut self, transaction: Transaction) { + self.0.push(transaction); + } + + /// Get the number of transactions in the batch + pub fn len(&self) -> usize { + self.0.len() + } + + /// Whether or not the batch is empty + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Get transactions from a batch + pub fn into_transactions(self) -> Vec { + self.0 + } + + /// Get transactions from a batch + pub fn transactions(&self) -> &[Transaction] { + &self.0 + } + + /// The size of this batch in bytes + pub fn size_bytes(&self) -> usize { + self.transactions() + .iter() + .map(|tx| tx.size_bytes()) + .sum::() + } +} + +/// Mempool transaction batch +// TODO: Move to different file +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct MempoolTransactionBatch { + /// The batch of transactions + pub transaction_batch: TransactionBatch, + // May add more fields to this structure +} + +impl MempoolTransactionBatch { + /// Create a new transaction batch + pub fn new(transaction_batch: TransactionBatch) -> Self { + Self { transaction_batch } + } + + /// Get the number of transactions in the batch + pub fn len(&self) -> usize { + self.transaction_batch.len() + } + + /// Implement is_empty + pub fn is_empty(&self) -> bool { + self.transaction_batch.is_empty() + } +} diff --git a/code/common/src/validator_set.rs b/code/crates/common/src/validator_set.rs similarity index 100% rename from code/common/src/validator_set.rs rename to code/crates/common/src/validator_set.rs diff --git a/code/common/src/value.rs b/code/crates/common/src/value.rs similarity index 100% rename from code/common/src/value.rs rename to 
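A minimal usage sketch (not part of this patch) of the new transaction types added in code/crates/common/src/transaction.rs above, assuming the malachite-common crate builds as shown; the byte values are arbitrary:

use malachite_common::{MempoolTransactionBatch, Transaction, TransactionBatch};

fn main() {
    // Two raw transactions of 32 and 64 bytes each.
    let mut batch = TransactionBatch::new(vec![
        Transaction::new(vec![0xAA; 32]),
        Transaction::new(vec![0xBB; 64]),
    ]);

    // Append a third transaction and check the accounting helpers.
    batch.push(Transaction::new(vec![0xCC; 16]));
    assert_eq!(batch.len(), 3);
    assert_eq!(batch.size_bytes(), 32 + 64 + 16);

    // Wrap the batch in the envelope type gossiped by the mempool.
    let gossiped = MempoolTransactionBatch::new(batch);
    assert!(!gossiped.is_empty());
}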
code/crates/common/src/value.rs diff --git a/code/common/src/vote.rs b/code/crates/common/src/vote.rs similarity index 100% rename from code/common/src/vote.rs rename to code/crates/common/src/vote.rs diff --git a/code/driver/Cargo.toml b/code/crates/driver/Cargo.toml similarity index 100% rename from code/driver/Cargo.toml rename to code/crates/driver/Cargo.toml diff --git a/code/driver/src/driver.rs b/code/crates/driver/src/driver.rs similarity index 96% rename from code/driver/src/driver.rs rename to code/crates/driver/src/driver.rs index e9e33aade..feb502b02 100644 --- a/code/driver/src/driver.rs +++ b/code/crates/driver/src/driver.rs @@ -222,9 +222,11 @@ where proposal: Ctx::Proposal, validity: Validity, ) -> Result>, Error> { - // Discard proposals from different heights if self.height() != proposal.height() { - return Ok(None); + return Err(Error::InvalidProposalHeight { + proposal_height: proposal.height(), + consensus_height: self.height(), + }); } let round = proposal.round(); @@ -236,9 +238,11 @@ where } fn apply_vote(&mut self, vote: Ctx::Vote) -> Result>, Error> { - // Discard votes from different heights if self.height() != vote.height() { - return Ok(None); + return Err(Error::InvalidVoteHeight { + vote_height: vote.height(), + consensus_height: self.height(), + }); } let validator = self diff --git a/code/crates/driver/src/error.rs b/code/crates/driver/src/error.rs new file mode 100644 index 000000000..513739c36 --- /dev/null +++ b/code/crates/driver/src/error.rs @@ -0,0 +1,71 @@ +use core::fmt; + +use derive_where::derive_where; + +use malachite_common::{Context, Round}; + +/// The type of errors that can be yielded by the `Driver`. +#[derive_where(Clone, Debug, PartialEq, Eq)] +pub enum Error +where + Ctx: Context, +{ + /// No proposer was set for this round + NoProposer(Ctx::Height, Round), + + /// Proposer not found + ProposerNotFound(Ctx::Address), + + /// Validator not found in validator set + ValidatorNotFound(Ctx::Address), + + /// Received a proposal for another height + InvalidProposalHeight { + /// Proposal height + proposal_height: Ctx::Height, + /// Consensus height + consensus_height: Ctx::Height, + }, + + /// Received a vote for another height + InvalidVoteHeight { + /// Vote height + vote_height: Ctx::Height, + /// Consensus height + consensus_height: Ctx::Height, + }, +} + +impl fmt::Display for Error +where + Ctx: Context, +{ + #[cfg_attr(coverage_nightly, coverage(off))] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::NoProposer(height, round) => { + write!(f, "No proposer set for height {height} at round {round}") + } + Error::ProposerNotFound(addr) => write!(f, "Proposer not found: {addr}"), + Error::ValidatorNotFound(addr) => write!(f, "Validator not found: {addr}"), + Error::InvalidProposalHeight { + proposal_height, + consensus_height, + } => { + write!( + f, + "Received proposal for height {proposal_height} different from consensus height {consensus_height}" + ) + } + Error::InvalidVoteHeight { + vote_height, + consensus_height, + } => { + write!( + f, + "Received vote for height {vote_height} different from consensus height {consensus_height}" + ) + } + } + } +} diff --git a/code/driver/src/input.rs b/code/crates/driver/src/input.rs similarity index 100% rename from code/driver/src/input.rs rename to code/crates/driver/src/input.rs diff --git a/code/driver/src/lib.rs b/code/crates/driver/src/lib.rs similarity index 92% rename from code/driver/src/lib.rs rename to code/crates/driver/src/lib.rs index 
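Since apply_proposal and apply_vote now return errors instead of silently discarding inputs for other heights, callers may want to treat those two variants as benign. A hedged sketch, assuming the new Error type is re-exported from the driver crate root like the other driver types:

use malachite_common::Context;
use malachite_driver::Error;

/// True when the driver rejected an input only because it was addressed to a
/// height other than the one consensus is currently deciding.
fn is_height_mismatch<Ctx: Context>(err: &Error<Ctx>) -> bool {
    matches!(
        err,
        Error::InvalidProposalHeight { .. } | Error::InvalidVoteHeight { .. }
    )
}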
328ee1334..25ea55797 100644 --- a/code/driver/src/lib.rs +++ b/code/crates/driver/src/lib.rs @@ -9,6 +9,8 @@ rustdoc::private_intra_doc_links, variant_size_differences )] +// For coverage on nightly +#![allow(unexpected_cfgs)] #![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::panic))] #![cfg_attr(coverage_nightly, feature(coverage_attribute))] diff --git a/code/driver/src/mux.rs b/code/crates/driver/src/mux.rs similarity index 100% rename from code/driver/src/mux.rs rename to code/crates/driver/src/mux.rs diff --git a/code/driver/src/output.rs b/code/crates/driver/src/output.rs similarity index 100% rename from code/driver/src/output.rs rename to code/crates/driver/src/output.rs diff --git a/code/driver/src/util.rs b/code/crates/driver/src/util.rs similarity index 100% rename from code/driver/src/util.rs rename to code/crates/driver/src/util.rs diff --git a/code/crates/gossip-consensus/Cargo.toml b/code/crates/gossip-consensus/Cargo.toml new file mode 100644 index 000000000..3b2fbb12c --- /dev/null +++ b/code/crates/gossip-consensus/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "malachite-gossip-consensus" +version.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true + +[lints] +workspace = true + +[dependencies] +malachite-metrics = { workspace = true } + +futures = { workspace = true } +libp2p = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } diff --git a/code/gossip/src/behaviour.rs b/code/crates/gossip-consensus/src/behaviour.rs similarity index 67% rename from code/gossip/src/behaviour.rs rename to code/crates/gossip-consensus/src/behaviour.rs index 07bc046f8..7236e80ab 100644 --- a/code/gossip/src/behaviour.rs +++ b/code/crates/gossip-consensus/src/behaviour.rs @@ -1,8 +1,9 @@ use libp2p::swarm::NetworkBehaviour; -use libp2p::{gossipsub, identify, mdns}; +use libp2p::{gossipsub, identify}; pub use libp2p::identity::Keypair; pub use libp2p::{Multiaddr, PeerId}; +use malachite_metrics::Registry; use crate::PROTOCOL_VERSION; @@ -10,7 +11,6 @@ use crate::PROTOCOL_VERSION; #[behaviour(to_swarm = "NetworkEvent")] pub struct Behaviour { pub identify: identify::Behaviour, - pub mdns: mdns::tokio::Behaviour, pub gossipsub: gossipsub::Behaviour, } @@ -21,14 +21,25 @@ impl Behaviour { PROTOCOL_VERSION.to_string(), keypair.public(), )), - mdns: mdns::tokio::Behaviour::new( - mdns::Config::default(), - keypair.public().to_peer_id(), + gossipsub: gossipsub::Behaviour::new( + gossipsub::MessageAuthenticity::Signed(keypair.clone()), + gossipsub::Config::default(), ) .unwrap(), - gossipsub: gossipsub::Behaviour::new( + } + } + + pub fn new_with_metrics(keypair: &Keypair, registry: &mut Registry) -> Self { + Self { + identify: identify::Behaviour::new(identify::Config::new( + PROTOCOL_VERSION.to_string(), + keypair.public(), + )), + gossipsub: gossipsub::Behaviour::new_with_metrics( gossipsub::MessageAuthenticity::Signed(keypair.clone()), gossipsub::Config::default(), + registry, + Default::default(), ) .unwrap(), } @@ -38,7 +49,6 @@ impl Behaviour { #[derive(Debug)] pub enum NetworkEvent { Identify(identify::Event), - Mdns(mdns::Event), GossipSub(gossipsub::Event), } @@ -48,12 +58,6 @@ impl From for NetworkEvent { } } -impl From for NetworkEvent { - fn from(event: mdns::Event) -> Self { - Self::Mdns(event) - } -} - impl From for NetworkEvent { fn from(event: gossipsub::Event) -> Self { Self::GossipSub(event) diff --git a/code/gossip/src/handle.rs 
b/code/crates/gossip-consensus/src/handle.rs similarity index 100% rename from code/gossip/src/handle.rs rename to code/crates/gossip-consensus/src/handle.rs diff --git a/code/crates/gossip-consensus/src/lib.rs b/code/crates/gossip-consensus/src/lib.rs new file mode 100644 index 000000000..e0a6abe21 --- /dev/null +++ b/code/crates/gossip-consensus/src/lib.rs @@ -0,0 +1,322 @@ +use core::fmt; +use std::collections::HashMap; +use std::error::Error; +use std::ops::ControlFlow; +use std::time::Duration; + +use futures::StreamExt; +use libp2p::swarm::{self, SwarmEvent}; +use libp2p::{gossipsub, identify, SwarmBuilder}; +use tokio::sync::mpsc; +use tracing::{debug, error, error_span, trace, Instrument}; + +use malachite_metrics::SharedRegistry; + +pub use libp2p::identity::Keypair; +pub use libp2p::{Multiaddr, PeerId}; + +pub mod behaviour; +pub mod handle; + +use behaviour::{Behaviour, NetworkEvent}; +use handle::Handle; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Channel { + Consensus, + BlockParts, +} + +impl Channel { + pub fn all() -> &'static [Channel] { + &[Channel::Consensus, Channel::BlockParts] + } + + pub fn to_topic(self) -> gossipsub::IdentTopic { + gossipsub::IdentTopic::new(self.as_str()) + } + + pub fn topic_hash(&self) -> gossipsub::TopicHash { + self.to_topic().hash() + } + + pub fn as_str(&self) -> &'static str { + match self { + Channel::Consensus => "/consensus", + Channel::BlockParts => "/blockparts", + } + } + + pub fn has_topic(topic_hash: &gossipsub::TopicHash) -> bool { + Self::all() + .iter() + .any(|channel| &channel.topic_hash() == topic_hash) + } + + pub fn from_topic_hash(topic: &gossipsub::TopicHash) -> Option { + match topic.as_str() { + "/consensus" => Some(Channel::Consensus), + "/blockparts" => Some(Channel::BlockParts), + _ => None, + } + } +} + +impl fmt::Display for Channel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.as_str().fmt(f) + } +} + +const PROTOCOL_VERSION: &str = "malachite-gossip-consensus/v1beta1"; + +pub type BoxError = Box; + +#[derive(Clone, Debug)] +pub struct Config { + pub listen_addr: Multiaddr, + pub persistent_peers: Vec, + pub idle_connection_timeout: Duration, +} + +impl Config { + fn apply(&self, cfg: swarm::Config) -> swarm::Config { + cfg.with_idle_connection_timeout(self.idle_connection_timeout) + } +} + +#[derive(Debug)] +pub enum Event { + Listening(Multiaddr), + Message(PeerId, Channel, Vec), + PeerConnected(PeerId), + PeerDisconnected(PeerId), +} + +#[derive(Debug)] +pub enum CtrlMsg { + Broadcast(Channel, Vec), + Shutdown, +} + +#[derive(Debug, Default)] +pub struct State { + pub peers: HashMap, +} + +pub async fn spawn( + keypair: Keypair, + config: Config, + registry: SharedRegistry, +) -> Result { + let mut swarm = registry.with_prefix( + "malachite_gossip_consensus", + |registry| -> Result<_, BoxError> { + Ok(SwarmBuilder::with_existing_identity(keypair) + .with_tokio() + .with_quic() + .with_dns()? + .with_bandwidth_metrics(registry) + .with_behaviour(|kp| Behaviour::new_with_metrics(kp, registry))? 
+ .with_swarm_config(|cfg| config.apply(cfg)) + .build()) + }, + )?; + + for channel in Channel::all() { + swarm + .behaviour_mut() + .gossipsub + .subscribe(&channel.to_topic())?; + } + + let (tx_event, rx_event) = mpsc::channel(32); + let (tx_ctrl, rx_ctrl) = mpsc::channel(32); + + let peer_id = swarm.local_peer_id(); + let span = error_span!("gossip-consensus", peer = %peer_id); + let task_handle = tokio::task::spawn(run(config, swarm, rx_ctrl, tx_event).instrument(span)); + + Ok(Handle::new(tx_ctrl, rx_event, task_handle)) +} + +async fn run( + config: Config, + mut swarm: swarm::Swarm, + mut rx_ctrl: mpsc::Receiver, + tx_event: mpsc::Sender, +) { + if let Err(e) = swarm.listen_on(config.listen_addr.clone()) { + error!("Error listening on {}: {e}", config.listen_addr); + return; + }; + + for persistent_peer in config.persistent_peers { + trace!("Dialing persistent peer: {persistent_peer}"); + + match swarm.dial(persistent_peer.clone()) { + Ok(()) => (), + Err(e) => error!("Error dialing persistent peer {persistent_peer}: {e}"), + } + } + + let mut state = State::default(); + + loop { + let result = tokio::select! { + event = swarm.select_next_some() => { + handle_swarm_event(event, &mut swarm, &mut state, &tx_event).await + } + + Some(ctrl) = rx_ctrl.recv() => { + handle_ctrl_msg(ctrl, &mut swarm).await + } + }; + + match result { + ControlFlow::Continue(()) => continue, + ControlFlow::Break(()) => break, + } + } +} + +async fn handle_ctrl_msg(msg: CtrlMsg, swarm: &mut swarm::Swarm) -> ControlFlow<()> { + match msg { + CtrlMsg::Broadcast(channel, data) => { + let msg_size = data.len(); + + let result = swarm + .behaviour_mut() + .gossipsub + .publish(channel.topic_hash(), data); + + match result { + Ok(message_id) => { + trace!("Broadcasted message {message_id} of {msg_size} bytes"); + } + Err(e) => { + error!("Error broadcasting message: {e}"); + } + } + + ControlFlow::Continue(()) + } + + CtrlMsg::Shutdown => ControlFlow::Break(()), + } +} + +async fn handle_swarm_event( + event: SwarmEvent, + swarm: &mut swarm::Swarm, + state: &mut State, + tx_event: &mpsc::Sender, +) -> ControlFlow<()> { + match event { + SwarmEvent::NewListenAddr { address, .. 
} => { + debug!("Node is listening on {address}"); + + if let Err(e) = tx_event.send(Event::Listening(address)).await { + error!("Error sending listening event to handle: {e}"); + return ControlFlow::Break(()); + } + } + + SwarmEvent::Behaviour(NetworkEvent::Identify(identify::Event::Sent { peer_id })) => { + trace!("Sent identity to {peer_id}"); + } + + SwarmEvent::Behaviour(NetworkEvent::Identify(identify::Event::Received { + peer_id, + info, + })) => { + trace!( + "Received identity from {peer_id}: protocol={:?}", + info.protocol_version + ); + + if info.protocol_version == PROTOCOL_VERSION { + trace!( + "Peer {peer_id} is using compatible protocol version: {:?}", + info.protocol_version + ); + + state.peers.insert(peer_id, info); + + swarm.behaviour_mut().gossipsub.add_explicit_peer(&peer_id); + } else { + trace!( + "Peer {peer_id} is using incompatible protocol version: {:?}", + info.protocol_version + ); + } + } + + SwarmEvent::Behaviour(NetworkEvent::GossipSub(gossipsub::Event::Subscribed { + peer_id, + topic, + })) => { + if !Channel::has_topic(&topic) { + trace!("Peer {peer_id} tried to subscribe to unknown topic: {topic}"); + return ControlFlow::Continue(()); + } + + trace!("Peer {peer_id} subscribed to {topic}"); + + if let Err(e) = tx_event.send(Event::PeerConnected(peer_id)).await { + error!("Error sending peer connected event to handle: {e}"); + return ControlFlow::Break(()); + } + } + + SwarmEvent::Behaviour(NetworkEvent::GossipSub(gossipsub::Event::Unsubscribed { + peer_id, + topic, + })) => { + if !Channel::has_topic(&topic) { + trace!("Peer {peer_id} tried to unsubscribe from unknown topic: {topic}"); + return ControlFlow::Continue(()); + } + + trace!("Peer {peer_id} unsubscribed from {topic}"); + + if let Err(e) = tx_event.send(Event::PeerDisconnected(peer_id)).await { + error!("Error sending peer disconnected event to handle: {e}"); + return ControlFlow::Break(()); + } + } + + SwarmEvent::Behaviour(NetworkEvent::GossipSub(gossipsub::Event::Message { + propagation_source: peer_id, + message_id, + message, + })) => { + let Some(channel) = Channel::from_topic_hash(&message.topic) else { + trace!( + "Received message {message_id} from {peer_id} on different channel: {}", + message.topic + ); + + return ControlFlow::Continue(()); + }; + + trace!( + "Received message {message_id} from {peer_id} on channel {} of {} bytes", + channel, + message.data.len() + ); + + if let Err(e) = tx_event + .send(Event::Message(peer_id, channel, message.data)) + .await + { + error!("Error sending message to handle: {e}"); + return ControlFlow::Break(()); + } + } + + _ => {} + } + + ControlFlow::Continue(()) +} diff --git a/code/gossip/Cargo.toml b/code/crates/gossip-mempool/Cargo.toml similarity index 79% rename from code/gossip/Cargo.toml rename to code/crates/gossip-mempool/Cargo.toml index 0247835c6..868915993 100644 --- a/code/gossip/Cargo.toml +++ b/code/crates/gossip-mempool/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "malachite-gossip" +name = "malachite-gossip-mempool" version.workspace = true edition.workspace = true repository.workspace = true @@ -10,8 +10,9 @@ publish.workspace = true workspace = true [dependencies] +malachite-metrics = { workspace = true } + futures = { workspace = true } libp2p = { workspace = true } -# libp2p-gossipsub = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/code/crates/gossip-mempool/src/behaviour.rs b/code/crates/gossip-mempool/src/behaviour.rs new file mode 100644 index 000000000..e92fb4b05 --- /dev/null 
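The Channel enum of the gossip-consensus crate above pins each logical stream to a fixed gossipsub topic. A small sketch (not part of this patch) of the expected round-trip between channels and topic hashes:

use malachite_gossip_consensus::Channel;

fn main() {
    for channel in Channel::all() {
        let hash = channel.topic_hash();
        assert!(Channel::has_topic(&hash));
        assert_eq!(Channel::from_topic_hash(&hash), Some(*channel));
    }

    assert_eq!(Channel::Consensus.as_str(), "/consensus");
    assert_eq!(Channel::BlockParts.as_str(), "/blockparts");
}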
+++ b/code/crates/gossip-mempool/src/behaviour.rs @@ -0,0 +1,66 @@ +use libp2p::swarm::NetworkBehaviour; +use libp2p::{gossipsub, identify}; + +use malachite_metrics::Registry; + +pub use libp2p::identity::Keypair; +pub use libp2p::{Multiaddr, PeerId}; + +use crate::PROTOCOL_VERSION; + +#[derive(NetworkBehaviour)] +#[behaviour(to_swarm = "NetworkEvent")] +pub struct Behaviour { + pub identify: identify::Behaviour, + pub gossipsub: gossipsub::Behaviour, +} + +impl Behaviour { + pub fn new(keypair: &Keypair) -> Self { + Self { + identify: identify::Behaviour::new(identify::Config::new( + PROTOCOL_VERSION.to_string(), + keypair.public(), + )), + gossipsub: gossipsub::Behaviour::new( + gossipsub::MessageAuthenticity::Signed(keypair.clone()), + gossipsub::Config::default(), + ) + .unwrap(), + } + } + + pub fn new_with_metrics(keypair: &Keypair, registry: &mut Registry) -> Self { + Self { + identify: identify::Behaviour::new(identify::Config::new( + PROTOCOL_VERSION.to_string(), + keypair.public(), + )), + gossipsub: gossipsub::Behaviour::new_with_metrics( + gossipsub::MessageAuthenticity::Signed(keypair.clone()), + gossipsub::Config::default(), + registry, + Default::default(), + ) + .unwrap(), + } + } +} + +#[derive(Debug)] +pub enum NetworkEvent { + Identify(identify::Event), + GossipSub(gossipsub::Event), +} + +impl From for NetworkEvent { + fn from(event: identify::Event) -> Self { + Self::Identify(event) + } +} + +impl From for NetworkEvent { + fn from(event: gossipsub::Event) -> Self { + Self::GossipSub(event) + } +} diff --git a/code/crates/gossip-mempool/src/handle.rs b/code/crates/gossip-mempool/src/handle.rs new file mode 100644 index 000000000..996d4852f --- /dev/null +++ b/code/crates/gossip-mempool/src/handle.rs @@ -0,0 +1,87 @@ +use tokio::sync::mpsc; +use tokio::task; + +use crate::{BoxError, Channel, CtrlMsg, Event}; + +pub struct RecvHandle { + rx_event: mpsc::Receiver, +} + +impl RecvHandle { + pub async fn recv(&mut self) -> Option { + self.rx_event.recv().await + } +} + +pub struct CtrlHandle { + tx_ctrl: mpsc::Sender, + task_handle: task::JoinHandle<()>, +} + +impl CtrlHandle { + pub async fn broadcast(&self, channel: Channel, data: Vec) -> Result<(), BoxError> { + self.tx_ctrl.send(CtrlMsg::Broadcast(channel, data)).await?; + Ok(()) + } + + pub async fn wait_shutdown(self) -> Result<(), BoxError> { + self.shutdown().await?; + self.join().await?; + Ok(()) + } + + pub async fn shutdown(&self) -> Result<(), BoxError> { + self.tx_ctrl.send(CtrlMsg::Shutdown).await?; + Ok(()) + } + + pub async fn join(self) -> Result<(), BoxError> { + self.task_handle.await?; + Ok(()) + } +} + +pub struct Handle { + recv: RecvHandle, + ctrl: CtrlHandle, +} + +impl Handle { + pub fn new( + tx_ctrl: mpsc::Sender, + rx_event: mpsc::Receiver, + task_handle: task::JoinHandle<()>, + ) -> Handle { + Self { + recv: RecvHandle { rx_event }, + ctrl: CtrlHandle { + tx_ctrl, + task_handle, + }, + } + } + + pub fn split(self) -> (RecvHandle, CtrlHandle) { + (self.recv, self.ctrl) + } + + pub async fn recv(&mut self) -> Option { + self.recv.recv().await + } + + pub async fn broadcast(&self, channel: Channel, data: Vec) -> Result<(), BoxError> { + self.ctrl.broadcast(channel, data).await + } + + pub async fn wait_shutdown(self) -> Result<(), BoxError> { + self.ctrl.wait_shutdown().await + } + + pub async fn shutdown(&self) -> Result<(), BoxError> { + self.ctrl.shutdown().await + } + + pub async fn join(self) -> Result<(), BoxError> { + self.ctrl.join().await + } +} diff --git a/code/gossip/src/lib.rs 
b/code/crates/gossip-mempool/src/lib.rs similarity index 65% rename from code/gossip/src/lib.rs rename to code/crates/gossip-mempool/src/lib.rs index ed8ee427d..140814d28 100644 --- a/code/gossip/src/lib.rs +++ b/code/crates/gossip-mempool/src/lib.rs @@ -1,13 +1,16 @@ use core::fmt; +use std::collections::HashMap; use std::error::Error; use std::ops::ControlFlow; use std::time::Duration; use futures::StreamExt; use libp2p::swarm::{self, SwarmEvent}; -use libp2p::{gossipsub, identify, mdns, SwarmBuilder}; +use libp2p::{gossipsub, identify, SwarmBuilder}; use tokio::sync::mpsc; -use tracing::{debug, error, error_span, Instrument}; +use tracing::{debug, error, error_span, trace, Instrument}; + +use malachite_metrics::SharedRegistry; pub use libp2p::identity::Keypair; pub use libp2p::{Multiaddr, PeerId}; @@ -19,12 +22,12 @@ use handle::Handle; #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum Channel { - Consensus, + Mempool, } impl Channel { pub fn all() -> &'static [Channel] { - &[Channel::Consensus] + &[Channel::Mempool] } pub fn to_topic(self) -> gossipsub::IdentTopic { @@ -37,7 +40,7 @@ impl Channel { pub fn as_str(&self) -> &'static str { match self { - Channel::Consensus => "/consensus", + Channel::Mempool => "/mempool", } } @@ -49,7 +52,7 @@ impl Channel { pub fn from_topic_hash(topic: &gossipsub::TopicHash) -> Option { match topic.as_str() { - "/consensus" => Some(Channel::Consensus), + "/mempool" => Some(Channel::Mempool), _ => None, } } @@ -61,27 +64,26 @@ impl fmt::Display for Channel { } } -const PROTOCOL_VERSION: &str = "malachite-gossip/v1beta1"; +const PROTOCOL_VERSION: &str = "malachite-gossip-mempool/v1beta1"; pub type BoxError = Box; #[derive(Clone, Debug)] pub struct Config { - idle_connection_timeout: Duration, + pub listen_addr: Multiaddr, + pub persistent_peers: Vec, + pub idle_connection_timeout: Duration, } impl Config { - fn apply(self, cfg: swarm::Config) -> swarm::Config { + fn apply(&self, cfg: swarm::Config) -> swarm::Config { cfg.with_idle_connection_timeout(self.idle_connection_timeout) } } -impl Default for Config { - fn default() -> Self { - Self { - idle_connection_timeout: Duration::from_secs(30), - } - } +#[derive(Debug, Default)] +pub struct State { + pub peers: HashMap, } #[derive(Debug)] @@ -98,13 +100,24 @@ pub enum CtrlMsg { Shutdown, } -pub async fn spawn(keypair: Keypair, addr: Multiaddr, config: Config) -> Result { - let mut swarm = SwarmBuilder::with_existing_identity(keypair) - .with_tokio() - .with_quic() - .with_behaviour(Behaviour::new)? - .with_swarm_config(|cfg| config.apply(cfg)) - .build(); +pub async fn spawn( + keypair: Keypair, + config: Config, + registry: SharedRegistry, +) -> Result { + let mut swarm = registry.with_prefix( + "malachite_gossip_mempool", + |registry| -> Result<_, BoxError> { + Ok(SwarmBuilder::with_existing_identity(keypair) + .with_tokio() + .with_quic() + .with_dns()? + .with_bandwidth_metrics(registry) + .with_behaviour(|kp| Behaviour::new_with_metrics(kp, registry))? 
+ .with_swarm_config(|cfg| config.apply(cfg)) + .build()) + }, + )?; for channel in Channel::all() { swarm @@ -113,27 +126,42 @@ pub async fn spawn(keypair: Keypair, addr: Multiaddr, config: Config) -> Result< .subscribe(&channel.to_topic())?; } - swarm.listen_on(addr)?; - let (tx_event, rx_event) = mpsc::channel(32); let (tx_ctrl, rx_ctrl) = mpsc::channel(32); let peer_id = swarm.local_peer_id(); - let span = error_span!("gossip", peer = %peer_id); - let task_handle = tokio::task::spawn(run(swarm, rx_ctrl, tx_event).instrument(span)); + let span = error_span!("gossip-mempool", peer = %peer_id); + let task_handle = tokio::task::spawn(run(config, swarm, rx_ctrl, tx_event).instrument(span)); Ok(Handle::new(tx_ctrl, rx_event, task_handle)) } async fn run( + config: Config, mut swarm: swarm::Swarm, mut rx_ctrl: mpsc::Receiver, tx_event: mpsc::Sender, ) { + if let Err(e) = swarm.listen_on(config.listen_addr.clone()) { + error!("Error listening on {}: {e}", config.listen_addr); + return; + }; + + for persistent_peer in config.persistent_peers { + trace!("Dialing persistent peer: {persistent_peer}"); + + match swarm.dial(persistent_peer.clone()) { + Ok(()) => (), + Err(e) => error!("Error dialing persistent peer {persistent_peer}: {e}"), + } + } + + let mut state = State::default(); + loop { let result = tokio::select! { event = swarm.select_next_some() => { - handle_swarm_event(event, &mut swarm, &tx_event).await + handle_swarm_event(event, &mut swarm, &mut state, &tx_event).await } Some(ctrl) = rx_ctrl.recv() => { @@ -160,7 +188,7 @@ async fn handle_ctrl_msg(msg: CtrlMsg, swarm: &mut swarm::Swarm) -> C match result { Ok(message_id) => { - debug!("Broadcasted message {message_id} of {msg_size} bytes"); + trace!("Broadcasted message {message_id} of {msg_size} bytes"); } Err(e) => { error!("Error broadcasting message: {e}"); @@ -177,6 +205,7 @@ async fn handle_ctrl_msg(msg: CtrlMsg, swarm: &mut swarm::Swarm) -> C async fn handle_swarm_event( event: SwarmEvent, swarm: &mut swarm::Swarm, + state: &mut State, tx_event: &mpsc::Sender, ) -> ControlFlow<()> { match event { @@ -190,40 +219,32 @@ async fn handle_swarm_event( } SwarmEvent::Behaviour(NetworkEvent::Identify(identify::Event::Sent { peer_id })) => { - debug!("Sent identity to {peer_id}"); + trace!("Sent identity to {peer_id}"); } SwarmEvent::Behaviour(NetworkEvent::Identify(identify::Event::Received { peer_id, - info: _, + info, })) => { - debug!("Received identity from {peer_id}"); - } + trace!( + "Received identity from {peer_id}: protocol={:?}", + info.protocol_version + ); - SwarmEvent::Behaviour(NetworkEvent::Mdns(mdns::Event::Discovered(peers))) => { - for (peer_id, addr) in peers { - debug!("Discovered peer {peer_id} at {addr}"); - swarm.behaviour_mut().gossipsub.add_explicit_peer(&peer_id); + if info.protocol_version == PROTOCOL_VERSION { + trace!( + "Connecting to peer {peer_id} using protocol {:?}", + info.protocol_version + ); - // if let Err(e) = tx_event.send(HandleEvent::PeerConnected(peer_id)).await { - // error!("Error sending peer connected event to handle: {e}"); - // return ControlFlow::Break(()); - // } - } - } + state.peers.insert(peer_id, info); - SwarmEvent::Behaviour(NetworkEvent::Mdns(mdns::Event::Expired(peers))) => { - for (peer_id, _addr) in peers { - debug!("Expired peer: {peer_id}"); - swarm - .behaviour_mut() - .gossipsub - .remove_explicit_peer(&peer_id); - - // if let Err(e) = tx_event.send(HandleEvent::PeerDisconnected(peer_id)).await { - // error!("Error sending peer disconnected event to handle: {e}"); - // 
return ControlFlow::Break(()); - // } + swarm.behaviour_mut().gossipsub.add_explicit_peer(&peer_id); + } else { + trace!( + "Peer {peer_id} is using incompatible protocol version: {:?}", + info.protocol_version + ); } } @@ -232,11 +253,12 @@ async fn handle_swarm_event( topic: topic_hash, })) => { if !Channel::has_topic(&topic_hash) { - debug!("Peer {peer_id} tried to subscribe to unknown topic: {topic_hash}"); + trace!("Peer {peer_id} tried to subscribe to unknown topic: {topic_hash}"); + return ControlFlow::Continue(()); } - debug!("Peer {peer_id} subscribed to {topic_hash}"); + trace!("Peer {peer_id} subscribed to {topic_hash}"); if let Err(e) = tx_event.send(Event::PeerConnected(peer_id)).await { error!("Error sending peer connected event to handle: {e}"); @@ -250,7 +272,7 @@ async fn handle_swarm_event( message, })) => { let Some(channel) = Channel::from_topic_hash(&message.topic) else { - debug!( + trace!( "Received message {message_id} from {peer_id} on different channel: {}", message.topic ); @@ -258,7 +280,7 @@ async fn handle_swarm_event( return ControlFlow::Continue(()); }; - debug!( + trace!( "Received message {message_id} from {peer_id} on channel {} of {} bytes", channel, message.data.len() diff --git a/code/itf/Cargo.toml b/code/crates/itf/Cargo.toml similarity index 100% rename from code/itf/Cargo.toml rename to code/crates/itf/Cargo.toml diff --git a/code/itf/src/consensus.rs b/code/crates/itf/src/consensus.rs similarity index 100% rename from code/itf/src/consensus.rs rename to code/crates/itf/src/consensus.rs diff --git a/code/itf/src/deserializers.rs b/code/crates/itf/src/deserializers.rs similarity index 100% rename from code/itf/src/deserializers.rs rename to code/crates/itf/src/deserializers.rs diff --git a/code/itf/src/lib.rs b/code/crates/itf/src/lib.rs similarity index 100% rename from code/itf/src/lib.rs rename to code/crates/itf/src/lib.rs diff --git a/code/itf/src/types.rs b/code/crates/itf/src/types.rs similarity index 100% rename from code/itf/src/types.rs rename to code/crates/itf/src/types.rs diff --git a/code/itf/src/utils.rs b/code/crates/itf/src/utils.rs similarity index 96% rename from code/itf/src/utils.rs rename to code/crates/itf/src/utils.rs index eb59929a5..5f6f70244 100644 --- a/code/itf/src/utils.rs +++ b/code/crates/itf/src/utils.rs @@ -5,7 +5,7 @@ pub fn generate_traces(spec_rel_path: &str, gen_dir: &str, quint_seed: u64) { println!("🪄 Generating traces for {spec_rel_path:?}..."); let spec_abs_path = format!( - "{}/../../specs/quint/{}", + "{}/../../../specs/quint/{}", env!("CARGO_MANIFEST_DIR"), spec_rel_path ); diff --git a/code/itf/src/votekeeper.rs b/code/crates/itf/src/votekeeper.rs similarity index 100% rename from code/itf/src/votekeeper.rs rename to code/crates/itf/src/votekeeper.rs diff --git a/code/itf/tests/consensus.rs b/code/crates/itf/tests/consensus.rs similarity index 100% rename from code/itf/tests/consensus.rs rename to code/crates/itf/tests/consensus.rs diff --git a/code/itf/tests/consensus/runner.rs b/code/crates/itf/tests/consensus/runner.rs similarity index 100% rename from code/itf/tests/consensus/runner.rs rename to code/crates/itf/tests/consensus/runner.rs diff --git a/code/itf/tests/consensus/utils.rs b/code/crates/itf/tests/consensus/utils.rs similarity index 100% rename from code/itf/tests/consensus/utils.rs rename to code/crates/itf/tests/consensus/utils.rs diff --git a/code/itf/tests/votekeeper.rs b/code/crates/itf/tests/votekeeper.rs similarity index 100% rename from code/itf/tests/votekeeper.rs rename to 
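The mempool gossip layer now takes its listen address and persistent peers through the same Config shape as the consensus gossip layer. A rough construction sketch (the addresses are illustrative only, not part of this patch):

use std::time::Duration;

use malachite_gossip_mempool::{Channel, Config};

fn main() {
    let config = Config {
        listen_addr: "/ip4/127.0.0.1/udp/28000/quic-v1".parse().unwrap(),
        persistent_peers: vec!["/ip4/127.0.0.1/udp/28001/quic-v1".parse().unwrap()],
        idle_connection_timeout: Duration::from_secs(60),
    };

    assert_eq!(Channel::Mempool.as_str(), "/mempool");
    assert_eq!(config.persistent_peers.len(), 1);
}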
code/crates/itf/tests/votekeeper.rs diff --git a/code/itf/tests/votekeeper/runner.rs b/code/crates/itf/tests/votekeeper/runner.rs similarity index 100% rename from code/itf/tests/votekeeper/runner.rs rename to code/crates/itf/tests/votekeeper/runner.rs diff --git a/code/itf/tests/votekeeper/utils.rs b/code/crates/itf/tests/votekeeper/utils.rs similarity index 100% rename from code/itf/tests/votekeeper/utils.rs rename to code/crates/itf/tests/votekeeper/utils.rs diff --git a/code/crates/metrics/Cargo.toml b/code/crates/metrics/Cargo.toml new file mode 100644 index 000000000..e4cc2adab --- /dev/null +++ b/code/crates/metrics/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "malachite-metrics" +version.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true +rust-version.workspace = true + +[dependencies] +prometheus-client.workspace = true + +[lints] +workspace = true diff --git a/code/crates/metrics/src/lib.rs b/code/crates/metrics/src/lib.rs new file mode 100644 index 000000000..ec3f1b565 --- /dev/null +++ b/code/crates/metrics/src/lib.rs @@ -0,0 +1,42 @@ +use std::sync::{Arc, Mutex, OnceLock}; + +pub use prometheus_client::metrics::counter::Counter; +pub use prometheus_client::metrics::family::Family; +pub use prometheus_client::metrics::gauge::Gauge; +pub use prometheus_client::metrics::histogram::{linear_buckets, Histogram}; +pub use prometheus_client::registry::Registry; + +#[derive(Clone)] +pub struct SharedRegistry(Arc>); + +impl SharedRegistry { + pub fn new(registry: Registry) -> Self { + Self(Arc::new(Mutex::new(registry))) + } + + pub fn global() -> &'static Self { + global_registry() + } + + pub fn lock(&self) -> std::sync::MutexGuard<'_, Registry> { + self.0.lock().unwrap() + } + + pub fn with(&self, f: impl FnOnce(&mut Registry) -> A) -> A { + f(&mut self.lock()) + } + + pub fn with_prefix(&self, prefix: impl AsRef, f: impl FnOnce(&mut Registry) -> A) -> A { + f(self.lock().sub_registry_with_prefix(prefix)) + } +} + +fn global_registry() -> &'static SharedRegistry { + static REGISTRY: OnceLock = OnceLock::new(); + REGISTRY.get_or_init(|| SharedRegistry::new(Registry::default())) +} +pub fn export(writer: &mut W) { + use prometheus_client::encoding::text::encode; + + SharedRegistry::global().with(|registry| encode(writer, registry).unwrap()) +} diff --git a/code/node/Cargo.toml b/code/crates/node/Cargo.toml similarity index 62% rename from code/node/Cargo.toml rename to code/crates/node/Cargo.toml index c6052367f..81df4e032 100644 --- a/code/node/Cargo.toml +++ b/code/crates/node/Cargo.toml @@ -13,13 +13,11 @@ workspace = true [dependencies] malachite-common.workspace = true -malachite-network.workspace = true -malachite-test.workspace = true -async-trait = { workspace = true } -derive-where = { workspace = true } -ed25519-consensus = { workspace = true, features = ["serde"] } +bytesize = { workspace = true, features = ["serde"] } humantime-serde = { workspace = true } multiaddr = { workspace = true } -tokio = { workspace = true, features = ["full"] } serde = { workspace = true, features = ["derive"] } + +[dev-dependencies] +toml.workspace = true diff --git a/code/node/src/config.rs b/code/crates/node/src/config.rs similarity index 53% rename from code/node/src/config.rs rename to code/crates/node/src/config.rs index 3f07560d9..3c96ac825 100644 --- a/code/node/src/config.rs +++ b/code/crates/node/src/config.rs @@ -1,22 +1,36 @@ +use std::net::SocketAddr; use std::time::Duration; +use bytesize::ByteSize; use 
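The SharedRegistry introduced in code/crates/metrics above is what both gossip crates receive in spawn. A short usage sketch (not part of this patch; the prefix and metric name are made up):

use malachite_metrics::{Counter, SharedRegistry};

fn main() {
    let registry = SharedRegistry::global();

    // Register a counter under a prefixed sub-registry, then bump it.
    let requests: Counter = Counter::default();
    registry.with_prefix("example_component", |r| {
        r.register("requests", "Number of requests handled", requests.clone());
    });
    requests.inc();

    // Export everything in the Prometheus text exposition format.
    let mut out = String::new();
    malachite_metrics::export(&mut out);
    assert!(out.contains("example_component_requests"));
}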
malachite_common::TimeoutStep; use multiaddr::Multiaddr; use serde::{Deserialize, Serialize}; /// Malachite configuration options -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Config { - /// A custom human readable name for this node + /// A custom human-readable name for this node pub moniker: String, - /// P2P configuration options - pub p2p: P2pConfig, + /// Consensus configuration options pub consensus: ConsensusConfig, + + /// Mempool configuration options + pub mempool: MempoolConfig, + + /// Metrics configuration options + pub metrics: MetricsConfig, + + /// Runtime configuration options + pub runtime: RuntimeConfig, + + /// Test configuration + #[serde(default)] + pub test: TestConfig, } /// P2P configuration options -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct P2pConfig { // Address to listen for incoming connections pub listen_addr: Multiaddr, @@ -24,14 +38,35 @@ pub struct P2pConfig { pub persistent_peers: Vec, } +/// Mempool configuration options +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct MempoolConfig { + /// P2P configuration options + pub p2p: P2pConfig, + + /// Maximum number of transactions + pub max_tx_count: usize, + + /// Maximum number of transactions to gossip at once in a batch + pub gossip_batch_size: usize, +} + /// Consensus configuration options -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct ConsensusConfig { + /// Max block size + pub max_block_size: ByteSize, + + /// Timeouts #[serde(flatten)] pub timeouts: TimeoutConfig, + + /// P2P configuration options + pub p2p: P2pConfig, } -#[derive(Copy, Clone, Debug, Serialize, Deserialize)] +/// Timeouts +#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct TimeoutConfig { /// How long we wait for a proposal block before prevoting nil #[serde(with = "humantime_serde")] @@ -97,3 +132,69 @@ impl Default for TimeoutConfig { } } } + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct MetricsConfig { + /// Enable the metrics server + pub enabled: bool, + + /// Address at which to serve the metrics at + pub listen_addr: SocketAddr, +} + +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "flavor", rename_all = "snake_case")] +pub enum RuntimeConfig { + /// Single-threaded runtime + #[default] + SingleThreaded, + + /// Multi-threaded runtime + MultiThreaded { + /// Number of worker threads + worker_threads: usize, + }, +} + +impl RuntimeConfig { + pub fn single_threaded() -> Self { + Self::SingleThreaded + } + + pub fn multi_threaded(worker_threads: usize) -> Self { + Self::MultiThreaded { worker_threads } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TestConfig { + pub tx_size: ByteSize, + pub txs_per_part: usize, + pub time_allowance_factor: f32, + #[serde(with = "humantime_serde")] + pub exec_time_per_tx: Duration, +} + +impl Default for TestConfig { + fn default() -> Self { + Self { + tx_size: ByteSize::b(256), + txs_per_part: 200, + time_allowance_factor: 0.7, + exec_time_per_tx: Duration::from_millis(1), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_default_config_file() { + let file = include_str!("../../../config.toml"); + let config = toml::from_str::(file).unwrap(); + assert_eq!(config.consensus.timeouts, 
TimeoutConfig::default()); + assert_eq!(config.test, TestConfig::default()); + } +} diff --git a/code/node/src/lib.rs b/code/crates/node/src/lib.rs similarity index 58% rename from code/node/src/lib.rs rename to code/crates/node/src/lib.rs index e51744f8b..2f9607846 100644 --- a/code/node/src/lib.rs +++ b/code/crates/node/src/lib.rs @@ -1,3 +1,5 @@ +// For coverage on nightly +#![allow(unexpected_cfgs)] #![cfg_attr(coverage_nightly, feature(coverage_attribute))] pub mod config; diff --git a/code/proto/Cargo.toml b/code/crates/proto/Cargo.toml similarity index 100% rename from code/proto/Cargo.toml rename to code/crates/proto/Cargo.toml diff --git a/code/proto/build.rs b/code/crates/proto/build.rs similarity index 67% rename from code/proto/build.rs rename to code/crates/proto/build.rs index c02bf2f34..fff6a63c7 100644 --- a/code/proto/build.rs +++ b/code/crates/proto/build.rs @@ -3,7 +3,7 @@ use std::io::Result; fn main() -> Result<()> { let mut config = prost_build::Config::new(); config.enable_type_names(); - config.compile_protos(&["src/malachite.proto"], &["src/"])?; + config.compile_protos(&["proto/malachite.proto"], &["proto"])?; Ok(()) } diff --git a/code/proto/src/malachite.proto b/code/crates/proto/proto/malachite.proto similarity index 58% rename from code/proto/src/malachite.proto rename to code/crates/proto/proto/malachite.proto index 8553e9c27..74c7884d0 100644 --- a/code/proto/src/malachite.proto +++ b/code/crates/proto/proto/malachite.proto @@ -2,6 +2,8 @@ syntax = "proto3"; package malachite; +import "google/protobuf/any.proto"; + message Height { uint64 value = 1; } @@ -11,7 +13,7 @@ message Address { } message Value { - optional bytes value = 2; + optional bytes value = 1; } message ValueId { @@ -53,3 +55,28 @@ message SignedProposal { bytes signature = 2; } +message Transaction { + optional bytes value = 1; +} + +message TransactionBatch { + repeated Transaction transactions = 1; +} + +message MempoolTransactionBatch { + TransactionBatch transaction_batch = 1; +} + +message BlockPart { + Height height = 1; + Round round = 2; + uint64 sequence = 3; + Address validator_address = 4; + google.protobuf.Any content = 5; +} + +message SignedBlockPart { + BlockPart block_part = 1; + bytes signature = 2; +} + diff --git a/code/crates/proto/src/impls.rs b/code/crates/proto/src/impls.rs new file mode 100644 index 000000000..360f8be8d --- /dev/null +++ b/code/crates/proto/src/impls.rs @@ -0,0 +1,175 @@ +use malachite_common::{ + Context, MempoolTransactionBatch, Round, SignedBlockPart, SignedProposal, SignedVote, + SigningScheme, Transaction, TransactionBatch, VoteType, +}; + +use crate::{self as proto, Error, Protobuf}; + +impl Protobuf for Round { + type Proto = proto::Round; + + fn from_proto(proto: Self::Proto) -> Result { + Ok(Round::new(proto.round)) + } + + fn to_proto(&self) -> Result { + Ok(proto::Round { + round: self.as_i64(), + }) + } +} + +impl Protobuf for SignedVote +where + Ctx::Vote: Protobuf, +{ + type Proto = proto::SignedVote; + + fn from_proto(proto: Self::Proto) -> Result { + let vote = proto + .vote + .ok_or_else(|| Error::missing_field::("vote"))?; + + Ok(Self { + vote: Ctx::Vote::from_proto(vote)?, + signature: Ctx::SigningScheme::decode_signature(&proto.signature) + .map_err(|e| Error::Other(format!("Failed to decode signature: {e}")))?, + }) + } + + fn to_proto(&self) -> Result { + Ok(proto::SignedVote { + vote: Some(self.vote.to_proto()?), + signature: Ctx::SigningScheme::encode_signature(&self.signature), + }) + } +} + +impl Protobuf for SignedBlockPart 
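Back to the node configuration added in code/crates/node/src/config.rs above: the runtime flavor and the test section can also be built programmatically. A hedged sketch exercising only what this diff defines:

use malachite_node::config::{RuntimeConfig, TestConfig};

fn main() {
    let runtime = RuntimeConfig::multi_threaded(4);
    assert_eq!(runtime, RuntimeConfig::MultiThreaded { worker_threads: 4 });

    // Defaults used when the [test] section is omitted from config.toml.
    let test = TestConfig::default();
    assert_eq!(test.txs_per_part, 200);
    assert_eq!(test.tx_size.as_u64(), 256);
}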
+where + Ctx::BlockPart: Protobuf, +{ + type Proto = proto::SignedBlockPart; + + fn from_proto(proto: Self::Proto) -> Result { + let block_part = proto + .block_part + .ok_or_else(|| Error::missing_field::("block_part"))?; + + Ok(Self { + block_part: Ctx::BlockPart::from_proto(block_part)?, + signature: Ctx::SigningScheme::decode_signature(&proto.signature) + .map_err(|e| Error::Other(format!("Failed to decode signature: {e}")))?, + }) + } + + fn to_proto(&self) -> Result { + Ok(proto::SignedBlockPart { + block_part: Some(self.block_part.to_proto()?), + signature: Ctx::SigningScheme::encode_signature(&self.signature), + }) + } +} + +impl From for VoteType { + fn from(vote_type: proto::VoteType) -> Self { + match vote_type { + proto::VoteType::Prevote => VoteType::Prevote, + proto::VoteType::Precommit => VoteType::Precommit, + } + } +} + +impl From for proto::VoteType { + fn from(vote_type: VoteType) -> proto::VoteType { + match vote_type { + VoteType::Prevote => proto::VoteType::Prevote, + VoteType::Precommit => proto::VoteType::Precommit, + } + } +} + +impl Protobuf for SignedProposal +where + Ctx::Proposal: Protobuf, +{ + type Proto = proto::SignedProposal; + + fn from_proto(proto: Self::Proto) -> Result { + let proposal = proto + .proposal + .ok_or_else(|| Error::Other("Missing field `proposal`".to_string()))?; + + Ok(Self { + proposal: Ctx::Proposal::from_proto(proposal)?, + signature: Ctx::SigningScheme::decode_signature(&proto.signature) + .map_err(|e| Error::Other(format!("Failed to decode signature: {e}")))?, + }) + } + + fn to_proto(&self) -> Result { + Ok(proto::SignedProposal { + proposal: Some(self.proposal.to_proto()?), + signature: Ctx::SigningScheme::encode_signature(&self.signature), + }) + } +} + +impl Protobuf for Transaction { + type Proto = proto::Transaction; + + fn from_proto(proto: Self::Proto) -> Result { + let tx = proto + .value + .ok_or_else(|| Error::Other("Missing field `value`".to_string()))?; + + Ok(Self::new(tx)) + } + + fn to_proto(&self) -> Result { + let value = self.to_bytes(); + Ok(proto::Transaction { value: Some(value) }) + } +} + +impl Protobuf for TransactionBatch { + type Proto = proto::TransactionBatch; + + fn from_proto(proto: Self::Proto) -> Result { + Ok(TransactionBatch::new( + proto + .transactions + .into_iter() + .map(Transaction::from_proto) + .collect::>()?, + )) + } + + fn to_proto(&self) -> Result { + Ok(proto::TransactionBatch { + transactions: self + .transactions() + .iter() + .map(|t| t.to_proto()) + .collect::>()?, + }) + } +} + +impl Protobuf for MempoolTransactionBatch { + type Proto = proto::MempoolTransactionBatch; + + fn from_proto(proto: Self::Proto) -> Result { + Ok(MempoolTransactionBatch::new(TransactionBatch::from_proto( + proto + .transaction_batch + .ok_or_else(|| proto::Error::missing_field::("content"))?, + )?)) + } + + fn to_proto(&self) -> Result { + Ok(proto::MempoolTransactionBatch { + transaction_batch: Some(self.transaction_batch.to_proto()?), + }) + } +} diff --git a/code/proto/src/lib.rs b/code/crates/proto/src/lib.rs similarity index 82% rename from code/proto/src/lib.rs rename to code/crates/proto/src/lib.rs index 7100a2db8..a0a911121 100644 --- a/code/proto/src/lib.rs +++ b/code/crates/proto/src/lib.rs @@ -1,5 +1,6 @@ use std::convert::Infallible; +use prost_types::Any; use thiserror::Error; use prost::{DecodeError, EncodeError, Message}; @@ -65,4 +66,18 @@ pub trait Protobuf: Sized { let proto = self.to_proto()?; Ok(proto.encode_to_vec()) } + + fn from_any(any: &Any) -> Result + where + Self::Proto: 
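A round-trip sketch (not part of this patch) for the new Protobuf impls above, using only the to_proto/from_proto methods shown:

use malachite_common::{Transaction, TransactionBatch};
use malachite_proto::Protobuf;

fn main() {
    let batch = TransactionBatch::new(vec![Transaction::new(vec![1, 2, 3])]);

    let proto = batch.to_proto().unwrap();
    let decoded = TransactionBatch::from_proto(proto).unwrap();

    assert_eq!(batch, decoded);
}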
prost::Name, + { + Self::from_proto(any.to_msg::()?) + } + + fn to_any(&self) -> Result + where + Self::Proto: prost::Name, + { + Ok(Any::from_msg(&self.to_proto()?)?) + } } diff --git a/code/round/Cargo.toml b/code/crates/round/Cargo.toml similarity index 100% rename from code/round/Cargo.toml rename to code/crates/round/Cargo.toml diff --git a/code/round/src/input.rs b/code/crates/round/src/input.rs similarity index 100% rename from code/round/src/input.rs rename to code/crates/round/src/input.rs diff --git a/code/round/src/lib.rs b/code/crates/round/src/lib.rs similarity index 90% rename from code/round/src/lib.rs rename to code/crates/round/src/lib.rs index d8f8cebb6..99efbe12e 100644 --- a/code/round/src/lib.rs +++ b/code/crates/round/src/lib.rs @@ -9,6 +9,8 @@ rustdoc::private_intra_doc_links, variant_size_differences )] +// For coverage on nightly +#![allow(unexpected_cfgs)] #![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::panic))] #![cfg_attr(coverage_nightly, feature(coverage_attribute))] diff --git a/code/round/src/output.rs b/code/crates/round/src/output.rs similarity index 100% rename from code/round/src/output.rs rename to code/crates/round/src/output.rs diff --git a/code/round/src/state.rs b/code/crates/round/src/state.rs similarity index 100% rename from code/round/src/state.rs rename to code/crates/round/src/state.rs diff --git a/code/round/src/state_machine.rs b/code/crates/round/src/state_machine.rs similarity index 100% rename from code/round/src/state_machine.rs rename to code/crates/round/src/state_machine.rs diff --git a/code/round/src/transition.rs b/code/crates/round/src/transition.rs similarity index 100% rename from code/round/src/transition.rs rename to code/crates/round/src/transition.rs diff --git a/code/crates/test-app/Cargo.toml b/code/crates/test-app/Cargo.toml new file mode 100644 index 000000000..ff4fb7fbf --- /dev/null +++ b/code/crates/test-app/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "malachite-test-app" +version.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true +rust-version.workspace = true + +[dependencies] +malachite-actors.workspace = true +malachite-common.workspace = true +malachite-driver.workspace = true +malachite-node.workspace = true +malachite-metrics.workspace = true +malachite-gossip-consensus.workspace = true +malachite-gossip-mempool.workspace = true +malachite-test.workspace = true +malachite-proto.workspace = true + +async-trait.workspace = true +bytesize.workspace = true +derive-where.workspace = true +tokio.workspace = true +tracing.workspace = true + +[dev-dependencies] +malachite-test = { workspace = true } +rand = { workspace = true } +rand_chacha = { workspace = true } +tracing-subscriber = { workspace = true, features = ["fmt", "env-filter"] } + +[lints] +# workspace = true diff --git a/code/crates/test-app/src/lib.rs b/code/crates/test-app/src/lib.rs new file mode 100644 index 000000000..cd557d281 --- /dev/null +++ b/code/crates/test-app/src/lib.rs @@ -0,0 +1,3 @@ +pub mod part_store; +pub mod spawn; +pub mod value_builder; diff --git a/code/crates/test-app/src/part_store.rs b/code/crates/test-app/src/part_store.rs new file mode 100644 index 000000000..c4fc03c5d --- /dev/null +++ b/code/crates/test-app/src/part_store.rs @@ -0,0 +1,66 @@ +use std::collections::BTreeMap; + +use derive_where::derive_where; + +use malachite_common::{Context, Round}; + +// This is a temporary store implementation for block parts +// +// TODO-s: +// - [x] make it 
context generic +// - [ ] add Address to key +// note: not sure if this is required as consensus should verify that only the parts signed by the proposer for +// the height and round should be forwarded here (see the TODOs in consensus) +#[derive_where(Clone, Debug, PartialEq, Eq)] +pub struct PartStore { + pub map: BTreeMap<(Ctx::Height, Round, u64), Ctx::BlockPart>, +} + +impl Default for PartStore { + fn default() -> Self { + Self::new() + } +} + +impl PartStore { + pub fn new() -> Self { + Self { + map: BTreeMap::new(), + } + } + + pub fn get(&self, height: Ctx::Height, round: Round, sequence: u64) -> Option<&Ctx::BlockPart> { + self.map.get(&(height, round, sequence)) + } + + // Get all parts for a given height and round, sorted in sequence ascending order. + pub fn all_parts(&self, height: Ctx::Height, round: Round) -> Vec<&Ctx::BlockPart> { + use malachite_common::BlockPart; + + let mut block_parts: Vec<_> = self + .map + .iter() + .filter(|((h, r, _), _)| *h == height && *r == round) + .map(|(_, b)| b) + .collect(); + + block_parts.sort_by_key(|b| b.sequence()); + block_parts + } + + pub fn store(&mut self, block_part: Ctx::BlockPart) { + use malachite_common::BlockPart; + + let height = block_part.height(); + let round = block_part.round(); + let sequence = block_part.sequence(); + + self.map + .entry((height, round, sequence)) + .or_insert(block_part); + } + + pub fn prune(&mut self, min_height: Ctx::Height) { + self.map.retain(|(height, _, _), _| *height >= min_height); + } +} diff --git a/code/crates/test-app/src/spawn.rs b/code/crates/test-app/src/spawn.rs new file mode 100644 index 000000000..1aa305f2c --- /dev/null +++ b/code/crates/test-app/src/spawn.rs @@ -0,0 +1,193 @@ +use std::time::Duration; + +use tokio::sync::mpsc; +use tokio::task::JoinHandle; + +use malachite_actors::consensus::{Consensus, ConsensusParams, ConsensusRef, Metrics}; +use malachite_actors::gossip_consensus::{GossipConsensus, GossipConsensusRef}; +use malachite_actors::gossip_mempool::{GossipMempool, GossipMempoolRef}; +use malachite_actors::host::{Host, HostRef}; +use malachite_actors::mempool::{Mempool, MempoolRef}; +use malachite_actors::node::{Node, NodeRef}; +use malachite_common::Round; +use malachite_gossip_consensus::{Config as GossipConsensusConfig, Keypair}; +use malachite_gossip_mempool::Config as GossipMempoolConfig; +use malachite_metrics::SharedRegistry; +use malachite_node::config::{Config as NodeConfig, MempoolConfig, TestConfig}; +use malachite_test::{Address, Height, PrivateKey, TestContext, ValidatorSet, Value}; + +use crate::part_store::PartStore; +use crate::value_builder::{TestParams as TestValueBuilderParams, TestValueBuilder}; + +pub async fn spawn_node_actor( + cfg: NodeConfig, + initial_validator_set: ValidatorSet, + validator_pk: PrivateKey, + node_pk: PrivateKey, + address: Address, + tx_decision: mpsc::Sender<(Height, Round, Value)>, +) -> (NodeRef, JoinHandle<()>) { + let ctx = TestContext::new(validator_pk.clone()); + + let registry = SharedRegistry::global(); + let metrics = Metrics::register(registry); + + // Spawn mempool and its gossip layer + let gossip_mempool = spawn_gossip_mempool_actor(&cfg, node_pk, registry).await; + let mempool = spawn_mempool_actor(gossip_mempool.clone(), &cfg.mempool, &cfg.test).await; + + // Configure the value builder + let value_builder = make_test_value_builder(mempool.clone(), metrics.clone(), &cfg); + + // Spawn the host actor + let host = spawn_host_actor(value_builder, &initial_validator_set).await; + + // Spawn consensus and its 
gossip + let gossip_consensus = spawn_gossip_consensus_actor(&cfg, validator_pk, registry).await; + + let start_height = Height::new(1); + + let consensus = spawn_consensus_actor( + start_height, + initial_validator_set, + address, + ctx.clone(), + cfg, + gossip_consensus.clone(), + host.clone(), + metrics, + tx_decision, + ) + .await; + + // Spawn the node actor + let node = Node::new( + ctx, + gossip_consensus, + consensus, + gossip_mempool, + mempool, + host, + start_height, + ); + + let (actor_ref, handle) = node.spawn().await.unwrap(); + + (actor_ref, handle) +} + +#[allow(clippy::too_many_arguments)] +async fn spawn_consensus_actor( + start_height: Height, + initial_validator_set: ValidatorSet, + address: Address, + ctx: TestContext, + cfg: NodeConfig, + gossip_consensus: GossipConsensusRef, + host: HostRef, + metrics: Metrics, + tx_decision: mpsc::Sender<(Height, Round, Value)>, +) -> ConsensusRef { + let consensus_params = ConsensusParams { + start_height, + initial_validator_set, + address, + threshold_params: Default::default(), + }; + + Consensus::spawn( + ctx, + consensus_params, + cfg.consensus.timeouts, + gossip_consensus, + host, + metrics, + tx_decision, + None, + ) + .await + .unwrap() +} + +async fn spawn_gossip_consensus_actor( + cfg: &NodeConfig, + validator_pk: PrivateKey, + registry: &SharedRegistry, +) -> GossipConsensusRef { + let config_gossip = GossipConsensusConfig { + listen_addr: cfg.consensus.p2p.listen_addr.clone(), + persistent_peers: cfg.consensus.p2p.persistent_peers.clone(), + idle_connection_timeout: Duration::from_secs(60), + }; + + let validator_keypair = Keypair::ed25519_from_bytes(validator_pk.inner().to_bytes()).unwrap(); + + GossipConsensus::spawn( + validator_keypair.clone(), + config_gossip, + registry.clone(), + None, + ) + .await + .unwrap() +} + +async fn spawn_mempool_actor( + gossip_mempool: GossipMempoolRef, + mempool_config: &MempoolConfig, + test_config: &TestConfig, +) -> MempoolRef { + Mempool::spawn(gossip_mempool, mempool_config, test_config, None) + .await + .unwrap() +} + +async fn spawn_gossip_mempool_actor( + cfg: &NodeConfig, + node_pk: PrivateKey, + registry: &SharedRegistry, +) -> GossipMempoolRef { + let config_gossip_mempool = GossipMempoolConfig { + listen_addr: cfg.mempool.p2p.listen_addr.clone(), + persistent_peers: cfg.mempool.p2p.persistent_peers.clone(), + idle_connection_timeout: Duration::from_secs(60), + }; + + let node_keypair = Keypair::ed25519_from_bytes(node_pk.inner().to_bytes()).unwrap(); + + GossipMempool::spawn( + node_keypair.clone(), + config_gossip_mempool, + registry.clone(), + None, + ) + .await + .unwrap() +} + +async fn spawn_host_actor( + value_builder: TestValueBuilder, + initial_validator_set: &ValidatorSet, +) -> HostRef { + Host::spawn(Box::new(value_builder), initial_validator_set.clone()) + .await + .unwrap() +} + +fn make_test_value_builder( + mempool: MempoolRef, + metrics: Metrics, + cfg: &NodeConfig, +) -> TestValueBuilder { + let params = TestValueBuilderParams { + max_block_size: cfg.consensus.max_block_size, + tx_size: cfg.test.tx_size, + txs_per_part: cfg.test.txs_per_part, + time_allowance_factor: cfg.test.time_allowance_factor, + exec_time_per_tx: cfg.test.exec_time_per_tx, + }; + + let part_store = PartStore::new(); + + TestValueBuilder::new(mempool, params, part_store, metrics) +} diff --git a/code/crates/test-app/src/value_builder.rs b/code/crates/test-app/src/value_builder.rs new file mode 100644 index 000000000..8e9fdb654 --- /dev/null +++ 
b/code/crates/test-app/src/value_builder.rs @@ -0,0 +1,367 @@ +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::marker::PhantomData; +use std::time::{Duration, Instant}; + +use async_trait::async_trait; +use bytesize::ByteSize; +use tracing::{debug, error, info, trace}; + +use malachite_actors::consensus::Metrics; +use malachite_actors::consensus::{ConsensusRef, Msg as ConsensusMsg}; +use malachite_actors::host::{LocallyProposedValue, ReceivedProposedValue}; +use malachite_actors::mempool::{MempoolRef, Msg as MempoolMsg}; +use malachite_actors::value_builder::ValueBuilder; +use malachite_common::{Context, Round, SignedVote, Transaction, TransactionBatch}; +use malachite_driver::Validity; +use malachite_test::{Address, BlockMetadata, BlockPart, Content, Height, TestContext, Value}; + +use crate::part_store::PartStore; + +#[derive(Copy, Clone, Debug)] +pub struct TestParams { + pub max_block_size: ByteSize, + pub tx_size: ByteSize, + pub txs_per_part: usize, + pub time_allowance_factor: f32, + pub exec_time_per_tx: Duration, +} + +pub struct TestValueBuilder { + tx_streamer: MempoolRef, + params: TestParams, + part_store: PartStore, + metrics: Metrics, + _phantom: PhantomData, +} + +impl TestValueBuilder +where + Ctx: Context, +{ + pub fn new( + tx_streamer: MempoolRef, + params: TestParams, + part_store: PartStore, + metrics: Metrics, + ) -> Self { + Self { + tx_streamer, + params, + metrics, + part_store, + _phantom: PhantomData, + } + } +} + +#[async_trait] +impl ValueBuilder for TestValueBuilder { + #[tracing::instrument( + name = "value_builder.locally", + skip_all, + fields( + height = %height, + round = %round, + ) + )] + async fn build_value_locally( + &mut self, + height: Height, + round: Round, + timeout_duration: Duration, + validator_address: Address, + consensus: ConsensusRef, + ) -> Option> { + let start = Instant::now(); + let deadline = start + timeout_duration.mul_f32(self.params.time_allowance_factor); + let expiration_time = start + timeout_duration; + + let mut tx_batch = vec![]; + let mut sequence = 1; + let mut block_size = 0; + let mut max_block_size_reached = false; + + loop { + trace!( + "Build local value for h:{}, r:{}, s:{}", + height, + round, + sequence + ); + + let mut txes = self + .tx_streamer + .call( + |reply| MempoolMsg::TxStream { + height: height.as_u64(), + num_txes: self.params.txs_per_part, + reply, + }, + None, + ) // TODO timeout + .await + .ok()? + .unwrap(); + + if txes.is_empty() { + return None; + } + + let mut tx_count = 0; + + 'inner: for tx in &txes { + if block_size + tx.size_bytes() > self.params.max_block_size.as_u64() as usize { + max_block_size_reached = true; + break 'inner; + } + + block_size += tx.size_bytes(); + tx_batch.push(tx.clone()); + tx_count += 1; + } + + // Trim the tx batch so it does not overflow the block. 
+ txes = txes.into_iter().take(tx_count).collect(); + + // Create, store and gossip the batch in a BlockPart + let block_part = BlockPart::new( + height, + round, + sequence, + validator_address, + Content::TxBatch(TransactionBatch::new(txes.clone())), + ); + + self.part_store.store(block_part.clone()); + + consensus + .cast(ConsensusMsg::BuilderBlockPart(block_part)) + .unwrap(); + + // Simulate execution of reaped txes + let exec_time = self.params.exec_time_per_tx * tx_count as u32; + trace!("Simulating tx execution for {tx_count} tx-es, sleeping for {exec_time:?}"); + tokio::time::sleep(exec_time).await; + + if Instant::now() > expiration_time { + error!( + "Value Builder failed to complete in given interval ({timeout_duration:?}), took {:?}", + Instant::now() - start, + ); + + return None; + } + + sequence += 1; + + if Instant::now() > deadline || max_block_size_reached { + if max_block_size_reached { + debug!( + "Value Builder stopped streaming Tx-es due to max block size being reached" + ); + } else { + debug!("Value Builder stopped streaming Tx-es due to deadline being reached"); + } + + // Create, store and gossip the BlockMetadata in a BlockPart + let value = Value::new_from_transactions(&tx_batch); + + let result = Some(LocallyProposedValue { + height, + round, + value, + }); + + let block_part = BlockPart::new( + height, + round, + sequence, + validator_address, + Content::Metadata(BlockMetadata::new(vec![], value)), + ); + + self.part_store.store(block_part.clone()); + + consensus + .cast(ConsensusMsg::BuilderBlockPart(block_part)) + .unwrap(); + + info!( + "Value Builder created a block with {} tx-es of size {} in {:?} with hash {:?}, disseminated in {} block parts ", + tx_batch.len(), + ByteSize::b(block_size as u64), + Instant::now() - start, + value.id(), + sequence, + ); + + return result; + } + } + } + + #[tracing::instrument( + name = "value_builder.from_block_parts", + skip_all, + fields( + height = %block_part.height, + round = %block_part.round, + sequence = %block_part.sequence + ) + )] + async fn build_value_from_block_parts( + &mut self, + block_part: BlockPart, + ) -> Option> { + let height = block_part.height; + let round = block_part.round; + let sequence = block_part.sequence; + + self.part_store.store(block_part.clone()); + let all_parts = self.part_store.all_parts(height, round); + + trace!(%height, %round, %sequence, "Received block part"); + + // Simulate Tx execution and proof verification (assumes success) + // TODO - add config knob for invalid blocks + let num_txes = block_part.content.tx_count().unwrap_or(0) as u32; + tokio::time::sleep(self.params.exec_time_per_tx * num_txes).await; + + // Get the "last" part, the one with highest sequence. + // Block parts may not be received in order. + let highest_sequence = all_parts.len() as u64; + + if let Some(last_part) = self.part_store.get(height, round, highest_sequence) { + // Check if the part with the highest sequence number had metadata content. + // TODO - do more validations, e.g. there is no higher tx block part. + match last_part.metadata() { + // All block parts should have been received, including the metadata that has + // the block hash/ value. + Some(meta) => { + // Compute the block size. + let block_size: usize = all_parts.iter().map(|p| p.size_bytes()).sum(); + + // Compute the number of transactions in the block. 
+ let tx_count: usize = all_parts + .iter() + .map(|p| p.content.tx_count().unwrap_or(0)) + .sum(); + + let received_value = meta.value(); + + // Compute the expected block hash/ value from the block parts. + let all_txes: Vec = all_parts + .iter() + .flat_map(|p| match p.content.as_ref() { + Content::TxBatch(tx_batch) => { + info!("get txes from received part {}", p.sequence); + tx_batch.transactions().to_vec() + } + Content::Metadata(_) => { + vec![] + } + }) + .collect(); + let expected_value = Value::new_from_transactions(&all_txes); + + info!( + height = %last_part.height, + round = %last_part.round, + tx_count = %tx_count, + block_size = %block_size, + num_parts = %all_parts.len(), + "Value Builder received last block part", + ); + + let valid = if received_value != expected_value { + error!( + "Invalid block received with value {:?}, expected {:?}", + received_value, expected_value + ); + Validity::Invalid + } else { + Validity::Valid + }; + + Some(ReceivedProposedValue { + validator_address: last_part.validator_address, + height: last_part.height, + round: last_part.round, + value: meta.value(), + valid, + }) + } + None => None, + } + } else { + None + } + } + + async fn maybe_received_value( + &mut self, + height: Height, + round: Round, + ) -> Option> { + let block_parts = self.part_store.all_parts(height, round); + let num_parts = block_parts.len(); + let last_part = block_parts[num_parts - 1]; + + last_part.metadata().map(|metadata| ReceivedProposedValue { + validator_address: last_part.validator_address, + height, + round, + value: metadata.value(), + valid: Validity::Valid, + }) + } + + #[tracing::instrument( + name = "value_builder.decided", + skip_all, + fields( + height = %height, + round = %round, + ) + )] + async fn decided_on_value( + &mut self, + height: Height, + round: Round, + value: Value, + _commits: Vec>, + ) { + info!("Build and store block with hash {value:?}"); + + let all_parts = self.part_store.all_parts(height, round); + + // TODO - build the block from block parts and commits and store it + + // Update metrics + let block_size: usize = all_parts.iter().map(|p| p.size_bytes()).sum(); + let tx_count: usize = all_parts + .iter() + .map(|p| p.content.tx_count().unwrap_or(0)) + .sum(); + + self.metrics.block_tx_count.observe(tx_count as f64); + self.metrics.block_size_bytes.observe(block_size as f64); + self.metrics.finalized_txes.inc_by(tx_count as u64); + + // Send Update to mempool to remove all the tx-es included in the block. 
+ let mut tx_hashes = vec![]; + for part in all_parts { + if let Content::TxBatch(transaction_batch) = part.content.as_ref() { + tx_hashes.extend(transaction_batch.transactions().iter().map(|tx| { + let mut hash = DefaultHasher::new(); + tx.0.hash(&mut hash); + hash.finish() + })); + } + } + let _ = self.tx_streamer.cast(MempoolMsg::Update { tx_hashes }); + + // Prune the PartStore of all parts for heights lower than `height - 1` + self.part_store.prune(height.decrement().unwrap_or(height)); + } +} diff --git a/code/actors/tests/actor_gossip_n3f0.rs b/code/crates/test-app/tests/actor_gossip_n3f0.rs similarity index 100% rename from code/actors/tests/actor_gossip_n3f0.rs rename to code/crates/test-app/tests/actor_gossip_n3f0.rs diff --git a/code/crates/test-app/tests/actor_gossip_n3f1.rs b/code/crates/test-app/tests/actor_gossip_n3f1.rs new file mode 100644 index 000000000..7d6d60689 --- /dev/null +++ b/code/crates/test-app/tests/actor_gossip_n3f1.rs @@ -0,0 +1,61 @@ +#![allow(unused_crate_dependencies)] + +#[path = "util.rs"] +mod util; +use util::*; + +#[tokio::test] +pub async fn proposer_fails_to_start() { + let nodes = Test::new( + [ + TestNode::faulty(10, vec![Fault::NoStart]), + TestNode::correct(10), + TestNode::correct(10), + ], + 0, + ); + + run_test(nodes).await +} + +#[tokio::test] +pub async fn one_node_fails_to_start() { + let nodes = Test::new( + [ + TestNode::correct(10), + TestNode::faulty(10, vec![Fault::NoStart]), + TestNode::correct(10), + ], + 0, + ); + + run_test(nodes).await +} + +#[tokio::test] +pub async fn proposer_crashes_at_height_1() { + let nodes = Test::new( + [ + TestNode::faulty(10, vec![Fault::Crash(1)]), + TestNode::correct(10), + TestNode::correct(10), + ], + 2, + ); + + run_test(nodes).await +} + +#[tokio::test] +pub async fn one_node_crashes_at_height_2() { + let nodes = Test::new( + [ + TestNode::faulty(10, vec![Fault::Crash(2)]), + TestNode::correct(10), + TestNode::correct(10), + ], + 3, + ); + + run_test(nodes).await +} diff --git a/code/crates/test-app/tests/util.rs b/code/crates/test-app/tests/util.rs new file mode 100644 index 000000000..73febad08 --- /dev/null +++ b/code/crates/test-app/tests/util.rs @@ -0,0 +1,274 @@ +#![allow(dead_code)] + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + +use bytesize::ByteSize; +use rand::Rng; +use tokio::sync::mpsc; +use tokio::time::{sleep, Duration}; +use tracing::{error, info, Instrument}; + +use malachite_common::{Round, VotingPower}; +use malachite_node::config::{ + ConsensusConfig, MempoolConfig, MetricsConfig, P2pConfig, RuntimeConfig, TimeoutConfig, +}; +use malachite_test::utils::make_validators; +use malachite_test::{Height, PrivateKey, Validator, ValidatorSet}; +use malachite_test_app::spawn::spawn_node_actor; + +pub const HEIGHTS: u64 = 3; +pub const START_HEIGHT: Height = Height::new(1); +pub const END_HEIGHT: Height = Height::new(START_HEIGHT.as_u64() + HEIGHTS - 1); +pub const TEST_TIMEOUT: Duration = Duration::from_secs(20); + +pub struct Test { + pub nodes: [TestNode; N], + pub validator_set: ValidatorSet, + pub vals_and_keys: [(Validator, PrivateKey); N], + pub expected_decisions: usize, + pub consensus_base_port: usize, + pub mempool_base_port: usize, + pub metrics_base_port: usize, +} + +impl Test { + pub fn new(nodes: [TestNode; N], expected_decisions: usize) -> Self { + let vals_and_keys = make_validators(Self::voting_powers(&nodes)); + let validators = vals_and_keys.iter().map(|(v, _)| v).cloned(); + let validator_set = ValidatorSet::new(validators); + + Self { + 
nodes, + validator_set, + vals_and_keys, + expected_decisions, + consensus_base_port: rand::thread_rng().gen_range(21000..30000), + mempool_base_port: rand::thread_rng().gen_range(31000..40000), + metrics_base_port: rand::thread_rng().gen_range(41000..50000), + } + } + + pub fn voting_powers(nodes: &[TestNode; N]) -> [VotingPower; N] { + let mut voting_powers = [0; N]; + for (i, node) in nodes.iter().enumerate() { + voting_powers[i] = node.voting_power; + } + voting_powers + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Fault { + NoStart, + Crash(u64), +} + +#[derive(Clone)] +pub struct TestNode { + pub voting_power: VotingPower, + pub faults: Vec, +} + +impl TestNode { + pub fn correct(voting_power: VotingPower) -> Self { + Self { + voting_power, + faults: vec![], + } + } + + pub fn faulty(voting_power: VotingPower, faults: Vec) -> Self { + Self { + voting_power, + faults, + } + } + + pub fn start_node(&self) -> bool { + !self.faults.contains(&Fault::NoStart) + } + + pub fn crashes_at(&self, height: u64) -> bool { + self.faults.iter().any(|f| match f { + Fault::NoStart => false, + Fault::Crash(h) => *h == height, + }) + } +} + +fn init_logging() { + use tracing_subscriber::util::SubscriberInitExt; + use tracing_subscriber::{EnvFilter, FmtSubscriber}; + + let filter = EnvFilter::builder() + .parse("info,malachite=debug,ractor=error") + .unwrap(); + + pub fn enable_ansi() -> bool { + use std::io::IsTerminal; + std::io::stdout().is_terminal() && std::io::stderr().is_terminal() + } + + // Construct a tracing subscriber with the supplied filter and enable reloading. + let builder = FmtSubscriber::builder() + .with_target(false) + .with_env_filter(filter) + .with_writer(std::io::stdout) + .with_ansi(enable_ansi()) + .with_thread_ids(false); + + let subscriber = builder.finish(); + subscriber.init(); +} + +pub async fn run_test(test: Test) { + init_logging(); + + let mut handles = Vec::with_capacity(N); + + for i in 0..N { + if test.nodes[i].faults.contains(&Fault::NoStart) { + continue; + } + + let (v, sk) = &test.vals_and_keys[i]; + let (tx_decision, rx_decision) = mpsc::channel(HEIGHTS as usize); + + let node_config = make_node_config(&test, i); + + let node = tokio::spawn(spawn_node_actor( + node_config, + test.validator_set.clone(), + sk.clone(), + sk.clone(), + v.address, + tx_decision, + )); + + handles.push((node, rx_decision)); + } + + sleep(Duration::from_secs(5)).await; + + let mut nodes = Vec::with_capacity(handles.len()); + for (i, (handle, rx)) in handles.into_iter().enumerate() { + let (actor_ref, _) = handle.await.expect("Error: node failed to start"); + let test = test.nodes[i].clone(); + nodes.push((actor_ref, test, rx)); + } + + let mut actors = Vec::with_capacity(nodes.len()); + let mut rxs = Vec::with_capacity(nodes.len()); + + for (actor, _, rx) in nodes { + actors.push(actor); + rxs.push(rx); + } + + let correct_decisions = Arc::new(AtomicUsize::new(0)); + + for (i, mut rx_decision) in rxs.into_iter().enumerate() { + let correct_decisions = Arc::clone(&correct_decisions); + + let node_test = test.nodes[i].clone(); + let actor_ref = actors[i].clone(); + + tokio::spawn( + async move { + for height in START_HEIGHT.as_u64()..=END_HEIGHT.as_u64() { + if node_test.crashes_at(height) { + info!("Faulty node has crashed"); + actor_ref.kill(); + break; + } + + let decision = rx_decision.recv().await; + + // TODO - the value proposed comes from a set of mempool Tx-es which are currently different for each proposer + // Also heights can go to higher rounds. 
+ // Therefore removing the round and value check for now + match decision { + Some((h, r, _)) if h == Height::new(height) && r == Round::new(0) => { + info!("{height}/{HEIGHTS} correct decision"); + correct_decisions.fetch_add(1, Ordering::Relaxed); + } + _ => { + error!("{height}/{HEIGHTS} no decision") + } + } + } + } + .instrument(tracing::error_span!("node", i)), + ); + } + + tokio::time::sleep(TEST_TIMEOUT).await; + + let correct_decisions = correct_decisions.load(Ordering::Relaxed); + + if correct_decisions != test.expected_decisions { + panic!( + "Not all nodes made correct decisions: got {}, expected {}", + correct_decisions, test.expected_decisions + ); + } + + for actor in actors { + let _ = actor.stop_and_wait(None, None).await; + } +} + +fn make_node_config(test: &Test, i: usize) -> malachite_node::config::Config { + malachite_node::config::Config { + moniker: format!("node-{i}"), + consensus: ConsensusConfig { + max_block_size: ByteSize::mib(1), + timeouts: TimeoutConfig::default(), + p2p: P2pConfig { + listen_addr: format!( + "/ip4/127.0.0.1/udp/{}/quic-v1", + test.consensus_base_port + i + ) + .parse() + .unwrap(), + persistent_peers: (0..N) + .filter(|j| i != *j) + .map(|j| { + format!( + "/ip4/127.0.0.1/udp/{}/quic-v1", + test.consensus_base_port + j + ) + .parse() + .unwrap() + }) + .collect(), + }, + }, + mempool: MempoolConfig { + p2p: P2pConfig { + listen_addr: format!("/ip4/127.0.0.1/udp/{}/quic-v1", test.mempool_base_port + i) + .parse() + .unwrap(), + persistent_peers: (0..N) + .filter(|j| i != *j) + .map(|j| { + format!("/ip4/127.0.0.1/udp/{}/quic-v1", test.mempool_base_port + j) + .parse() + .unwrap() + }) + .collect(), + }, + max_tx_count: 10000, + gossip_batch_size: 100, + }, + metrics: MetricsConfig { + enabled: false, + listen_addr: format!("127.0.0.1:{}", test.metrics_base_port + i) + .parse() + .unwrap(), + }, + runtime: RuntimeConfig::single_threaded(), + test: Default::default(), + } +} diff --git a/code/test/Cargo.toml b/code/crates/test/Cargo.toml similarity index 69% rename from code/test/Cargo.toml rename to code/crates/test/Cargo.toml index 7558bc3ff..cc00a48cb 100644 --- a/code/test/Cargo.toml +++ b/code/crates/test/Cargo.toml @@ -16,8 +16,17 @@ malachite-round.workspace = true malachite-vote.workspace = true malachite-proto.workspace = true +base64.workspace = true futures = { workspace = true, features = ["executor"] } ed25519-consensus.workspace = true -signature.workspace = true +hex.workspace = true +prost.workspace = true +prost-types.workspace = true rand.workspace = true +signature.workspace = true sha2.workspace = true +serde = { workspace = true, features = ["derive"] } +serde_json.workspace = true + +[build-dependencies] +prost-build.workspace = true diff --git a/code/crates/test/build.rs b/code/crates/test/build.rs new file mode 100644 index 000000000..951dcddaa --- /dev/null +++ b/code/crates/test/build.rs @@ -0,0 +1,10 @@ +use std::io::Result; + +fn main() -> Result<()> { + let mut config = prost_build::Config::new(); + config.enable_type_names(); + config.extern_path(".malachite", "::malachite_proto"); + config.compile_protos(&["proto/test.proto"], &["proto", "../proto/proto"])?; + + Ok(()) +} diff --git a/code/crates/test/configs/0/config/config.toml b/code/crates/test/configs/0/config/config.toml new file mode 100644 index 000000000..5a7c3075e --- /dev/null +++ b/code/crates/test/configs/0/config/config.toml @@ -0,0 +1,24 @@ +moniker = "test-0" + +[consensus] +timeout_propose = "3s" +timeout_propose_delta = "500ms" +timeout_prevote 
= "1s" +timeout_prevote_delta = "500ms" +timeout_precommit = "1s" +timeout_precommit_delta = "500ms" +timeout_commit = "1s" + +[consensus.p2p] +listen_addr = "/ip4/127.0.0.1/udp/27000/quic-v1" +persistent_peers = [ + "/ip4/127.0.0.1/udp/27001/quic-v1/p2p/12D3KooWBTia5TdzdZ7mDiGu1dSPJFHYMguV9QQCUcm1RBzSqEvL", + "/ip4/127.0.0.1/udp/27002/quic-v1/p2p/12D3KooWDAFhzcuGpsGMpyrf61sH4yRmWSvTwVxian2TKE6ixtwQ", +] + +[mempool.p2p] +listen_addr = "/ip4/127.0.0.1/udp/28000/quic-v1" +persistent_peers = [ + "/ip4/127.0.0.1/udp/28001/quic-v1", + "/ip4/127.0.0.1/udp/28002/quic-v1", +] diff --git a/code/crates/test/configs/0/config/genesis.json b/code/crates/test/configs/0/config/genesis.json new file mode 100644 index 000000000..2b7d48b57 --- /dev/null +++ b/code/crates/test/configs/0/config/genesis.json @@ -0,0 +1,28 @@ +{ + "validators": [ + { + "address": "E20F7190262A62673584C6DAA70E3A28957721CF", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "aB6qo05JHmyDNavJ6pKwJO9S65FELKO4RZjHmnnzG3U=" + }, + "voting_power": 11 + }, + { + "address": "B036785B4003F97ACB848D6F9466DC3D29A887AF", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "GG0+7aAurV+7rXRO7RWNlYxPf0hWGj5myx7paFWtXBk=" + }, + "voting_power": 10 + }, + { + "address": "73E1F1EA132D63C845C50CE21976BF1268571177", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "MatfCgXiSKSTAzBHyFgfAHm0VY9enHGvYWrnbYDL2wc=" + }, + "voting_power": 10 + } + ] +} \ No newline at end of file diff --git a/code/crates/test/configs/0/config/priv_validator_key.json b/code/crates/test/configs/0/config/priv_validator_key.json new file mode 100644 index 000000000..33e7a7a70 --- /dev/null +++ b/code/crates/test/configs/0/config/priv_validator_key.json @@ -0,0 +1,34 @@ +[ + 88, + 80, + 29, + 195, + 14, + 153, + 139, + 120, + 116, + 208, + 63, + 84, + 65, + 197, + 224, + 149, + 42, + 142, + 156, + 253, + 137, + 109, + 95, + 104, + 171, + 196, + 100, + 142, + 70, + 151, + 199, + 1 +] \ No newline at end of file diff --git a/code/crates/test/configs/1/config/config.toml b/code/crates/test/configs/1/config/config.toml new file mode 100644 index 000000000..e1f58cd01 --- /dev/null +++ b/code/crates/test/configs/1/config/config.toml @@ -0,0 +1,24 @@ +moniker = "test-1" + +[consensus] +timeout_propose = "3s" +timeout_propose_delta = "500ms" +timeout_prevote = "1s" +timeout_prevote_delta = "500ms" +timeout_precommit = "1s" +timeout_precommit_delta = "500ms" +timeout_commit = "1s" + +[consensus.p2p] +listen_addr = "/ip4/127.0.0.1/udp/27001/quic-v1" +persistent_peers = [ + "/ip4/127.0.0.1/udp/27000/quic-v1/p2p/12D3KooWGpoh2tcZ8WdWdYCmDKjnaCGTiJefKFsA5VkyY6t8uJXN", + "/ip4/127.0.0.1/udp/27002/quic-v1/p2p/12D3KooWDAFhzcuGpsGMpyrf61sH4yRmWSvTwVxian2TKE6ixtwQ", +] + +[mempool.p2p] +listen_addr = "/ip4/127.0.0.1/udp/28001/quic-v1" +persistent_peers = [ + "/ip4/127.0.0.1/udp/28000/quic-v1", + "/ip4/127.0.0.1/udp/28002/quic-v1", +] diff --git a/code/crates/test/configs/1/config/genesis.json b/code/crates/test/configs/1/config/genesis.json new file mode 100644 index 000000000..2b7d48b57 --- /dev/null +++ b/code/crates/test/configs/1/config/genesis.json @@ -0,0 +1,28 @@ +{ + "validators": [ + { + "address": "E20F7190262A62673584C6DAA70E3A28957721CF", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "aB6qo05JHmyDNavJ6pKwJO9S65FELKO4RZjHmnnzG3U=" + }, + "voting_power": 11 + }, + { + "address": "B036785B4003F97ACB848D6F9466DC3D29A887AF", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": 
"GG0+7aAurV+7rXRO7RWNlYxPf0hWGj5myx7paFWtXBk=" + }, + "voting_power": 10 + }, + { + "address": "73E1F1EA132D63C845C50CE21976BF1268571177", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "MatfCgXiSKSTAzBHyFgfAHm0VY9enHGvYWrnbYDL2wc=" + }, + "voting_power": 10 + } + ] +} \ No newline at end of file diff --git a/code/crates/test/configs/1/config/priv_validator_key.json b/code/crates/test/configs/1/config/priv_validator_key.json new file mode 100644 index 000000000..d283cf600 --- /dev/null +++ b/code/crates/test/configs/1/config/priv_validator_key.json @@ -0,0 +1,34 @@ +[ + 117, + 249, + 4, + 192, + 208, + 33, + 236, + 33, + 247, + 17, + 230, + 74, + 221, + 16, + 43, + 138, + 146, + 11, + 125, + 192, + 230, + 68, + 124, + 9, + 152, + 177, + 129, + 199, + 73, + 109, + 50, + 15 +] \ No newline at end of file diff --git a/code/crates/test/configs/2/config/config.toml b/code/crates/test/configs/2/config/config.toml new file mode 100644 index 000000000..231d42822 --- /dev/null +++ b/code/crates/test/configs/2/config/config.toml @@ -0,0 +1,24 @@ +moniker = "test-2" + +[consensus] +timeout_propose = "3s" +timeout_propose_delta = "500ms" +timeout_prevote = "1s" +timeout_prevote_delta = "500ms" +timeout_precommit = "1s" +timeout_precommit_delta = "500ms" +timeout_commit = "1s" + +[consensus.p2p] +listen_addr = "/ip4/127.0.0.1/udp/27002/quic-v1" +persistent_peers = [ + "/ip4/127.0.0.1/udp/27000/quic-v1/p2p/12D3KooWGpoh2tcZ8WdWdYCmDKjnaCGTiJefKFsA5VkyY6t8uJXN", + "/ip4/127.0.0.1/udp/27001/quic-v1/p2p/12D3KooWBTia5TdzdZ7mDiGu1dSPJFHYMguV9QQCUcm1RBzSqEvL", +] + +[mempool.p2p] +listen_addr = "/ip4/127.0.0.1/udp/28002/quic-v1" +persistent_peers = [ + "/ip4/127.0.0.1/udp/28000/quic-v1", + "/ip4/127.0.0.1/udp/28001/quic-v1", +] diff --git a/code/crates/test/configs/2/config/genesis.json b/code/crates/test/configs/2/config/genesis.json new file mode 100644 index 000000000..2b7d48b57 --- /dev/null +++ b/code/crates/test/configs/2/config/genesis.json @@ -0,0 +1,28 @@ +{ + "validators": [ + { + "address": "E20F7190262A62673584C6DAA70E3A28957721CF", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "aB6qo05JHmyDNavJ6pKwJO9S65FELKO4RZjHmnnzG3U=" + }, + "voting_power": 11 + }, + { + "address": "B036785B4003F97ACB848D6F9466DC3D29A887AF", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "GG0+7aAurV+7rXRO7RWNlYxPf0hWGj5myx7paFWtXBk=" + }, + "voting_power": 10 + }, + { + "address": "73E1F1EA132D63C845C50CE21976BF1268571177", + "public_key": { + "type": "tendermint/PubKeyEd25519", + "value": "MatfCgXiSKSTAzBHyFgfAHm0VY9enHGvYWrnbYDL2wc=" + }, + "voting_power": 10 + } + ] +} \ No newline at end of file diff --git a/code/crates/test/configs/2/config/priv_validator_key.json b/code/crates/test/configs/2/config/priv_validator_key.json new file mode 100644 index 000000000..49b52b545 --- /dev/null +++ b/code/crates/test/configs/2/config/priv_validator_key.json @@ -0,0 +1,34 @@ +[ + 110, + 134, + 76, + 73, + 1, + 35, + 167, + 179, + 12, + 232, + 36, + 110, + 200, + 147, + 195, + 38, + 190, + 22, + 11, + 216, + 197, + 62, + 41, + 227, + 97, + 79, + 53, + 222, + 86, + 91, + 63, + 236 +] \ No newline at end of file diff --git a/code/crates/test/configs/README.md b/code/crates/test/configs/README.md new file mode 100644 index 000000000..69f17629c --- /dev/null +++ b/code/crates/test/configs/README.md @@ -0,0 +1,5 @@ +This folder was generated by the testnet command. To recreate it, run: +``` +malachite-cli --home . 
testnet --nodes 3 --deterministic +``` + diff --git a/code/crates/test/proto/test.proto b/code/crates/test/proto/test.proto new file mode 100644 index 000000000..022ea3cdd --- /dev/null +++ b/code/crates/test/proto/test.proto @@ -0,0 +1,18 @@ +syntax = "proto3"; + +package test; + +import "malachite.proto"; + +message BlockMetadata { + bytes proof = 1; + malachite.Value value = 2; +} + +message Content { + oneof value { + malachite.TransactionBatch tx_batch = 90; + BlockMetadata metadata = 91; + } +} + diff --git a/code/test/src/address.rs b/code/crates/test/src/address.rs similarity index 82% rename from code/test/src/address.rs rename to code/crates/test/src/address.rs index 4103003c6..4c6a53720 100644 --- a/code/test/src/address.rs +++ b/code/crates/test/src/address.rs @@ -1,11 +1,19 @@ use core::fmt; +use serde::{Deserialize, Serialize}; use malachite_proto as proto; use crate::signing::PublicKey; -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct Address([u8; Self::LENGTH]); +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Address( + #[serde( + serialize_with = "hex::serde::serialize_upper", + deserialize_with = "hex::serde::deserialize" + )] + [u8; Self::LENGTH], +); impl Address { const LENGTH: usize = 20; @@ -28,7 +36,7 @@ impl fmt::Display for Address { #[cfg_attr(coverage_nightly, coverage(off))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for byte in self.0.iter() { - write!(f, "{:02x}", byte)?; + write!(f, "{:02X}", byte)?; } Ok(()) } diff --git a/code/crates/test/src/block_part.rs b/code/crates/test/src/block_part.rs new file mode 100644 index 000000000..4c8650a72 --- /dev/null +++ b/code/crates/test/src/block_part.rs @@ -0,0 +1,220 @@ +use std::sync::Arc; + +use signature::Signer; + +use malachite_common::{Round, SignedBlockPart, TransactionBatch}; +use malachite_proto::{self as proto}; + +use crate::{Address, Height, PrivateKey, TestContext, Value}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct BlockMetadata { + proof: Vec, + value: Value, +} + +impl BlockMetadata { + pub fn new(proof: Vec, value: Value) -> Self { + Self { proof, value } + } + + pub fn value(&self) -> Value { + self.value + } + + pub fn to_bytes(&self) -> Vec { + proto::Protobuf::to_bytes(self).unwrap() + } + + pub fn size_bytes(&self) -> usize { + self.proof.len() + self.value.size_bytes() + } +} + +impl proto::Protobuf for BlockMetadata { + type Proto = crate::proto::BlockMetadata; + + fn from_proto(proto: Self::Proto) -> Result { + Ok(Self { + proof: proto.proof, + value: Value::from_proto( + proto + .value + .ok_or_else(|| proto::Error::missing_field::("height"))?, + )?, + }) + } + + fn to_proto(&self) -> Result { + Ok(crate::proto::BlockMetadata { + proof: self.proof.clone(), + value: Option::from(self.value.to_proto().unwrap()), + }) + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Content { + TxBatch(TransactionBatch), + Metadata(BlockMetadata), +} + +impl Content { + pub fn size_bytes(&self) -> usize { + match self { + Content::TxBatch(batch) => batch.size_bytes(), + Content::Metadata(meta) => meta.size_bytes(), + } + } + + pub fn tx_count(&self) -> Option { + match self { + Content::TxBatch(batch) => Some(batch.transactions().len()), + Content::Metadata(_) => None, + } + } +} + +impl proto::Protobuf for Content { + type Proto = crate::proto::Content; + + fn from_proto(proto: Self::Proto) -> Result { + let content = proto + .value + .ok_or_else(|| 
proto::Error::missing_field::("value"))?; + + match content { + crate::proto::content::Value::TxBatch(batch) => { + TransactionBatch::from_proto(batch).map(Content::TxBatch) + } + crate::proto::content::Value::Metadata(metadata) => { + BlockMetadata::from_proto(metadata).map(Content::Metadata) + } + } + } + + fn to_proto(&self) -> Result { + match self { + Content::TxBatch(batch) => Ok(crate::proto::Content { + value: Some(crate::proto::content::Value::TxBatch(batch.to_proto()?)), + }), + Content::Metadata(metadata) => Ok(crate::proto::Content { + value: Some(crate::proto::content::Value::Metadata(metadata.to_proto()?)), + }), + } + } +} + +/// A part of a value for a height, round. Identified in this scope by the sequence. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct BlockPart { + pub height: Height, + pub round: Round, + pub sequence: u64, + pub content: Arc, + pub validator_address: Address, +} + +impl BlockPart { + pub fn new( + height: Height, + round: Round, + sequence: u64, + validator_address: Address, + content: Content, + ) -> Self { + Self { + height, + round, + sequence, + content: Arc::new(content), + validator_address, + } + } + + pub fn to_bytes(&self) -> Vec { + proto::Protobuf::to_bytes(self).unwrap() + } + + pub fn signed(self, private_key: &PrivateKey) -> SignedBlockPart { + let signature = private_key.sign(&self.to_bytes()); + + SignedBlockPart { + block_part: self, + signature, + } + } + + pub fn metadata(&self) -> Option<&BlockMetadata> { + match self.content.as_ref() { + Content::Metadata(metadata) => Some(metadata), + Content::TxBatch(_) => None, + } + } + + pub fn tx_count(&self) -> Option { + self.content.tx_count() + } + + pub fn size_bytes(&self) -> usize { + self.content.size_bytes() + } +} + +impl malachite_common::BlockPart for BlockPart { + fn height(&self) -> Height { + self.height + } + + fn round(&self) -> Round { + self.round + } + + fn sequence(&self) -> u64 { + self.sequence + } + + fn validator_address(&self) -> &Address { + &self.validator_address + } +} + +impl proto::Protobuf for BlockPart { + type Proto = proto::BlockPart; + + fn from_proto(proto: Self::Proto) -> Result { + Ok(Self { + height: Height::from_proto( + proto + .height + .ok_or_else(|| proto::Error::missing_field::("height"))?, + )?, + round: Round::from_proto( + proto + .round + .ok_or_else(|| proto::Error::missing_field::("round"))?, + )?, + sequence: proto.sequence, + content: Arc::new(Content::from_any( + &proto + .content + .ok_or_else(|| proto::Error::missing_field::("content"))?, + )?), + validator_address: Address::from_proto( + proto.validator_address.ok_or_else(|| { + proto::Error::missing_field::("validator_address") + })?, + )?, + }) + } + + fn to_proto(&self) -> Result { + Ok(proto::BlockPart { + height: Some(self.height.to_proto()?), + round: Some(self.round.to_proto()?), + sequence: self.sequence, + content: Some(self.content.to_any()?), + validator_address: Some(self.validator_address.to_proto()?), + }) + } +} diff --git a/code/test/src/context.rs b/code/crates/test/src/context.rs similarity index 77% rename from code/test/src/context.rs rename to code/crates/test/src/context.rs index 28a57af72..07a085999 100644 --- a/code/test/src/context.rs +++ b/code/crates/test/src/context.rs @@ -1,10 +1,10 @@ use std::sync::Arc; -use malachite_common::Context; use malachite_common::NilOrVal; use malachite_common::Round; use malachite_common::SignedProposal; use malachite_common::SignedVote; +use malachite_common::{Context, SignedBlockPart}; use crate::address::*; use 
crate::height::*; @@ -13,6 +13,7 @@ use crate::signing::*; use crate::validator_set::*; use crate::value::*; use crate::vote::*; +use crate::BlockPart; #[derive(Clone, Debug)] pub struct TestContext { @@ -29,6 +30,7 @@ impl TestContext { impl Context for TestContext { type Address = Address; + type BlockPart = BlockPart; type Height = Height; type Proposal = Proposal; type ValidatorSet = ValidatorSet; @@ -97,4 +99,24 @@ impl Context for TestContext { ) -> Vote { Vote::new_precommit(height, round, value_id, address) } + + fn sign_block_part(&self, block_part: Self::BlockPart) -> SignedBlockPart { + use signature::Signer; + let signature = self.private_key.sign(&block_part.to_bytes()); + SignedBlockPart::new(block_part, signature) + } + + fn verify_signed_block_part( + &self, + signed_block_part: &SignedBlockPart, + public_key: &malachite_common::PublicKey, + ) -> bool { + use signature::Verifier; + public_key + .verify( + &signed_block_part.block_part.to_bytes(), + &signed_block_part.signature, + ) + .is_ok() + } } diff --git a/code/test/src/height.rs b/code/crates/test/src/height.rs similarity index 80% rename from code/test/src/height.rs rename to code/crates/test/src/height.rs index 6ff5b507c..113a34357 100644 --- a/code/test/src/height.rs +++ b/code/crates/test/src/height.rs @@ -3,7 +3,7 @@ use core::fmt; use malachite_proto as proto; /// A blockchain height -#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Height(u64); impl Height { @@ -14,6 +14,14 @@ impl Height { pub const fn as_u64(&self) -> u64 { self.0 } + + pub fn increment(&self) -> Self { + Self(self.0 + 1) + } + + pub fn decrement(&self) -> Option { + self.0.checked_sub(1).map(Self) + } } impl fmt::Display for Height { diff --git a/code/test/src/lib.rs b/code/crates/test/src/lib.rs similarity index 77% rename from code/test/src/lib.rs rename to code/crates/test/src/lib.rs index 869cfe276..88a16421b 100644 --- a/code/test/src/lib.rs +++ b/code/crates/test/src/lib.rs @@ -1,19 +1,25 @@ #![forbid(unsafe_code)] #![deny(trivial_casts, trivial_numeric_casts)] +// For coverage on nightly +#![allow(unexpected_cfgs)] #![cfg_attr(coverage_nightly, feature(coverage_attribute))] mod address; +mod block_part; mod context; mod height; mod proposal; +mod serialization; mod signing; mod validator_set; mod value; mod vote; +pub mod proto; pub mod utils; pub use crate::address::*; +pub use crate::block_part::*; pub use crate::context::*; pub use crate::height::*; pub use crate::proposal::*; diff --git a/code/test/src/proposal.rs b/code/crates/test/src/proposal.rs similarity index 100% rename from code/test/src/proposal.rs rename to code/crates/test/src/proposal.rs diff --git a/code/crates/test/src/proto.rs b/code/crates/test/src/proto.rs new file mode 100644 index 000000000..e8de9fa99 --- /dev/null +++ b/code/crates/test/src/proto.rs @@ -0,0 +1 @@ +include!(concat!(env!("OUT_DIR"), "/test.rs")); diff --git a/code/crates/test/src/serialization.rs b/code/crates/test/src/serialization.rs new file mode 100644 index 000000000..6c7a7c982 --- /dev/null +++ b/code/crates/test/src/serialization.rs @@ -0,0 +1,89 @@ +/// Serde Ed25519 VerificationKey CometBFT serializer/deserializer. 
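+/// Keys are serialized in the CometBFT JSON form, e.g. `{"type": "tendermint/PubKeyEd25519", "value": "<base64 key bytes>"}`,
+/// matching the format of the test `genesis.json` files.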
+pub mod verification_key { + use ed25519_consensus::VerificationKey; + use serde::{Deserialize, Serialize, Serializer}; + + #[derive(Serialize, Deserialize)] + struct PubKey { + #[serde(rename = "type")] + key_type: String, + #[serde(with = "crate::serialization::base64string")] + value: Vec, + } + + pub fn serialize(s: &VerificationKey, ser: S) -> Result + where + S: Serializer, + { + PubKey { + key_type: "tendermint/PubKeyEd25519".to_string(), + value: s.as_bytes().to_vec(), + } + .serialize(ser) + } + + pub fn deserialize<'de, D>(de: D) -> Result + where + D: serde::Deserializer<'de>, + { + let pk = PubKey::deserialize(de)?; + VerificationKey::try_from(pk.value.as_slice()).map_err(serde::de::Error::custom) + } +} + +/// Serde Ed25519 SigningKey CometBFT serializer/deserializer. +pub mod signing_key { + use ed25519_consensus::SigningKey; + use serde::{Deserialize, Serialize, Serializer}; + + #[derive(Serialize, Deserialize)] + struct PrivKey { + #[serde(rename = "type")] + key_type: String, + #[serde(with = "crate::serialization::base64string")] + value: Vec, + } + + pub fn serialize(s: &SigningKey, ser: S) -> Result + where + S: Serializer, + { + PrivKey { + key_type: "tendermint/PrivKeyEd25519".to_string(), + value: s.as_bytes().to_vec(), + } + .serialize(ser) + } + + pub fn deserialize<'de, D>(de: D) -> Result + where + D: serde::Deserializer<'de>, + { + let pk = PrivKey::deserialize(de)?; + SigningKey::try_from(pk.value.as_slice()).map_err(serde::de::Error::custom) + } +} + +/// Serialize/deserialize between base64-encoded String and Vec +pub mod base64string { + use base64::prelude::BASE64_STANDARD; + use base64::Engine; + use serde::{Deserialize, Serializer}; + + pub fn serialize(s: &Vec, ser: S) -> Result + where + S: Serializer, + { + ser.serialize_str(BASE64_STANDARD.encode(s).as_str()) + } + + pub fn deserialize<'de, D>(de: D) -> Result, D::Error> + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(de)?; + BASE64_STANDARD + .decode(s) + .map_err(|e| serde::de::Error::custom(e.to_string())) + } +} diff --git a/code/test/src/signing.rs b/code/crates/test/src/signing.rs similarity index 80% rename from code/test/src/signing.rs rename to code/crates/test/src/signing.rs index b13be62e7..605e881eb 100644 --- a/code/test/src/signing.rs +++ b/code/crates/test/src/signing.rs @@ -3,6 +3,7 @@ use rand::{CryptoRng, RngCore}; use signature::{Keypair, Signer, Verifier}; pub use ed25519_consensus::Signature; +use serde::{Deserialize, Serialize}; #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct Ed25519; @@ -33,8 +34,11 @@ impl SigningScheme for Ed25519 { } } -#[derive(Clone, Debug)] -pub struct PrivateKey(ed25519_consensus::SigningKey); +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(transparent)] +pub struct PrivateKey( + #[serde(with = "crate::serialization::signing_key")] ed25519_consensus::SigningKey, +); impl PrivateKey { #[cfg_attr(coverage_nightly, coverage(off))] @@ -58,6 +62,12 @@ impl PrivateKey { } } +impl From<[u8; 32]> for PrivateKey { + fn from(bytes: [u8; 32]) -> Self { + Self(ed25519_consensus::SigningKey::from(bytes)) + } +} + impl Signer for PrivateKey { fn try_sign(&self, msg: &[u8]) -> Result { Ok(self.0.sign(msg)) @@ -72,8 +82,11 @@ impl Keypair for PrivateKey { } } -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct PublicKey(ed25519_consensus::VerificationKey); +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(transparent)] +pub struct PublicKey( + #[serde(with = 
"crate::serialization::verification_key")] ed25519_consensus::VerificationKey, +); impl PublicKey { pub fn new(key: impl Into) -> Self { diff --git a/code/test/src/utils.rs b/code/crates/test/src/utils.rs similarity index 100% rename from code/test/src/utils.rs rename to code/crates/test/src/utils.rs diff --git a/code/test/src/validator_set.rs b/code/crates/test/src/validator_set.rs similarity index 94% rename from code/test/src/validator_set.rs rename to code/crates/test/src/validator_set.rs index 71966071a..e5c1e192a 100644 --- a/code/test/src/validator_set.rs +++ b/code/crates/test/src/validator_set.rs @@ -1,10 +1,11 @@ use malachite_common::VotingPower; +use serde::{Deserialize, Serialize}; use crate::signing::PublicKey; use crate::{Address, TestContext}; /// A validator is a public key and voting power -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct Validator { pub address: Address, pub public_key: PublicKey, @@ -37,7 +38,7 @@ impl malachite_common::Validator for Validator { } /// A validator set contains a list of validators sorted by address. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct ValidatorSet { pub validators: Vec, } @@ -108,6 +109,9 @@ impl ValidatorSet { vals.dedup(); } + pub fn get_keys(&self) -> Vec { + self.validators.iter().map(|v| v.public_key).collect() + } } impl malachite_common::ValidatorSet for ValidatorSet { diff --git a/code/test/src/value.rs b/code/crates/test/src/value.rs similarity index 86% rename from code/test/src/value.rs rename to code/crates/test/src/value.rs index fac05ef74..a7a0e1b6c 100644 --- a/code/test/src/value.rs +++ b/code/crates/test/src/value.rs @@ -1,4 +1,6 @@ +use malachite_common::Transaction; use malachite_proto::{self as proto}; +use std::hash::{DefaultHasher, Hash, Hasher}; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Copy)] pub struct ValueId(u64); @@ -49,6 +51,12 @@ impl Value { Self(value) } + pub fn new_from_transactions(txes: &[Transaction]) -> Self { + let mut hash = DefaultHasher::new(); + txes.hash(&mut hash); + Value::new(hash.finish()) + } + pub const fn as_u64(&self) -> u64 { self.0 } @@ -56,6 +64,10 @@ impl Value { pub const fn id(&self) -> ValueId { ValueId(self.0) } + + pub fn size_bytes(&self) -> usize { + 8 + } } impl malachite_common::Value for Value { diff --git a/code/test/src/vote.rs b/code/crates/test/src/vote.rs similarity index 100% rename from code/test/src/vote.rs rename to code/crates/test/src/vote.rs diff --git a/code/test/tests/driver.rs b/code/crates/test/tests/driver.rs similarity index 97% rename from code/test/tests/driver.rs rename to code/crates/test/tests/driver.rs index 09377daf9..f782d886d 100644 --- a/code/test/tests/driver.rs +++ b/code/crates/test/tests/driver.rs @@ -689,7 +689,12 @@ fn driver_steps_not_proposer_other_height() { }, ]; - run_steps(&mut driver, steps, sel.as_ref(), &vs); + let expected_error = Error::InvalidProposalHeight { + proposal_height: Height::new(2), + consensus_height: Height::new(1), + }; + + run_steps_failing(&mut driver, steps, expected_error, sel.as_ref(), &vs); } #[test] @@ -1286,3 +1291,38 @@ fn run_steps( .and_then(|input| output_to_input(input, sel, vs)); } } + +fn run_steps_failing( + driver: &mut Driver, + steps: Vec, + expected_error: Error, + sel: &dyn ProposerSelector, + vs: &ValidatorSet, +) { + let mut input_from_prev_output = None; + + for step in steps { + println!("Step: {}", step.desc); + + let input = step + 
.input + .unwrap_or_else(|| input_from_prev_output.unwrap()); + + match driver.process(input) { + Ok(mut outputs) => { + assert_eq!(outputs, step.expected_outputs, "expected outputs"); + assert_eq!(driver.round(), step.expected_round, "expected round"); + assert_eq!(driver.round_state, step.new_state, "new state"); + + input_from_prev_output = outputs + .pop() + .and_then(|input| output_to_input(input, sel, vs)); + } + + Err(error) => { + assert_eq!(error, expected_error, "expected error"); + return; + } + } + } +} diff --git a/code/test/tests/driver_extra.rs b/code/crates/test/tests/driver_extra.rs similarity index 99% rename from code/test/tests/driver_extra.rs rename to code/crates/test/tests/driver_extra.rs index 38c633ffd..92ed97787 100644 --- a/code/test/tests/driver_extra.rs +++ b/code/crates/test/tests/driver_extra.rs @@ -28,7 +28,7 @@ use malachite_test::utils::*; // `driver_steps_polka_previous_with_locked() // // - L44 with previously received polkaNil and entering prevote (due to timeoutPropose) -// `driver_steps_polka_nil_and_timout_propose()` +// `driver_steps_polka_nil_and_timeout_propose()` // // - L36 with previoustly received polkaValue and proposal, and entering prevote (due to received proposal) // `driver_steps_polka_value_then_proposal()` @@ -846,7 +846,7 @@ fn driver_steps_polka_previous_with_no_locked() { // L57 - v3 receives timeout propose, prevotes for nil (step prevote) // L44 - polkaNil is replayed and v3 precommits for nil (step precommit) #[test] -fn driver_steps_polka_nil_and_timout_propose() { +fn driver_steps_polka_nil_and_timeout_propose() { let [(v1, _sk1), (v2, _sk2), (v3, sk3)] = make_validators([2, 3, 2]); let (my_sk, my_addr) = (sk3.clone(), v3.address); diff --git a/code/test/tests/round.rs b/code/crates/test/tests/round.rs similarity index 100% rename from code/test/tests/round.rs rename to code/crates/test/tests/round.rs diff --git a/code/test/tests/round_votes.rs b/code/crates/test/tests/round_votes.rs similarity index 100% rename from code/test/tests/round_votes.rs rename to code/crates/test/tests/round_votes.rs diff --git a/code/test/tests/vote_keeper.rs b/code/crates/test/tests/vote_keeper.rs similarity index 100% rename from code/test/tests/vote_keeper.rs rename to code/crates/test/tests/vote_keeper.rs diff --git a/code/vote/Cargo.toml b/code/crates/vote/Cargo.toml similarity index 100% rename from code/vote/Cargo.toml rename to code/crates/vote/Cargo.toml diff --git a/code/vote/src/count.rs b/code/crates/vote/src/count.rs similarity index 100% rename from code/vote/src/count.rs rename to code/crates/vote/src/count.rs diff --git a/code/vote/src/keeper.rs b/code/crates/vote/src/keeper.rs similarity index 100% rename from code/vote/src/keeper.rs rename to code/crates/vote/src/keeper.rs diff --git a/code/vote/src/lib.rs b/code/crates/vote/src/lib.rs similarity index 71% rename from code/vote/src/lib.rs rename to code/crates/vote/src/lib.rs index 91c1e89fa..662aedeb2 100644 --- a/code/vote/src/lib.rs +++ b/code/crates/vote/src/lib.rs @@ -1,4 +1,4 @@ -//! Infrastructre for tallying votes within the consensus engine. +//! Infrastructure for tallying votes within the consensus engine. 
#![no_std] #![forbid(unsafe_code)] @@ -9,6 +9,8 @@ rustdoc::private_intra_doc_links, variant_size_differences )] +// For coverage on nightly +#![allow(unexpected_cfgs)] #![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::panic))] #![cfg_attr(coverage_nightly, feature(coverage_attribute))] @@ -90,8 +92,34 @@ impl ThresholdParam { } /// Check whether the threshold is met. - pub const fn is_met(&self, weight: Weight, total: Weight) -> bool { - // FIXME: Deal with overflows - weight * self.denominator > total * self.numerator + pub fn is_met(&self, weight: Weight, total: Weight) -> bool { + let lhs = weight + .checked_mul(self.denominator) + .expect("attempt to multiply with overflow"); + + let rhs = total + .checked_mul(self.numerator) + .expect("attempt to multiply with overflow"); + + lhs > rhs + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn threshold_param_is_met() { + assert!(ThresholdParam::TWO_F_PLUS_ONE.is_met(7, 10)); + assert!(!ThresholdParam::TWO_F_PLUS_ONE.is_met(6, 10)); + assert!(ThresholdParam::F_PLUS_ONE.is_met(4, 10)); + assert!(!ThresholdParam::F_PLUS_ONE.is_met(3, 10)); + } + + #[test] + #[should_panic(expected = "attempt to multiply with overflow")] + fn threshold_param_is_met_overflow() { + assert!(!ThresholdParam::TWO_F_PLUS_ONE.is_met(1, Weight::MAX)); } } diff --git a/code/vote/src/round_votes.rs b/code/crates/vote/src/round_votes.rs similarity index 100% rename from code/vote/src/round_votes.rs rename to code/crates/vote/src/round_votes.rs diff --git a/code/vote/src/round_weights.rs b/code/crates/vote/src/round_weights.rs similarity index 100% rename from code/vote/src/round_weights.rs rename to code/crates/vote/src/round_weights.rs diff --git a/code/vote/src/value_weights.rs b/code/crates/vote/src/value_weights.rs similarity index 70% rename from code/vote/src/value_weights.rs rename to code/crates/vote/src/value_weights.rs index b7f89a63e..2f3ab2233 100644 --- a/code/vote/src/value_weights.rs +++ b/code/crates/vote/src/value_weights.rs @@ -24,7 +24,9 @@ impl ValuesWeights { Value: Ord, { let entry = self.value_weights.entry(value).or_insert(0); - *entry += weight; // FIXME: Deal with overflows + *entry = entry + .checked_add(weight) + .expect("attempt to add with overflow"); *entry } @@ -38,7 +40,13 @@ impl ValuesWeights { /// Return the sum of the weights of all values. 
pub fn sum(&self) -> Weight { - self.value_weights.values().sum() // FIXME: Deal with overflows + let mut weight: Weight = 0; + for w in self.value_weights.values() { + weight = weight + .checked_add(*w) + .expect("attempt to sum with overflow"); + } + weight } } @@ -79,7 +87,22 @@ mod tests { assert_eq!(vw.get(&None), 2); assert_eq!(vw.get(&Some(1)), 2); assert_eq!(vw.get(&Some(2)), 1); + } + + #[test] + #[should_panic(expected = "attempt to add with overflow")] + fn values_weight_add_overflow() { + let mut vw: ValuesWeights> = ValuesWeights::new(); + vw.add(None, Weight::MAX); + vw.add(None, 1); + } - // FIXME: Test for and deal with overflows + #[test] + #[should_panic(expected = "attempt to sum with overflow")] + fn values_weight_sum_overflow() { + let mut vw: ValuesWeights> = ValuesWeights::new(); + vw.add(None, Weight::MAX); + vw.add(Some(1), 1); + vw.sum(); } } diff --git a/code/driver/src/error.rs b/code/driver/src/error.rs deleted file mode 100644 index 47e15aa2b..000000000 --- a/code/driver/src/error.rs +++ /dev/null @@ -1,37 +0,0 @@ -use core::fmt; - -use derive_where::derive_where; - -use malachite_common::{Context, Round}; - -/// The type of errors that can be yielded by the `Driver`. -#[derive_where(Clone, Debug, PartialEq, Eq)] -pub enum Error -where - Ctx: Context, -{ - /// No proposer was set for this round - NoProposer(Ctx::Height, Round), - - /// Proposer not found - ProposerNotFound(Ctx::Address), - - /// Validator not found in validator set - ValidatorNotFound(Ctx::Address), -} - -impl fmt::Display for Error -where - Ctx: Context, -{ - #[cfg_attr(coverage_nightly, coverage(off))] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Error::NoProposer(height, round) => { - write!(f, "No proposer set for height {height} at round {round}") - } - Error::ProposerNotFound(addr) => write!(f, "Proposer not found: {addr}"), - Error::ValidatorNotFound(addr) => write!(f, "Validator not found: {addr}"), - } - } -} diff --git a/code/network/Cargo.toml b/code/network/Cargo.toml deleted file mode 100644 index e0853a873..000000000 --- a/code/network/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "malachite-network" -version.workspace = true -edition.workspace = true -repository.workspace = true -license.workspace = true -publish.workspace = true - -[lints] -workspace = true - -[dependencies] -malachite-proto.workspace = true - -prost = { workspace = true } -prost-types = { workspace = true } -serde = { workspace = true, features = ["derive"] } diff --git a/code/network/src/lib.rs b/code/network/src/lib.rs deleted file mode 100644 index a14a9cc71..000000000 --- a/code/network/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -#![cfg_attr(coverage_nightly, feature(coverage_attribute))] - -mod msg; -mod peer_id; - -pub use msg::Msg; -pub use peer_id::PeerId; diff --git a/code/network/src/peer_id.rs b/code/network/src/peer_id.rs deleted file mode 100644 index efa14e8e2..000000000 --- a/code/network/src/peer_id.rs +++ /dev/null @@ -1,33 +0,0 @@ -use core::fmt; -use std::convert::Infallible; -use std::str::FromStr; - -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(transparent)] -pub struct PeerId(String); - -impl PeerId { - pub fn new(id: impl ToString) -> Self { - Self(id.to_string()) - } - - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl fmt::Display for PeerId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.0, f) - } -} - -impl FromStr for 
PeerId { - type Err = Infallible; - - fn from_str(s: &str) -> Result { - Ok(Self(s.to_string())) - } -} diff --git a/code/proto/src/impls.rs b/code/proto/src/impls.rs deleted file mode 100644 index b0a9061b5..000000000 --- a/code/proto/src/impls.rs +++ /dev/null @@ -1,87 +0,0 @@ -use malachite_common::{Context, Round, SignedProposal, SignedVote, SigningScheme, VoteType}; - -use crate::{self as proto, Error, Protobuf}; - -impl Protobuf for Round { - type Proto = proto::Round; - - fn from_proto(proto: Self::Proto) -> Result { - Ok(Round::new(proto.round)) - } - - fn to_proto(&self) -> Result { - Ok(proto::Round { - round: self.as_i64(), - }) - } -} - -impl Protobuf for SignedVote -where - Ctx::Vote: Protobuf, -{ - type Proto = proto::SignedVote; - - fn from_proto(proto: Self::Proto) -> Result { - let vote = proto - .vote - .ok_or_else(|| Error::missing_field::("vote"))?; - - Ok(Self { - vote: Ctx::Vote::from_proto(vote)?, - signature: Ctx::SigningScheme::decode_signature(&proto.signature) - .map_err(|e| Error::Other(format!("Failed to decode signature: {e}")))?, - }) - } - - fn to_proto(&self) -> Result { - Ok(proto::SignedVote { - vote: Some(self.vote.to_proto()?), - signature: Ctx::SigningScheme::encode_signature(&self.signature), - }) - } -} - -impl From for VoteType { - fn from(vote_type: proto::VoteType) -> Self { - match vote_type { - proto::VoteType::Prevote => VoteType::Prevote, - proto::VoteType::Precommit => VoteType::Precommit, - } - } -} - -impl From for proto::VoteType { - fn from(vote_type: VoteType) -> proto::VoteType { - match vote_type { - VoteType::Prevote => proto::VoteType::Prevote, - VoteType::Precommit => proto::VoteType::Precommit, - } - } -} - -impl Protobuf for SignedProposal -where - Ctx::Proposal: Protobuf, -{ - type Proto = proto::SignedProposal; - - fn from_proto(proto: Self::Proto) -> Result { - let proposal = proto - .proposal - .ok_or_else(|| Error::Other("Missing field `proposal`".to_string()))?; - - Ok(Self { - proposal: Ctx::Proposal::from_proto(proposal)?, - signature: Ctx::SigningScheme::decode_signature(&proto.signature) - .map_err(|e| Error::Other(format!("Failed to decode signature: {e}")))?, - }) - } - - fn to_proto(&self) -> Result { - Ok(proto::SignedProposal { - proposal: Some(self.proposal.to_proto()?), - signature: Ctx::SigningScheme::encode_signature(&self.signature), - }) - } -} diff --git a/code/scripts/spawn.bash b/code/scripts/spawn.bash new file mode 100755 index 000000000..2703be13a --- /dev/null +++ b/code/scripts/spawn.bash @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +# This script takes: +# - a number of nodes to run as an argument, +# - the home directory for the nodes configuration folders + +function help { + echo "Usage: spawn.sh [--help] --nodes NODES_COUNT --home NODES_HOME" +} + +# Parse arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --help) help; exit 0 ;; + --nodes) NODES_COUNT="$2"; shift ;; + --home) NODES_HOME="$2"; shift ;; + *) echo "Unknown parameter passed: $1"; help; exit 1 ;; + esac + shift +done + +# Check required arguments +if [[ -z "$NODES_COUNT" ]]; then + help + exit 1 +fi + +if [[ -z "$NODES_HOME" ]]; then + help + exit 1 +fi + +# Environment variables +export MALACHITE__CONSENSUS__MAX_BLOCK_SIZE="1MiB" +export MALACHITE__CONSENSUS__TIMEOUT_PROPOSE="3s" +export MALACHITE__CONSENSUS__TIMEOUT_PREVOTE="1s" +export MALACHITE__CONSENSUS__TIMEOUT_PRECOMMIT="1s" +export MALACHITE__CONSENSUS__TIMEOUT_COMMIT="1s" +export MALACHITE__MEMPOOL__MAX_TX_COUNT="10000" +export MALACHITE__MEMPOOL__GOSSIP_BATCH_SIZE=100 +export 
MALACHITE__TEST__TX_SIZE="256B" +export MALACHITE__TEST__TXS_PER_PART=200 +export MALACHITE__TEST__TIME_ALLOWANCE_FACTOR=0.7 +export MALACHITE__TEST__EXEC_TIME_PER_TX="1ms" + +echo "Compiling Malachite..." +cargo build --release + +# Create nodes and logs directories, run nodes +for NODE in $(seq 0 $((NODES_COUNT - 1))); do + mkdir -p "$NODES_HOME/$NODE/logs" + rm -f "$NODES_HOME/$NODE/logs/*.log" + + echo "[Node $NODE] Spawning node..." + cargo run -q --release -- start --home "$NODES_HOME/$NODE" > "$NODES_HOME/$NODE/logs/node.log" 2>&1 & + echo $! > "$NODES_HOME/$NODE/node.pid" +done + +# Function to handle cleanup on interrupt +function exit_and_cleanup { + echo "Stopping all nodes..." + for NODE in $(seq 0 $((NODES_COUNT - 1))); do + NODE_PID=$(cat "$NODES_HOME/$NODE/node.pid") + echo "[Node $NODE] Stopping node (PID: $NODE_PID)..." + kill "$NODE_PID" + done + exit 0 +} + +# Trap the INT signal (Ctrl+C) to run the cleanup function +trap exit_and_cleanup INT + +echo "Spawned $NODES_COUNT nodes." +echo "Press Ctrl+C to stop the nodes." + +# Keep the script running +while true; do sleep 1; done diff --git a/code/scripts/spawn.fish b/code/scripts/spawn.fish new file mode 100755 index 000000000..631f8cfa7 --- /dev/null +++ b/code/scripts/spawn.fish @@ -0,0 +1,98 @@ +#!/usr/bin/env fish + +# This script takes: +# - a number of nodes to run as an argument, +# - the home directory for the nodes configuration folders + +function help + echo "Usage: spawn.fish [--help] --nodes NODES_COUNT --home NODES_HOME [--profile]" +end + +argparse -n spawn.fish help 'nodes=' 'home=' profile -- $argv +or return + +if set -ql _flag_help + help + return 0 +end + +if ! set -q _flag_nodes + help + return 1 +end + +if ! set -q _flag_home + help + return 1 +end + +if set -q _flag_profile + echo "Profiling enabled." + set profile true + set build_profile profiling +else + set profile false + set build_profile release +end + +set -x MALACHITE__CONSENSUS__MAX_BLOCK_SIZE "1 MiB" +set -x MALACHITE__TEST__TXS_PER_PART 50 +set -x MALACHITE__TEST__TIME_ALLOWANCE_FACTOR 0.7 +set -x MALACHITE__TEST__EXEC_TIME_PER_PART 10ms + +echo "Compiling Malachite..." +cargo build --profile $build_profile + +set session malachite +tmux kill-session -t $session +tmux new-session -s $session -n main -d + +set NODES_COUNT $_flag_nodes +set NODES_HOME $_flag_home + +for NODE in (seq 0 $(math $NODES_COUNT - 1)) + set NODE_HOME "$NODES_HOME/$NODE" + mkdir -p "$NODE_HOME/logs" + rm -f "$NODE_HOME/logs/*.log" + + if $profile + set cmd_prefix "samply record --save-only -o '$NODE_HOME/perf.json' --" + else + set cmd_prefix "" + end + + set pane $(tmux new-window -P -n "node-$NODE" /bin/zsh) + + echo "[Node $NODE] Spawning node..." + + tmux send -t "$pane" "$cmd_prefix ./target/$build_profile/malachite-cli start --home '$NODE_HOME' 2>&1 > '$NODE_HOME/logs/node.log' &" Enter + tmux send -t "$pane" "echo \$! > '$NODE_HOME/node.pid'" Enter + tmux send -t "$pane" "tail -f '$NODE_HOME/logs/node.log'" Enter +end + +echo "Spawned $NODES_COUNT nodes." +echo + +read -l -P "Launch tmux? [y/N] " launch_tmux +switch $launch_tmux + case Y y + tmux attach -t $session + case '*' + echo "To attach to the tmux session, run:" + echo " tmux attach -t $session" +end + +echo + +read -l -P "Press Enter to stop the nodes... " done + +echo "Stopping all nodes..." +for NODE in (seq 0 $(math $NODES_COUNT - 1)) + set NODE_PID (cat "$NODES_HOME/$NODE/node.pid") + echo "[Node $NODE] Stopping node (PID: $NODE_PID)..." 
+ kill $NODE_PID +end +echo +read -l -P "Press Enter to kill the tmux session... " done + +tmux kill-session -t $session diff --git a/qa/.gitignore b/qa/.gitignore new file mode 100644 index 000000000..cd585698e --- /dev/null +++ b/qa/.gitignore @@ -0,0 +1,10 @@ +terraform/.terraform +terraform/.terraform.lock.hcl +terraform/.terraform.tfstate.lock.info +terraform/terraform.tfvars +terraform/terraform.tfstate* +terraform/hosts +terraform/commands.sh +viewer/data-grafana +viewer/data-prometheus +viewer/prometheus.tgz diff --git a/qa/README.md b/qa/README.md new file mode 100644 index 000000000..76b7d160f --- /dev/null +++ b/qa/README.md @@ -0,0 +1,253 @@ +# QA + +This is an opinionated QA environment with a human developer in mind. It focuses on the logical blocks of a QA setup, +using custom commands to simplify the language used to describe the process of running the nodes. + +## Prerequisites + +* [pssh](https://linux.die.net/man/1/pssh)(Mac) or [parallel-ssh](https://manpages.org/parallel-ssh)(Linux) on your + local machine. +* If you use parallel-ssh, create a symlink to `pssh` in your path. + +* Usually, `ln /usr/bin/parallel-ssh /usr/bin/pssh` will do the trick. + +## The command & control server + +A `cc` server is deployed along with the QA nodes. It helps manage the servers, and it is closer to the QA nodes than a developer +machine. + +The developer can build the Docker image for testing locally and push it to the Docker Registry on the `cc` server, +using the `deploy_cc` custom command. The QA nodes can then pull the image from the registry and run it. + +The developer can create the testnet configuration remotely on the `cc` server using the `setup_config` custom command. +The configuration is stored in the `/data` folder on the server, which is shared over NFS with the QA nodes. + +The `cc` server also hosts a Prometheus server with Grafana for monitoring the nodes. The data can be downloaded using +the `get_prometheus_data` custom command. Then it can be imported to a local Grafana/Prometheus viewer for further +analysis. + +Finally, the `cc` server also works as the DNS server for the QA nodes. All node IPs can be resolved by simple names on +the servers. This is especially useful when configuring persistent peers. + +## Set up the hosts in Digital Ocean + +After creating your DO access (see the CometBFT QA infra +[steps](https://github.com/cometbft/qa-infra/blob/main/README.md#setup)), run + +```bash +cd terraform +terraform init +terraform apply -var small_nodes=0 # optional. This will create the cc server only. +terraform apply -var small_nodes=4 -var large_nodes=3 # the cc server will not be deleted if you scale the nodes. +``` + +By running terraform with zero nodes first, you create the `cc` server ahead of time. You can skip that step and create +the `cc` server with the QA nodes in one go. + +The above will create a 7-node Digital Ocean QA environment, a `hosts` file, and a `commands.sh` file with the custom +commands. + +Most of the node setup is done automatically in cloud-init. When terraform finishes, the servers are still installing +packages and setting up their environment. One of the first commands we will run will check whether the servers have +finished their setup. + +## Post-terraform tasks + +There are a few custom commands to make managing the nodes easier. They are explained in the `commands.sh` file. + +Note: most of these commands require SSH authentication. If you use a Yubikey for SSH authentication, you can +saturate your machine's SSH connection with the default settings. 
Use a key file and `ssh-agent` or change +connection settings. + +### 0. TL;DR + +You start execution on your local machine and move over to the `cc` server when it is ready. You can also keep working +from your local machine if you feel the servers are close enough and the network is fast. + +```bash +source commands.sh # do this in every new terminal window on your machine. No need to do this on the CC server. + +ok_cc # make sure the CC server has finished initial setup. +deploy_cc # Takes 4-5 minutes. Continue in a different window while this is running. + # You can run it on the cc server as well, but you have to manually put the source code at /root/malachite. + +ssh-cc # (optional) move to the CC server and run the rest of the commands closer to the QA nodes. +setup_config # depends on deploy_cc, only run it if that finished. + +ok_all # make sure all QA servers have finished initial setup +dnode-run all # run malachite on all QA servers + +# Wait some time to generate data + +dnode-stop all # stop all malachite nodes. It does not remove the docker container so the logs can be viewed. + +get_prometheus_data # this has to run on the machine where you want the data to end up. Usually, your local machine. +fetch_log all # fetch the logs of malachite-cli from each QA node + +dnode-rm all # remove the docker container "node" from the servers so the application can be re-run +``` + +### 1. Import custom commands + +```bash +source commands.sh +``` + +Make the custom commands available on your local machine. You do not need to run this on the CC server, as it gets +invoked automatically when you SSH into the server. + +### 2. Make sure CC works + +```bash +ok_cc +``` + +This adds the server's SSH key to your known_hosts and checks if the cloud-init execution has finished on the CC server. It +also sets up the DNS service with the created hosts and copies the `commands.sh` over for easy execution. + +It will print a date if the server successfully finished the setup. + +You have to run this every time you create or destroy new servers with Terraform. It copies the server IPs and the +correct custom commands to the CC server. + +### 4. Build your node and deploy it to the cc server. + +```bash +deploy_cc +``` + +Builds the application using Docker and deploys it into the CC server Docker Registry. + +This will take a few minutes. (4.5 minutes in Lausanne, connecting to a 4vCPU/8GB fra1 server in Digital Ocean.) + +You can continue executing the rest of the setup commands until you want to configure the network with `setup_config`. +You will need the application in order to generate the application configuration correctly. + +You can also run this command on the `cc` server (see the `ssh-cc` command below). Caveat: you need to copy the source +code over to the server first. + +### 4.5 (optional) Connect to the CC server + +```bash +ssh-cc +``` + +It is encouraged to run the rest of the commands from the CC server as it is closer to the QA servers and the commands +run faster. + +The custom commands are automatically available on the CC server. No need to `source commands.sh` there. + +You can keep running on your local machine, though, if that is more convenient. + +### 5. Make sure all servers finished cloud-init installations + +```bash +ok_all +``` + +Similar to `ok_cc` but all deployed servers are taken into account. Your `known_hosts` file will be updated with the +server keys, and the date each server finished installing cloud-init is printed. Run this multiple times until all servers +return successfully.
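+
+If you prefer not to re-run `ok_all` by hand, a small polling loop can wait for you. This is only a sketch: it assumes that `ok_all` exits with a non-zero status as long as any server has not yet written its `/etc/done` marker.
+
+```bash
+# Re-run ok_all until every server reports that cloud-init has finished.
+# Assumption: ok_all returns non-zero while at least one server is still initializing.
+until ok_all; do
+  echo "Some servers are still running cloud-init, retrying in 30 seconds..."
+  sleep 30
+done
+```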
+ +### 6. Create the configuration data on the cc server + +```bash +setup_config +``` + +The configuration data is stored on the CC server under `/data`. This path is also shared with the QA nodes over NFS. + +It depends on an up-to-date host count. Re-run it after `ok_cc` if you changed the number of servers. + +### 7. Start the nodes + +```bash +dnode-run 0 2 3 +RUST_LOG=debug cnode-run 1 +``` + +You can also use the `all` keyword to start or stop all nodes at once. + +```bash +dnode-stop all +``` + +You can use `dnode`, `dnode-run`, `dnode-log`, and `dnode-stop` to manage the docker container. +`dnode` is a generic command to run docker commands remotely. + +### 8. Get the data from Prometheus + +```bash +get_prometheus_data +``` + +This will copy the compressed Prometheus database from the `cc` server to your local machine as `prometheus.tgz`. + +# Created files + +## hosts file + +Terraform creates a [hosts](terraform/hosts) file that can be added to any server (including your local dev machine) +for easier access to the servers. The file is +deployed onto the cc server and is used as part of the DNS service there. + +## commands.sh file + +Terraform also creates a [commands.sh](terraform/commands.sh) file with suggested commands for CLI-based configuration +and node +management. You can run `source commands.sh` and use the functions in your shell. The descriptions of commands are +listed in the top comment of the file. The file is copied over to `cc` during `ok_cc` and invoked automatically +when you SSH into the server. + +## prometheus.tgz file + +This file gets exported using the `get_prometheus_data` command. Import it in the viewer for further analysis. + +# Viewer + +The viewer allows you to view the metrics of a testnet on your local machine. You can export the Prometheus metrics +from the cloud and keep them on your local machine even after the testnet is destroyed. You can do additional analysis +and create custom Grafana dashboards. + +## Prerequisites + +* docker on your machine +* a running `cc` server from where you download the data +* `make` on your machine. + +The commands that start with `make` need to be run from the `viewer` directory, or you can use `-C` to point make +to that directory. + +## 1. Download the data + +This command is part of the terraform-created `commands.sh` file. + +```bash +download_data +``` + +This will download the compressed `prometheus.tgz` file from the `cc` server. + +## 2. Extract the data to its destination + +```bash +make extract_data +``` + +If you stored the file elsewhere, you can pass a different path to the command with the `FILE` environment variable. + +## 3. Start the viewer + +```bash +make viewer-start +``` + +You can view the Grafana dashboard at `http://localhost:3000`. The default username and password are `admin`/`admin`. + +## 4. Finish up + +When you are done with the data, you can stop the viewer. + +```bash +make viewer-stop +``` diff --git a/qa/docker/Dockerfile b/qa/docker/Dockerfile new file mode 100644 index 000000000..3506c801b --- /dev/null +++ b/qa/docker/Dockerfile @@ -0,0 +1,10 @@ +# Add the "code" build context to access local source code during build. +# docker build . 
--build-context code=$MALACHITE_DIR/code +FROM rust AS builder +RUN apt-get update && apt-get install -y protobuf-compiler +RUN --mount=type=bind,from=code,target=/mnt cargo install --path /mnt/crates/cli --target-dir /tmp + +FROM debian:bookworm-slim +RUN apt-get update && apt-get install -y iproute2 +COPY --from=builder /usr/local/cargo/bin/malachite-cli /usr/local/bin/malachite-cli +ENTRYPOINT ["malachite-cli"] diff --git a/qa/terraform/file-commands.tf b/qa/terraform/file-commands.tf new file mode 100644 index 000000000..cad9937f9 --- /dev/null +++ b/qa/terraform/file-commands.tf @@ -0,0 +1,35 @@ +resource "local_file" "commands" { + depends_on = [ + digitalocean_droplet.cc, + digitalocean_droplet.small, + digitalocean_droplet.large, + ] + content = templatefile("templates/commands.tmpl", { + path = abspath(path.root), + region = var.region, + ips = [ + for node in concat(digitalocean_droplet.small, digitalocean_droplet.large) : node.ipv4_address + ], + small = [ + for node in digitalocean_droplet.small : { + name = node.name, + ip = node.ipv4_address, + internal_ip = node.ipv4_address_private + } + ], + large = [ + for node in digitalocean_droplet.large : { + name = node.name, + ip = node.ipv4_address, + internal_ip = node.ipv4_address_private + } + ], + cc = { + name = digitalocean_droplet.cc.name + ip = digitalocean_droplet.cc.ipv4_address + internal_ip = digitalocean_droplet.cc.ipv4_address_private + } + }) + filename = "commands.sh" + file_permission = "0644" +} diff --git a/qa/terraform/file-hosts.tf b/qa/terraform/file-hosts.tf new file mode 100644 index 000000000..c034e7546 --- /dev/null +++ b/qa/terraform/file-hosts.tf @@ -0,0 +1,30 @@ +resource "local_file" "hosts" { + depends_on = [ + digitalocean_droplet.cc, + digitalocean_droplet.small, + digitalocean_droplet.large, + ] + content = templatefile("templates/hosts.tmpl", { + small = [ + for node in digitalocean_droplet.small : { + name = node.name, + ip = node.ipv4_address, + internal_ip = node.ipv4_address_private + } + ], + large = [ + for node in digitalocean_droplet.large : { + name = node.name, + ip = node.ipv4_address, + internal_ip = node.ipv4_address_private + } + ], + cc = { + name = digitalocean_droplet.cc.name + ip = digitalocean_droplet.cc.ipv4_address + internal_ip = digitalocean_droplet.cc.ipv4_address_private + } + }) + filename = "hosts" + file_permission = "0644" +} diff --git a/qa/terraform/nodes.tf b/qa/terraform/nodes.tf new file mode 100644 index 000000000..926e2e034 --- /dev/null +++ b/qa/terraform/nodes.tf @@ -0,0 +1,62 @@ +variable "ssh_keys" { + type = list(string) +} + +variable "instance_tags" { + type = list(string) + default = ["Malachite"] +} + +resource "digitalocean_droplet" "cc" { + name = "cc" + image = "debian-12-x64" + region = var.region + tags = concat(var.instance_tags, ["cc"]) + # Build takes about 4.5 minutes on a 4-core Digital Ocean server + size = "s-4vcpu-8gb" + # Build takes about 2.5 minutes on an 8-core Digital Ocean server + #size = "s-8vcpu-16gb" + ssh_keys = var.ssh_keys + user_data = templatefile("user-data/cc-data.txt", { + malachite_dashboard = filebase64("../viewer/config-grafana/provisioning/dashboards-data/main.json") + node_dashboard = filebase64("../viewer/config-grafana/provisioning/dashboards-data/node-exporter-full.json") + }) +} + +resource "digitalocean_droplet" "small" { + depends_on = [digitalocean_droplet.cc] + count = var.small_nodes + name = "small${count.index}" + image = "debian-12-x64" + region = var.region + tags = concat(var.instance_tags, ["small"]) + 
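+  # Same droplet size as the cc server above; the "large" nodes below use "s-8vcpu-16gb".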
size = "s-4vcpu-8gb" + ssh_keys = var.ssh_keys + user_data = templatefile("user-data/user-data.txt", { + id = count.index + cc = { + name = digitalocean_droplet.cc.name + ip = digitalocean_droplet.cc.ipv4_address + internal_ip = digitalocean_droplet.cc.ipv4_address_private + } + }) +} + +resource "digitalocean_droplet" "large" { + depends_on = [digitalocean_droplet.cc] + count = var.large_nodes + name = "large${count.index}" + image = "debian-12-x64" + region = var.region + tags = concat(var.instance_tags, ["large"]) + size = "s-8vcpu-16gb" + ssh_keys = var.ssh_keys + user_data = templatefile("user-data/user-data.txt", { + id = var.small_nodes + count.index + cc = { + name = digitalocean_droplet.cc.name + ip = digitalocean_droplet.cc.ipv4_address + internal_ip = digitalocean_droplet.cc.ipv4_address_private + } + }) +} diff --git a/qa/terraform/project.tf b/qa/terraform/project.tf new file mode 100644 index 000000000..4b9006ec5 --- /dev/null +++ b/qa/terraform/project.tf @@ -0,0 +1,5 @@ +resource "digitalocean_project" "malachite-testnet" { + name = "malachite-testnet" + description = "A project to test the Malachite codebase." + resources = concat([for node in digitalocean_droplet.small: node.urn], [for node in digitalocean_droplet.large: node.urn], [digitalocean_droplet.cc.urn]) +} diff --git a/qa/terraform/provider.tf b/qa/terraform/provider.tf new file mode 100644 index 000000000..cf336e36c --- /dev/null +++ b/qa/terraform/provider.tf @@ -0,0 +1,14 @@ +terraform { + required_providers { + digitalocean = { + source = "digitalocean/digitalocean" + version = "~> 2.0" + } + } +} + +variable "do_token" {} + +provider "digitalocean" { + token = var.do_token +} diff --git a/qa/terraform/templates/commands.tmpl b/qa/terraform/templates/commands.tmpl new file mode 100644 index 000000000..1d957ec7e --- /dev/null +++ b/qa/terraform/templates/commands.tmpl @@ -0,0 +1,263 @@ +# Environment variables for the servers +# CANDC - the IP address of the command and control server (CC is used by compilers) +# NODEi - the IP address of the node server "i" +# D_N - the number of node servers in total +# D_REGION - the Digital Ocean region where the servers are deployed +# PSSH_H - space-separated list of all the node server IP addresses for pssh input +# PSSH_P - the number of parallel processes to run with pssh +# SSH_OPTS - options for ssh run locally (forward agent, disable known_hosts) +# MALACHITE_DIR - the path to the malachite repository directory +# IS_CC - 1 means we are on the CC server, 0 we are not. (Used to determine the docker -H parameter when run locally.) +## +# Aliases for easy manual access to the servers (don't use these in scripts) +# ssh-cc - ssh into the cc server +# ssh-(nodei) - ssh into node server "i" +## +# Additional functionality in shell functions (see README for more info) +# xssh - parallel ssh command to all servers. Change PSSH_H and PSSH_P for different behavior. +# get_ip - get the IP address of a node server for programmatic use (example: get_ip 0) +# ok_cc - check if the CC server is ready to be used and update its services (DNS hosts, commands.sh, etc) +# ok_all - check if all servers are ready to be used +# deploy_cc - build the local source code into a docker image on the cc server and push it to the cc registry +# setup_config - create configuration on the cc server +# dnode-pull - pull the node image on all the node servers. Accepts list of IDs or "all". (example: dnode-pull 0 1 2) +# dnode-run - run the application on a node server. Accepts list of IDs or "all". 
(example: dnode-run 0 1 2) +# dnode-log - get the logs of the application from a node server (example: dnode-log 0 -f) +# dnode-stop - stop the application on a node server. Accepts list of IDs or "all". (example: dnode-stop 0 2) +# dnode-rm - remove node container from server. Accepts list of IDs or "all". (example: dnode-rm 0 1 2) +# cheat_sheet - get some help on the order of commands to run +# fetch_log - fetch the logs from all the node servers (example: fetch_log 0 1 2) +# get_prometheus_data - create a compressed prometheus data file (and download it from the cc server) +## + +export CANDC="${cc.ip}" +%{~ for i, n in concat(small, large) } +export NODE${i}="${n.ip}" +%{~ endfor } +export D_N="${length(small)+length(large)}" +export D_REGION="${region}" +export PSSH_H="${join(" ",ips)}" +export PSSH_P="30" +# Arrays require advanced shell, SSH_OPTS is not POSIX compatible +export SSH_OPTS=(-A -o LogLevel=ERROR -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o GlobalKnownHostsFile=/dev/null) +export MALACHITE_DIR="$(dirname $(dirname ${path}))" +export IS_CC=0 +export _CC_DOCKER_SHIM="-H ssh://root@$CANDC" +if [ $IS_CC -eq 1 ]; then + export _CC_DOCKER_SHIM="" +fi + +alias ssh-cc="ssh $SSH_OPTS root@${cc.ip}" +%{~ for i,n in concat(small, large) } +alias ssh-node${i}="ssh $SSH_OPTS root@${n.ip}" +%{~ endfor } + +xssh() { + pssh -l root -i -v -O LogLevel=ERROR -O StrictHostKeyChecking=no -O UserKnownHostsFile=/dev/null -O GlobalKnownHostsFile=/dev/null -p $PSSH_P -H "$PSSH_H" "$@" +} + +get_ip() { + I="$1" +%{~ for i, n in concat(small, large) } + test "$I" -eq "${i}" && echo "${n.ip}" && return +%{~ endfor } + echo "IP for node $I not found" && return 1 +} + +ok_cc() { + _keyscan_cc 2> /dev/null # needed for deploy_cc + PSSH_P=1 PSSH_H=$CANDC xssh "cat /etc/done" + sftp -C -q root@$${CANDC} < /dev/null; docker rm node" +} + +cheat_sheet() { +cat < /dev/null + ssh-keyscan -t ed25519 $CANDC >> $HOME/.ssh/known_hosts +} + +_keyscan_servers() { + _keyscan_cc 2> /dev/null +%{~ for n in concat(small, large) } + ssh-keygen -R ${n.ip} > /dev/null + ssh-keyscan -t ed25519 ${n.ip} >> $HOME/.ssh/known_hosts 2> /dev/null +%{~ endfor } +} + +_deploy_sync_code() { + rsync -avz --exclude target $MALACHITE_DIR/code root@$${CANDC}:/root/malachite/ +} + +_deploy_sync_qa() { + rsync -avz --exclude terraform --exclude viewer $MALACHITE_DIR/qa root@$${CANDC}:/root/malachite/ +} + +_deploy_build() { + if _is_cc; then + docker build --push -t cc.testnet/node --build-context code=$MALACHITE_DIR/code $MALACHITE_DIR/qa/docker + else + ssh-cc docker build --push -t cc.testnet/node --build-context code=$MALACHITE_DIR/code $MALACHITE_DIR/qa/docker + fi +} + +_compose_persistent_peers() { + port=$${1:-27000} + + persistent_peers="" + for i in $(seq 0 ${length(small)-1}) + do + persistent_peers="$persistent_peers,/dns/small$i/udp/$port/quic-v1" + done + for i in $(seq 0 ${length(large)-1}) + do + persistent_peers="$persistent_peers,/dns/large$i/udp/$port/quic-v1" + done + echo $${persistent_peers##,} +} + +_change_config() { + P="$@" + if [ "$P" = "all" ]; then + P="$(seq 0 $((D_N-1)))" + fi + for i in $P + do + file="/data/$i/config/config.toml" + sconfig "$file" \ + "moniker=test-$i" \ + "consensus.p2p.listen_addr=/ip4/0.0.0.0/udp/27000/quic-v1" \ + "mempool.p2p.listen_addr=/ip4/0.0.0.0/udp/28000/quic-v1" \ + "metrics.listen_addr=0.0.0.0:9000" && \ + sconfig "$file" -t stringSlice \ + "consensus.p2p.persistent_peers=$(_compose_persistent_peers)" \ + 
"mempool.p2p.persistent_peers=$(_compose_persistent_peers 28000)" & + done +} + +_change_one_config_entry() { + P="$(seq 0 $((D_N-1)))" + for i in $P + do + file="/data/$i/config/config.toml" + sconfig "$file" "$@" + done +} + +_parse_multiple_hosts() { + PSSH_X="" + if [ "$1" = "all" ] || [ $# -eq 0 ]; then + PSSH_X="$PSSH_H" + else + while (( "$#" )); + do + PSSH_X="$PSSH_X $(get_ip "$1")" + shift + done + fi + echo "$PSSH_X" +} diff --git a/qa/terraform/templates/hosts.tmpl b/qa/terraform/templates/hosts.tmpl new file mode 100644 index 000000000..6dab9c743 --- /dev/null +++ b/qa/terraform/templates/hosts.tmpl @@ -0,0 +1,15 @@ +127.0.0.1 localhost +${cc.ip} g-${cc.name} +%{~ for n in small } +${n.ip} g-${n.name} +%{~ endfor } +%{~ for n in large } +${n.ip} g-${n.name} +%{~ endfor } +${cc.internal_ip} ${cc.name} +%{~ for n in small } +${n.internal_ip} ${n.name} +%{~ endfor } +%{~ for n in large } +${n.internal_ip} ${n.name} +%{~ endfor } diff --git a/qa/terraform/user-data/cc-data.txt b/qa/terraform/user-data/cc-data.txt new file mode 100644 index 000000000..e74a6c231 --- /dev/null +++ b/qa/terraform/user-data/cc-data.txt @@ -0,0 +1,642 @@ +#cloud-config +manage_etc_hosts: false +apt: + sources: + source1: + source: "deb https://download.docker.com/linux/debian $RELEASE stable" + key: | + -----BEGIN PGP PUBLIC KEY BLOCK----- + + mQINBFit2ioBEADhWpZ8/wvZ6hUTiXOwQHXMAlaFHcPH9hAtr4F1y2+OYdbtMuth + lqqwp028AqyY+PRfVMtSYMbjuQuu5byyKR01BbqYhuS3jtqQmljZ/bJvXqnmiVXh + 38UuLa+z077PxyxQhu5BbqntTPQMfiyqEiU+BKbq2WmANUKQf+1AmZY/IruOXbnq + L4C1+gJ8vfmXQt99npCaxEjaNRVYfOS8QcixNzHUYnb6emjlANyEVlZzeqo7XKl7 + UrwV5inawTSzWNvtjEjj4nJL8NsLwscpLPQUhTQ+7BbQXAwAmeHCUTQIvvWXqw0N + cmhh4HgeQscQHYgOJjjDVfoY5MucvglbIgCqfzAHW9jxmRL4qbMZj+b1XoePEtht + ku4bIQN1X5P07fNWzlgaRL5Z4POXDDZTlIQ/El58j9kp4bnWRCJW0lya+f8ocodo + vZZ+Doi+fy4D5ZGrL4XEcIQP/Lv5uFyf+kQtl/94VFYVJOleAv8W92KdgDkhTcTD + G7c0tIkVEKNUq48b3aQ64NOZQW7fVjfoKwEZdOqPE72Pa45jrZzvUFxSpdiNk2tZ + XYukHjlxxEgBdC/J3cMMNRE1F4NCA3ApfV1Y7/hTeOnmDuDYwr9/obA8t016Yljj + q5rdkywPf4JF8mXUW5eCN1vAFHxeg9ZWemhBtQmGxXnw9M+z6hWwc6ahmwARAQAB + tCtEb2NrZXIgUmVsZWFzZSAoQ0UgZGViKSA8ZG9ja2VyQGRvY2tlci5jb20+iQI3 + BBMBCgAhBQJYrefAAhsvBQsJCAcDBRUKCQgLBRYCAwEAAh4BAheAAAoJEI2BgDwO + v82IsskP/iQZo68flDQmNvn8X5XTd6RRaUH33kXYXquT6NkHJciS7E2gTJmqvMqd + tI4mNYHCSEYxI5qrcYV5YqX9P6+Ko+vozo4nseUQLPH/ATQ4qL0Zok+1jkag3Lgk + jonyUf9bwtWxFp05HC3GMHPhhcUSexCxQLQvnFWXD2sWLKivHp2fT8QbRGeZ+d3m + 6fqcd5Fu7pxsqm0EUDK5NL+nPIgYhN+auTrhgzhK1CShfGccM/wfRlei9Utz6p9P + XRKIlWnXtT4qNGZNTN0tR+NLG/6Bqd8OYBaFAUcue/w1VW6JQ2VGYZHnZu9S8LMc + FYBa5Ig9PxwGQOgq6RDKDbV+PqTQT5EFMeR1mrjckk4DQJjbxeMZbiNMG5kGECA8 + g383P3elhn03WGbEEa4MNc3Z4+7c236QI3xWJfNPdUbXRaAwhy/6rTSFbzwKB0Jm + ebwzQfwjQY6f55MiI/RqDCyuPj3r3jyVRkK86pQKBAJwFHyqj9KaKXMZjfVnowLh + 9svIGfNbGHpucATqREvUHuQbNnqkCx8VVhtYkhDb9fEP2xBu5VvHbR+3nfVhMut5 + G34Ct5RS7Jt6LIfFdtcn8CaSas/l1HbiGeRgc70X/9aYx/V/CEJv0lIe8gP6uDoW + FPIZ7d6vH+Vro6xuWEGiuMaiznap2KhZmpkgfupyFmplh0s6knymuQINBFit2ioB + EADneL9S9m4vhU3blaRjVUUyJ7b/qTjcSylvCH5XUE6R2k+ckEZjfAMZPLpO+/tF + M2JIJMD4SifKuS3xck9KtZGCufGmcwiLQRzeHF7vJUKrLD5RTkNi23ydvWZgPjtx + Q+DTT1Zcn7BrQFY6FgnRoUVIxwtdw1bMY/89rsFgS5wwuMESd3Q2RYgb7EOFOpnu + w6da7WakWf4IhnF5nsNYGDVaIHzpiqCl+uTbf1epCjrOlIzkZ3Z3Yk5CM/TiFzPk + z2lLz89cpD8U+NtCsfagWWfjd2U3jDapgH+7nQnCEWpROtzaKHG6lA3pXdix5zG8 + eRc6/0IbUSWvfjKxLLPfNeCS2pCL3IeEI5nothEEYdQH6szpLog79xB9dVnJyKJb + VfxXnseoYqVrRz2VVbUI5Blwm6B40E3eGVfUQWiux54DspyVMMk41Mx7QJ3iynIa + 1N4ZAqVMAEruyXTRTxc9XW0tYhDMA/1GYvz0EmFpm8LzTHA6sFVtPm/ZlNCX6P1X + 
zJwrv7DSQKD6GGlBQUX+OeEJ8tTkkf8QTJSPUdh8P8YxDFS5EOGAvhhpMBYD42kQ + pqXjEC+XcycTvGI7impgv9PDY1RCC1zkBjKPa120rNhv/hkVk/YhuGoajoHyy4h7 + ZQopdcMtpN2dgmhEegny9JCSwxfQmQ0zK0g7m6SHiKMwjwARAQABiQQ+BBgBCAAJ + BQJYrdoqAhsCAikJEI2BgDwOv82IwV0gBBkBCAAGBQJYrdoqAAoJEH6gqcPyc/zY + 1WAP/2wJ+R0gE6qsce3rjaIz58PJmc8goKrir5hnElWhPgbq7cYIsW5qiFyLhkdp + YcMmhD9mRiPpQn6Ya2w3e3B8zfIVKipbMBnke/ytZ9M7qHmDCcjoiSmwEXN3wKYI + mD9VHONsl/CG1rU9Isw1jtB5g1YxuBA7M/m36XN6x2u+NtNMDB9P56yc4gfsZVES + KA9v+yY2/l45L8d/WUkUi0YXomn6hyBGI7JrBLq0CX37GEYP6O9rrKipfz73XfO7 + JIGzOKZlljb/D9RX/g7nRbCn+3EtH7xnk+TK/50euEKw8SMUg147sJTcpQmv6UzZ + cM4JgL0HbHVCojV4C/plELwMddALOFeYQzTif6sMRPf+3DSj8frbInjChC3yOLy0 + 6br92KFom17EIj2CAcoeq7UPhi2oouYBwPxh5ytdehJkoo+sN7RIWua6P2WSmon5 + U888cSylXC0+ADFdgLX9K2zrDVYUG1vo8CX0vzxFBaHwN6Px26fhIT1/hYUHQR1z + VfNDcyQmXqkOnZvvoMfz/Q0s9BhFJ/zU6AgQbIZE/hm1spsfgvtsD1frZfygXJ9f + irP+MSAI80xHSf91qSRZOj4Pl3ZJNbq4yYxv0b1pkMqeGdjdCYhLU+LZ4wbQmpCk + SVe2prlLureigXtmZfkqevRz7FrIZiu9ky8wnCAPwC7/zmS18rgP/17bOtL4/iIz + QhxAAoAMWVrGyJivSkjhSGx1uCojsWfsTAm11P7jsruIL61ZzMUVE2aM3Pmj5G+W + 9AcZ58Em+1WsVnAXdUR//bMmhyr8wL/G1YO1V3JEJTRdxsSxdYa4deGBBY/Adpsw + 24jxhOJR+lsJpqIUeb999+R8euDhRHG9eFO7DRu6weatUJ6suupoDTRWtr/4yGqe + dKxV3qQhNLSnaAzqW/1nA3iUB4k7kCaKZxhdhDbClf9P37qaRW467BLCVO/coL3y + Vm50dwdrNtKpMBh3ZpbB1uJvgi9mXtyBOMJ3v8RZeDzFiG8HdCtg9RvIt/AIFoHR + H3S+U79NT6i0KPzLImDfs8T7RlpyuMc4Ufs8ggyg9v3Ae6cN3eQyxcK3w0cbBwsh + /nQNfsA6uu+9H7NhbehBMhYnpNZyrHzCmzyXkauwRAqoCbGCNykTRwsur9gS41TQ + M8ssD1jFheOJf3hODnkKU+HKjvMROl1DK7zdmLdNzA1cvtZH/nCC9KPj1z8QC47S + xx+dTZSx4ONAhwbS/LN3PoKtn8LPjY9NP9uDWI+TWYquS2U+KHDrBDlsgozDbs/O + jCxcpDzNmXpWQHEtHU7649OXHP7UeNST1mCUCH5qdank0V1iejF6/CfTFU4MfcrG + YT90qFF93M3v01BbxP+EIY2/9tiIPbrd + =0YYh + -----END PGP PUBLIC KEY BLOCK----- +package_update: true +packages: + - git + - gcc + - prometheus + - prometheus-node-exporter + - ntpstat + - jq + - ufw + - tmux + - apt-transport-https + - ca-certificates + - curl + - gnupg-agent + - software-properties-common + - docker-ce + - docker-ce-cli + - containerd.io + - nfs-kernel-server + - dnsmasq + - pssh + - rsync +write_files: + - path: /etc/docker/daemon.json + content: | + { + "insecure-registries" : ["0.0.0.0/0"] + } + - path: /etc/systemd/resolved.conf.d/1-dnsmasq.conf + content: | + [Resolve] + DNS=127.0.0.1 + DNSStubListener=no + - path: /etc/exports + content: | + /data 10.0.0.0/8(ro,sync,no_subtree_check) + /data 172.16.0.0/12(ro,sync,no_subtree_check) + /data 192.168.0.0/16(ro,sync,no_subtree_check) + - path: /etc/dnsmasq.d/servers.conf + content: | + server=1.0.0.1 + server=1.1.1.1 + bogus-priv + expand-hosts + domain=testnet + cache-size=1000 + - path: /etc/prometheus/prometheus.yml + content: | + global: + scrape_interval: 5s + evaluation_interval: 10s + scrape_configs: + - job_name: 'malachite' + static_configs: + - targets: [ + 'small0:9000', + 'small1:9000', + 'small2:9000', + 'small3:9000', + 'small4:9000', + 'small5:9000', + 'small6:9000', + 'small7:9000', + 'small8:9000', + 'small9:9000', + 'small10:9000', + 'small11:9000', + 'small12:9000', + 'small13:9000', + 'small14:9000', + 'small15:9000', + 'small16:9000', + 'small17:9000', + 'small18:9000', + 'small19:9000', + 'small20:9000', + 'small21:9000', + 'small22:9000', + 'small23:9000', + 'small24:9000', + 'small25:9000', + 'small26:9000', + 'small27:9000', + 'small28:9000', + 'small29:9000', + 'small30:9000', + 'small31:9000', + 'small32:9000', + 'small33:9000', + 'small34:9000', + 'small35:9000', + 'small36:9000', + 'small37:9000', + 'small38:9000', + 'small39:9000', + 'small40:9000', 
+ 'small41:9000', + 'small42:9000', + 'small43:9000', + 'small44:9000', + 'small45:9000', + 'small46:9000', + 'small47:9000', + 'small48:9000', + 'small49:9000', + 'small50:9000', + 'small51:9000', + 'small52:9000', + 'small53:9000', + 'small54:9000', + 'small55:9000', + 'small56:9000', + 'small57:9000', + 'small58:9000', + 'small59:9000', + 'small60:9000', + 'small61:9000', + 'small62:9000', + 'small63:9000', + 'small64:9000', + 'small65:9000', + 'small66:9000', + 'small67:9000', + 'small68:9000', + 'small69:9000', + 'small70:9000', + 'small71:9000', + 'small72:9000', + 'small73:9000', + 'small74:9000', + 'small75:9000', + 'small76:9000', + 'small77:9000', + 'small78:9000', + 'small79:9000', + 'small80:9000', + 'small81:9000', + 'small82:9000', + 'small83:9000', + 'small84:9000', + 'small85:9000', + 'small86:9000', + 'small87:9000', + 'small88:9000', + 'small89:9000', + 'small90:9000', + 'small91:9000', + 'small92:9000', + 'small93:9000', + 'small94:9000', + 'small95:9000', + 'small96:9000', + 'small97:9000', + 'small98:9000', + 'small99:9000', + 'large0:9000', + 'large1:9000', + 'large2:9000', + 'large3:9000', + 'large4:9000', + 'large5:9000', + 'large6:9000', + 'large7:9000', + 'large8:9000', + 'large9:9000', + 'large10:9000', + 'large11:9000', + 'large12:9000', + 'large13:9000', + 'large14:9000', + 'large15:9000', + 'large16:9000', + 'large17:9000', + 'large18:9000', + 'large19:9000', + 'large20:9000', + 'large21:9000', + 'large22:9000', + 'large23:9000', + 'large24:9000', + 'large25:9000', + 'large26:9000', + 'large27:9000', + 'large28:9000', + 'large29:9000', + 'large30:9000', + 'large31:9000', + 'large32:9000', + 'large33:9000', + 'large34:9000', + 'large35:9000', + 'large36:9000', + 'large37:9000', + 'large38:9000', + 'large39:9000', + 'large40:9000', + 'large41:9000', + 'large42:9000', + 'large43:9000', + 'large44:9000', + 'large45:9000', + 'large46:9000', + 'large47:9000', + 'large48:9000', + 'large49:9000', + 'large50:9000', + 'large51:9000', + 'large52:9000', + 'large53:9000', + 'large54:9000', + 'large55:9000', + 'large56:9000', + 'large57:9000', + 'large58:9000', + 'large59:9000', + 'large60:9000', + 'large61:9000', + 'large62:9000', + 'large63:9000', + 'large64:9000', + 'large65:9000', + 'large66:9000', + 'large67:9000', + 'large68:9000', + 'large69:9000', + 'large70:9000', + 'large71:9000', + 'large72:9000', + 'large73:9000', + 'large74:9000', + 'large75:9000', + 'large76:9000', + 'large77:9000', + 'large78:9000', + 'large79:9000', + 'large80:9000', + 'large81:9000', + 'large82:9000', + 'large83:9000', + 'large84:9000', + 'large85:9000', + 'large86:9000', + 'large87:9000', + 'large88:9000', + 'large89:9000', + 'large90:9000', + 'large91:9000', + 'large92:9000', + 'large93:9000', + 'large94:9000', + 'large95:9000', + 'large96:9000', + 'large97:9000', + 'large98:9000', + 'large99:9000' + ] + - job_name: 'server' + static_configs: + - targets: [ + 'small0:9100', + 'small1:9100', + 'small2:9100', + 'small3:9100', + 'small4:9100', + 'small5:9100', + 'small6:9100', + 'small7:9100', + 'small8:9100', + 'small9:9100', + 'small10:9100', + 'small11:9100', + 'small12:9100', + 'small13:9100', + 'small14:9100', + 'small15:9100', + 'small16:9100', + 'small17:9100', + 'small18:9100', + 'small19:9100', + 'small20:9100', + 'small21:9100', + 'small22:9100', + 'small23:9100', + 'small24:9100', + 'small25:9100', + 'small26:9100', + 'small27:9100', + 'small28:9100', + 'small29:9100', + 'small30:9100', + 'small31:9100', + 'small32:9100', + 'small33:9100', + 'small34:9100', + 'small35:9100', + 
'small36:9100', + 'small37:9100', + 'small38:9100', + 'small39:9100', + 'small40:9100', + 'small41:9100', + 'small42:9100', + 'small43:9100', + 'small44:9100', + 'small45:9100', + 'small46:9100', + 'small47:9100', + 'small48:9100', + 'small49:9100', + 'small50:9100', + 'small51:9100', + 'small52:9100', + 'small53:9100', + 'small54:9100', + 'small55:9100', + 'small56:9100', + 'small57:9100', + 'small58:9100', + 'small59:9100', + 'small60:9100', + 'small61:9100', + 'small62:9100', + 'small63:9100', + 'small64:9100', + 'small65:9100', + 'small66:9100', + 'small67:9100', + 'small68:9100', + 'small69:9100', + 'small70:9100', + 'small71:9100', + 'small72:9100', + 'small73:9100', + 'small74:9100', + 'small75:9100', + 'small76:9100', + 'small77:9100', + 'small78:9100', + 'small79:9100', + 'small80:9100', + 'small81:9100', + 'small82:9100', + 'small83:9100', + 'small84:9100', + 'small85:9100', + 'small86:9100', + 'small87:9100', + 'small88:9100', + 'small89:9100', + 'small90:9100', + 'small91:9100', + 'small92:9100', + 'small93:9100', + 'small94:9100', + 'small95:9100', + 'small96:9100', + 'small97:9100', + 'small98:9100', + 'small99:9100', + 'large0:9100', + 'large1:9100', + 'large2:9100', + 'large3:9100', + 'large4:9100', + 'large5:9100', + 'large6:9100', + 'large7:9100', + 'large8:9100', + 'large9:9100', + 'large10:9100', + 'large11:9100', + 'large12:9100', + 'large13:9100', + 'large14:9100', + 'large15:9100', + 'large16:9100', + 'large17:9100', + 'large18:9100', + 'large19:9100', + 'large20:9100', + 'large21:9100', + 'large22:9100', + 'large23:9100', + 'large24:9100', + 'large25:9100', + 'large26:9100', + 'large27:9100', + 'large28:9100', + 'large29:9100', + 'large30:9100', + 'large31:9100', + 'large32:9100', + 'large33:9100', + 'large34:9100', + 'large35:9100', + 'large36:9100', + 'large37:9100', + 'large38:9100', + 'large39:9100', + 'large40:9100', + 'large41:9100', + 'large42:9100', + 'large43:9100', + 'large44:9100', + 'large45:9100', + 'large46:9100', + 'large47:9100', + 'large48:9100', + 'large49:9100', + 'large50:9100', + 'large51:9100', + 'large52:9100', + 'large53:9100', + 'large54:9100', + 'large55:9100', + 'large56:9100', + 'large57:9100', + 'large58:9100', + 'large59:9100', + 'large60:9100', + 'large61:9100', + 'large62:9100', + 'large63:9100', + 'large64:9100', + 'large65:9100', + 'large66:9100', + 'large67:9100', + 'large68:9100', + 'large69:9100', + 'large70:9100', + 'large71:9100', + 'large72:9100', + 'large73:9100', + 'large74:9100', + 'large75:9100', + 'large76:9100', + 'large77:9100', + 'large78:9100', + 'large79:9100', + 'large80:9100', + 'large81:9100', + 'large82:9100', + 'large83:9100', + 'large84:9100', + 'large85:9100', + 'large86:9100', + 'large87:9100', + 'large88:9100', + 'large89:9100', + 'large90:9100', + 'large91:9100', + 'large92:9100', + 'large93:9100', + 'large94:9100', + 'large95:9100', + 'large96:9100', + 'large97:9100', + 'large98:9100', + 'large99:9100' + ] + scrape_interval: 1s + - path: /etc/nsswitch.conf + content: | + passwd: files + group: files + shadow: files + gshadow: files + hosts: dns + networks: files + protocols: db files + services: db files + ethers: db files + rpc: db files + netgroup: nis + - path: /root/docker/grafana.yml + content: | + apiVersion: 1 + datasources: + - name: prometheus + uid: prometheus + type: prometheus + url: http://host.docker.internal:9090 + is_default: true + editable: true + - path: /root/docker/malachite.yml + content: | + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + 
disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: '/dashboards' + foldersFromFilesStructure: true + - path: /root/dashboards/main.json + encoding: b64 + content: ${malachite_dashboard} + - path: /root/docker/compose.yml + content: | + services: + registry: + container_name: registry + image: registry:2 + ports: + - 0.0.0.0:80:5000 + volumes: + - registry:/var/lib/registry + environment: + REGISTRY_HTTP_SECRET: siSRSRTHSRTHSERGehrgjsoiejrg45625623452345isejrgisejrgsergserg + restart: on-failure + grafana: + container_name: grafana + image: grafana/grafana-oss + volumes: + - /root/docker/grafana.yml:/etc/grafana/provisioning/datasources/prometheus.yml + - /root/docker/malachite.yml:/etc/grafana/provisioning/dashboards/malachite.yml + - /root/dashboards:/dashboards + - grafana:/var/lib/grafana + ports: + - 0.0.0.0:3000:3000 + environment: + GF_SECURITY_ADMIN_USER: testnet + GF_SECURITY_ADMIN_PASSWORD: militant-souvenir-dash-teleview + GF_LOG_LEVEL: error + GF_ANALYTICS_ENABLED: false + GF_ANALYTICS_REPORTING_ENABLED: false + GF_ANALYTICS_CHECK_FOR_PLUGIN_UPDATES: false + GF_ANALYTICS_CHECK_FOR_UPDATES: false + GF_ANALYTICS_FEEDBACK_LINKS_ENABLED: false + GF_SECURITY_DISABLE_GRAVATAR: true + GF_USERS_DEFAULT_THEME: system + GF_USERS_EDITORS_CAN_ADMIN: true + GF_AUTH_ANONYMOUS_ENABLED: true + GF_AUTH_ANONYMOUS_ORG_ROLE: Editor + GF_AUTH_BASIC_ENABLED: false + GF_NEWS_NEWS_FEED_ENABLED: false + GF_RENDERING_RENDERER_TOKEN: "-" + GF_RENDERING_SERVER_URL: http://grafana-image-renderer:8081/render + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + extra_hosts: + - "host.docker.internal:host-gateway" + grafana-image-renderer: + image: grafana/grafana-image-renderer + container_name: grafana-image-renderer + volumes: + registry: + grafana: +runcmd: + - ln /usr/bin/parallel-ssh /usr/bin/pssh + - mkdir /data + - chown nobody:nogroup /data + - systemctl daemon-reload + - systemctl restart systemd-resolved + - curl -s -o /root/dashboards/node-exporter-full.json -L https://raw.githubusercontent.com/rfmoz/grafana-dashboards/master/prometheus/node-exporter-full.json + - systemctl enable prometheus-node-exporter + - systemctl start prometheus-node-exporter + - systemctl enable docker + - systemctl start docker + - systemctl enable nfs-kernel-server + - systemctl start nfs-kernel-server + - systemctl restart systemd-journald + - systemctl restart dnsmasq + - docker compose -f /root/docker/compose.yml up -d + - curl -s -o /usr/bin/sconfig -L https://github.com/freshautomations/sconfig/releases/download/v0.2.0/sconfig_linux_amd64 + - chmod 755 /usr/bin/sconfig + - date > /etc/done + - ln /etc/done /etc/cc + diff --git a/qa/terraform/user-data/user-data.txt b/qa/terraform/user-data/user-data.txt new file mode 100644 index 000000000..671b00b52 --- /dev/null +++ b/qa/terraform/user-data/user-data.txt @@ -0,0 +1,165 @@ +#cloud-config +manage_etc_hosts: false +network: + version: 1 + config: + - type: nameserver + address: + - ${cc.internal_ip} + search: + - testnet +apt: + sources: + source1: + source: "deb https://download.docker.com/linux/debian $RELEASE stable" + key: | + -----BEGIN PGP PUBLIC KEY BLOCK----- + + mQINBFit2ioBEADhWpZ8/wvZ6hUTiXOwQHXMAlaFHcPH9hAtr4F1y2+OYdbtMuth + lqqwp028AqyY+PRfVMtSYMbjuQuu5byyKR01BbqYhuS3jtqQmljZ/bJvXqnmiVXh + 38UuLa+z077PxyxQhu5BbqntTPQMfiyqEiU+BKbq2WmANUKQf+1AmZY/IruOXbnq + L4C1+gJ8vfmXQt99npCaxEjaNRVYfOS8QcixNzHUYnb6emjlANyEVlZzeqo7XKl7 + 
UrwV5inawTSzWNvtjEjj4nJL8NsLwscpLPQUhTQ+7BbQXAwAmeHCUTQIvvWXqw0N + cmhh4HgeQscQHYgOJjjDVfoY5MucvglbIgCqfzAHW9jxmRL4qbMZj+b1XoePEtht + ku4bIQN1X5P07fNWzlgaRL5Z4POXDDZTlIQ/El58j9kp4bnWRCJW0lya+f8ocodo + vZZ+Doi+fy4D5ZGrL4XEcIQP/Lv5uFyf+kQtl/94VFYVJOleAv8W92KdgDkhTcTD + G7c0tIkVEKNUq48b3aQ64NOZQW7fVjfoKwEZdOqPE72Pa45jrZzvUFxSpdiNk2tZ + XYukHjlxxEgBdC/J3cMMNRE1F4NCA3ApfV1Y7/hTeOnmDuDYwr9/obA8t016Yljj + q5rdkywPf4JF8mXUW5eCN1vAFHxeg9ZWemhBtQmGxXnw9M+z6hWwc6ahmwARAQAB + tCtEb2NrZXIgUmVsZWFzZSAoQ0UgZGViKSA8ZG9ja2VyQGRvY2tlci5jb20+iQI3 + BBMBCgAhBQJYrefAAhsvBQsJCAcDBRUKCQgLBRYCAwEAAh4BAheAAAoJEI2BgDwO + v82IsskP/iQZo68flDQmNvn8X5XTd6RRaUH33kXYXquT6NkHJciS7E2gTJmqvMqd + tI4mNYHCSEYxI5qrcYV5YqX9P6+Ko+vozo4nseUQLPH/ATQ4qL0Zok+1jkag3Lgk + jonyUf9bwtWxFp05HC3GMHPhhcUSexCxQLQvnFWXD2sWLKivHp2fT8QbRGeZ+d3m + 6fqcd5Fu7pxsqm0EUDK5NL+nPIgYhN+auTrhgzhK1CShfGccM/wfRlei9Utz6p9P + XRKIlWnXtT4qNGZNTN0tR+NLG/6Bqd8OYBaFAUcue/w1VW6JQ2VGYZHnZu9S8LMc + FYBa5Ig9PxwGQOgq6RDKDbV+PqTQT5EFMeR1mrjckk4DQJjbxeMZbiNMG5kGECA8 + g383P3elhn03WGbEEa4MNc3Z4+7c236QI3xWJfNPdUbXRaAwhy/6rTSFbzwKB0Jm + ebwzQfwjQY6f55MiI/RqDCyuPj3r3jyVRkK86pQKBAJwFHyqj9KaKXMZjfVnowLh + 9svIGfNbGHpucATqREvUHuQbNnqkCx8VVhtYkhDb9fEP2xBu5VvHbR+3nfVhMut5 + G34Ct5RS7Jt6LIfFdtcn8CaSas/l1HbiGeRgc70X/9aYx/V/CEJv0lIe8gP6uDoW + FPIZ7d6vH+Vro6xuWEGiuMaiznap2KhZmpkgfupyFmplh0s6knymuQINBFit2ioB + EADneL9S9m4vhU3blaRjVUUyJ7b/qTjcSylvCH5XUE6R2k+ckEZjfAMZPLpO+/tF + M2JIJMD4SifKuS3xck9KtZGCufGmcwiLQRzeHF7vJUKrLD5RTkNi23ydvWZgPjtx + Q+DTT1Zcn7BrQFY6FgnRoUVIxwtdw1bMY/89rsFgS5wwuMESd3Q2RYgb7EOFOpnu + w6da7WakWf4IhnF5nsNYGDVaIHzpiqCl+uTbf1epCjrOlIzkZ3Z3Yk5CM/TiFzPk + z2lLz89cpD8U+NtCsfagWWfjd2U3jDapgH+7nQnCEWpROtzaKHG6lA3pXdix5zG8 + eRc6/0IbUSWvfjKxLLPfNeCS2pCL3IeEI5nothEEYdQH6szpLog79xB9dVnJyKJb + VfxXnseoYqVrRz2VVbUI5Blwm6B40E3eGVfUQWiux54DspyVMMk41Mx7QJ3iynIa + 1N4ZAqVMAEruyXTRTxc9XW0tYhDMA/1GYvz0EmFpm8LzTHA6sFVtPm/ZlNCX6P1X + zJwrv7DSQKD6GGlBQUX+OeEJ8tTkkf8QTJSPUdh8P8YxDFS5EOGAvhhpMBYD42kQ + pqXjEC+XcycTvGI7impgv9PDY1RCC1zkBjKPa120rNhv/hkVk/YhuGoajoHyy4h7 + ZQopdcMtpN2dgmhEegny9JCSwxfQmQ0zK0g7m6SHiKMwjwARAQABiQQ+BBgBCAAJ + BQJYrdoqAhsCAikJEI2BgDwOv82IwV0gBBkBCAAGBQJYrdoqAAoJEH6gqcPyc/zY + 1WAP/2wJ+R0gE6qsce3rjaIz58PJmc8goKrir5hnElWhPgbq7cYIsW5qiFyLhkdp + YcMmhD9mRiPpQn6Ya2w3e3B8zfIVKipbMBnke/ytZ9M7qHmDCcjoiSmwEXN3wKYI + mD9VHONsl/CG1rU9Isw1jtB5g1YxuBA7M/m36XN6x2u+NtNMDB9P56yc4gfsZVES + KA9v+yY2/l45L8d/WUkUi0YXomn6hyBGI7JrBLq0CX37GEYP6O9rrKipfz73XfO7 + JIGzOKZlljb/D9RX/g7nRbCn+3EtH7xnk+TK/50euEKw8SMUg147sJTcpQmv6UzZ + cM4JgL0HbHVCojV4C/plELwMddALOFeYQzTif6sMRPf+3DSj8frbInjChC3yOLy0 + 6br92KFom17EIj2CAcoeq7UPhi2oouYBwPxh5ytdehJkoo+sN7RIWua6P2WSmon5 + U888cSylXC0+ADFdgLX9K2zrDVYUG1vo8CX0vzxFBaHwN6Px26fhIT1/hYUHQR1z + VfNDcyQmXqkOnZvvoMfz/Q0s9BhFJ/zU6AgQbIZE/hm1spsfgvtsD1frZfygXJ9f + irP+MSAI80xHSf91qSRZOj4Pl3ZJNbq4yYxv0b1pkMqeGdjdCYhLU+LZ4wbQmpCk + SVe2prlLureigXtmZfkqevRz7FrIZiu9ky8wnCAPwC7/zmS18rgP/17bOtL4/iIz + QhxAAoAMWVrGyJivSkjhSGx1uCojsWfsTAm11P7jsruIL61ZzMUVE2aM3Pmj5G+W + 9AcZ58Em+1WsVnAXdUR//bMmhyr8wL/G1YO1V3JEJTRdxsSxdYa4deGBBY/Adpsw + 24jxhOJR+lsJpqIUeb999+R8euDhRHG9eFO7DRu6weatUJ6suupoDTRWtr/4yGqe + dKxV3qQhNLSnaAzqW/1nA3iUB4k7kCaKZxhdhDbClf9P37qaRW467BLCVO/coL3y + Vm50dwdrNtKpMBh3ZpbB1uJvgi9mXtyBOMJ3v8RZeDzFiG8HdCtg9RvIt/AIFoHR + H3S+U79NT6i0KPzLImDfs8T7RlpyuMc4Ufs8ggyg9v3Ae6cN3eQyxcK3w0cbBwsh + /nQNfsA6uu+9H7NhbehBMhYnpNZyrHzCmzyXkauwRAqoCbGCNykTRwsur9gS41TQ + M8ssD1jFheOJf3hODnkKU+HKjvMROl1DK7zdmLdNzA1cvtZH/nCC9KPj1z8QC47S + xx+dTZSx4ONAhwbS/LN3PoKtn8LPjY9NP9uDWI+TWYquS2U+KHDrBDlsgozDbs/O + jCxcpDzNmXpWQHEtHU7649OXHP7UeNST1mCUCH5qdank0V1iejF6/CfTFU4MfcrG + 
YT90qFF93M3v01BbxP+EIY2/9tiIPbrd + =0YYh + -----END PGP PUBLIC KEY BLOCK----- +package_update: true +packages: + - git + - gcc + - golang-1.21-go + - prometheus + - prometheus-node-exporter + - ntpstat + - jq + - ufw + - tmux + - apt-transport-https + - ca-certificates + - curl + - gnupg-agent + - software-properties-common + - docker-ce + - docker-ce-cli + - containerd.io + - nfs-common +write_files: +# Prometheus node exporter service + - path: /etc/prometheus/prometheus-node-exporter.service + content: | + [Unit] + Description=Node Exporter + Wants=network-online.target + After=network-online.target + [Service] + User=prometheus + Group=prometheus + Type=simple + ExecStart=/usr/bin/prometheus-node-exporter + [Install] + WantedBy=multi-user.target +# Allow docker to download from insecure registries + - path: /etc/docker/daemon.json + content: | + { + "insecure-registries" : ["${cc.internal_ip}"] + } +# Force systemd-resolved to use custom DNS server + - path: /etc/systemd/resolved.conf.d/DigitalOcean.conf + content: | + [Resolve] + DNS=${cc.internal_ip} +# Force resolv to use DNS for host resolution + - path: /etc/nsswitch.conf + content: | + passwd: files + group: files + shadow: files + gshadow: files + hosts: dns + networks: files + protocols: db files + services: db files + ethers: db files + rpc: db files + netgroup: nis +# Set server ID + - path: /etc/id + content: | + ${id} +runcmd: +# Integrate Prometheus node exporter service + - systemctl daemon-reload +# Update resolv.conf with forced DNS server + - systemctl restart systemd-resolved +# Start prometheus node exporter + - systemctl enable prometheus-node-exporter + - systemctl start prometheus-node-exporter +# Start docker + - systemctl enable docker + - systemctl start docker +# Fix journald not logging entries + - systemctl restart systemd-journald +# Set up config directory + - ln -s /data/${id} /config +# Set up NFS share + - mkdir /data + - chown nobody:nogroup /data + - echo "${cc.internal_ip}:/data /data nfs defaults 0 0" >> /etc/fstab +# Mount NFS share + - sleep 60 # Wait for CC NFS Server to come up with fingers crossed + - mount /data +# Indicate finish + - date > /etc/done diff --git a/qa/terraform/variables.tf b/qa/terraform/variables.tf new file mode 100644 index 000000000..32e468a26 --- /dev/null +++ b/qa/terraform/variables.tf @@ -0,0 +1,22 @@ +variable "small_nodes" { + type = number + default = 2 +} + +variable "large_nodes" { + type = number + default = 0 +} + +variable "region" { + type = string + default = "fra1" +} + +output "next_steps" { + value = < h` (future). - -The pseudocode description of the algorithm ignores messages from different -heights. -If we take the same approach in this specification, we have to specify -separately modules responsible to handle those messages. - - -- Past heights (`h' < h`): the consensus state machine is not affected by such - messages. However, their reception might indicate that a peer is lagging - behind in the protocol, and need to be synchronized. - - In CometBFT's implementation we handle message from the previous height - (`h' = h - 1`) for the `LastCommit` vote set. This only happens during the - first step of the first round (`r = 0`) of a height. -- Future heights (`h' > h`): the consensus state machine is not able to process - message from future heights in a proper way, as the validator set for them is - not known. However, once the process reaches this height `h'`, those messages - are _required_ for proper operation. There are two options here: - 1. 
Buffer a limited amount of such messages - 2. Assume that the communication subsystem (p2p) is able to retrieve (ask for - retransmission) of them when the process reaches height `h'`. - Notice that this option implies that processes keep a minimal set of - consensus messages that enables peers lagging behind to decide a past height. - -### Previous rounds - -Messages from rounds `(h, r')` with `r' < r`: same height `h` but previous round `r'`. - -The consensus state machine requires receiving and processing messages from -previous rounds: - -- `PREVOTE` messages can produce a Proof of Lock (POL) `2f + 1 ⟨PREVOTE, h, vr, id(v)⟩` - needed for accepting `PROPOSAL(h, r, v, vr)` message from the current round, - where `vr == r' < r` (L28). -- `PRECOMMIT` messages can produce a Precommit quorum `2f + 1 ⟨PRECOMMIT, h, r', id(v)⟩` - that leads to the decision of `v` at round `r'` (L49). -- `PROPOSAL` messages can be required to match a produced Precommit quorum (L49). - - Associated full value messages are required to produce the `⟨PROPOSAL, h, r', v, *⟩` event - -The production of the enumerated events from previous rounds should be -identical to the production of events from messages from the [current round](#current-round). - -### Future rounds - -Messages from rounds `(h, r')` with `r' > r`: same height `h` but future round `r'`. - -#### Round skipping - -The consensus state machine requires receiving and processing messages from -future rounds for enabling the _round skipping_ mechanism, defined as follows -in the pseudocode: - -``` -55: upon f + 1 ⟨∗, hp, round, ∗, ∗⟩ with round > roundp do -56: StartRound(round) -``` - -The current interpretation of this rule is that messages from a round `r' > r` -are received from `f + 1` voting-power equivalent distinct senders. -This means, that at least `1` correct process is at round `r'`. - -While this threshold does not need to be adopted (it can be configurable), -messages from a future round should initially have their unique senders counted. -Once the round skip threshold of processes is reached, the corresponding event -should be produced. - -#### Limits - -The same reasoning applied for messages from [future heights](#different-heights) -applies for messages from future rounds. - -Messages from future rounds are _required_ for the proper operation of the -consensus state machine once the process reaches their round `r'`. -There are two options, which can in particular be combined: - -1. Buffer a limited amount of such messages, or messages from a limited amount - of future rounds `r'` - - In CometBFT's implementation, only messages from round `r' = r + 1` are tracked. -2. Assume that the communication subsystem (p2p) is able to retrieve (ask for - retransmission) of messages from future rounds when the process reaches round `r'`. - Since messages from [previous rounds](#previous-rounds) are stored by - default, peers that have reached the future round `r'` should be able to - retransmit them. - -### Current round - -Messages matching the current round `(h, r)` of a process produce most of the -relevant events for the consensus state machine. - -### Counting votes - -Messages `⟨PREVOTE, h, r, *⟩` and `⟨PRECOMMIT, h, r, *⟩` are generically called votes. -They refer to a round step `(h, r, s)` of consensus, where `s` is defined by -the vote type, either `PREVOTE` or `PRECOMMIT`. - -The processing of _individual_ vote messages doesn't produce events relevant for -the consensus state machine. 
-But when the number of unique vote messages referring to a given round step -`(h, r, s)` reaches a given _threshold_, relevant events are produced; -the produced event depends on the value carried by such votes. - -General assumptions regarding vote messages: - -- Vote messages are produced, signed and broadcast by a validator, which is its - *sender* - - To define whether a vote message for round step `(h, r, s)` is valid, the - validator set for height `h` must be known. - The validator set can change over heights, but it is the same within a height. -- To each validator in the validator set of a height `h` is associated a *voting power* - - Thresholds are computed from the voting power associated to the - sender of each vote message -- A vote message carries a value: either a reference to a proposed value - `id(v)`, or the special `nil` value - - For practical effects, it should be considered that the size of vote - messages is constant -- Correct validators produce at most one vote message per round step: either - for a `id(v)` or for `nil` -- Byzantine validators may equivocate and produce multiple distinct vote - messages for the same round step. Equivocating vote messages differ on the - value they carry: for `nil`, `id(v)`, `id(v')`, etc. - - This possibility constitutes an attack vector. A process must thus restrict - the number of distinct vote messages from the same sender and referring to - the same round step that can be stored. - -#### `f + 1` threshold - -This threshold represents that vote messages referring to a round step were -received from a enough number of unique senders, so that it is guaranteed that -_at least one_ of the senders is a _correct_ validator. - -The rationale here is that the cumulative voting power of Byzantine validators -cannot exceed `f`, so that at least one of the considered vote messages must -have been produced by a correct validator. - -#### `2f + 1` threshold - -This threshold represents that vote messages referring to a round step were -received from a enough number of unique senders, so that it is guaranteed that -_the majority_ of the senders are _correct_ validators. - -The rationale here is that the cumulative voting power of Byzantine validators -cannot exceed `f`, so that the subset of considered vote messages that must -have been produced by correct validators have a cumulative voting power of at -least `f + 1`, which is strictly greater than `f`. - -## Consensus protocol - round state machine - -This document provides an overview of the Tendermint consensus protocol and follows ["The latest gossip on BFT consensus"](#References) and the English and Quint specifications located in the [specs](../../specs) directory. - -The consensus state-machine operates on complex `Event`s that reflect the -reception of one or multiple `Message`s, combined with state elements and the -interaction with other modules. - -### Round state-machine - -The state machine represents the operation of consensus at a single `Height(h)` and `Round(r)`. -The diagram below offers a visual representation of the state machine. It shows the input events, using green for simple inputs (e.g. timeouts, proposal) -and red for the complex events (e.g. `ProposalAndPolkaCurrent` is sent to the state machine when a valid proposal and a polka of prevotes have been received). -The actions are shown in italics (blue) and the output messages are shown in blue. 
- -![Consensus SM Diagram](assets/sm_diagram.jpeg) - -The set of states can be summarized as: - -- `Unstarted` - - Initial state - - Can be used to store messages early received for this round - - In the algorithm when `roundp < r`, where `roundp` is the node's current round -- InProgress (`Propose`, `Prevote`, `Precommit`) - - Actual consensus single-round execution - - In the algorithm when `roundp == r` -- `Commit` - - Final state for a successful round - -#### Exit transitions -The table below summarizes the major state transitions in the `Round(r)` state machine. -The transactions from state `InProgress` consider that node can be at any of -the `Propose`, `Prevote`, `Precommit` states. -The `Ref` column refers to the line of the pseudocode where the events can be found. - -| From | To | Ev Name | Event Details | Action | Ref | -| ---------- |------------|------------------------------|-------------------------------------------------------------------|-----------------------------------| --- | -| InProgress | InProgress | PrecommitAny | `2f + 1 ⟨PRECOMMIT, h, r, *⟩`
for the first time | schedule `TimeoutPrecommit(h, r)` | L47 | -| InProgress | Unstarted | TimeoutPrecommit | `TimeoutPrecommit(h, r)` | `next_round(r+1)` | L65 | -| InProgress | Unstarted | SkipRound(r') | `f + 1 ⟨*, h, r', *, *⟩` with `r' > r` | `next_round(r')` | L55 | -| InProgress | Commit | ProposalAndPrecommitValue(v) | `⟨PROPOSAL, h, r', v, *⟩`
`2f + 1 ⟨PRECOMMIT, h, r', id(v)⟩` | `commit(v)` | L49 | - -#### InProgress round - -The table below summarizes the state transitions within the `InProgress` state -of the `Round(r)` state machine. -The following state transitions represent the core of the consensus algorithm. -The `Ref` column refers to the line of the pseudocode where the events can be found. - -| From | To | Event | Details | Actions and Return | Ref | -|-----------|-----------|----------------------------------------|----------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------|-----| -| Unstarted | Propose | NewRound(proposer) | `StartRound` with `proposer(h, r) = p` | **async `getValue()` and schedule `TimeoutPropose(h, r)`** | L19 | -| Unstarted | Propose | NewRound(non-proposer) | `StartRound` with `proposer(h, r) != p` (optional restriction) | schedule `TimeoutPropose(h, r)` | L21 | -| **Propose** | **Propose** | **ProposeValue(v)** | `getValue()` returned | broadcast `⟨PROPOSAL, h, r, v, validRound⟩` | L19 | -| Propose | Prevote | Proposal(v, -1) | `⟨PROPOSAL, h, r, v, −1⟩` | broadcast `⟨PREVOTE, h, r, {id(v), nil}⟩` | L23 | -| Propose | Prevote | **InvalidProposal**(v, -1) | `⟨PROPOSAL, h, r, v, −1⟩` | broadcast `⟨PREVOTE, h, r, nil⟩` | L32 | -| Propose | Prevote | ProposalAndPolkaPrevious(v, vr) | `⟨PROPOSAL, h, r, v, vr⟩`
`2f + 1 ⟨PREVOTE, h, vr, id(v)⟩` with `vr < r` | broadcast `⟨PREVOTE, h, r, {id(v), nil}⟩` | L30 | -| Propose | Prevote | **InvalidProposalAndPolkaPrevious**(v, vr) | `⟨PROPOSAL, h, r, v, vr⟩`
`2f + 1 ⟨PREVOTE, h, vr, id(v)⟩` with `vr < r` | broadcast `⟨PREVOTE, h, r, nil⟩` | L32 | -| Propose | Prevote | TimeoutPropose | `TimeoutPropose(h, r)` | broadcast `⟨PREVOTE, h, r, nil⟩` | L57 | -| Prevote | Prevote | PolkaAny | `2f + 1 ⟨PREVOTE, h, r, *⟩`
for the first time | schedule `TimeoutPrevote(h, r)⟩` | L34 | -| Prevote | Precommit | ProposalAndPolkaCurrent(v) | `⟨PROPOSAL, h, r, v, ∗⟩`
`2f + 1 ⟨PREVOTE, h, r, id(v)⟩`
for the first time | update `lockedValue, lockedRound, validValue, validRound`,
broadcast `⟨PRECOMMIT, h, r, id(v)⟩` | L36 | -| Prevote | Precommit | PolkaNil | `2f + 1 ⟨PREVOTE, h, r, nil⟩` | broadcast `⟨PRECOMMIT, h, r, nil⟩` | L44 | -| Prevote | Precommit | TimeoutPrevote | `TimeoutPrevote(h, r)` | broadcast `⟨PRECOMMIT, h, r, nil⟩` | L61 | -| Precommit | Precommit | PolkaValue(v) | `⟨PROPOSAL, h, r, v, ∗⟩`
`2f + 1 ⟨PREVOTE, h, r, id(v)⟩`
for the first time | update `validValue, validRound` | L36 | - -The ordinary operation of a round of consensus consists on the sequence of -round steps `Propose`, `Prevote`, and `Precommit`, represented in the table. -The conditions for concluding a round of consensus, therefore for leaving the -`InProgress` state, are presented in the previous subsection. - -##### Validity Checks -The pseudocode of the algorithm includes validity checks for the messages. These checks have been moved out of the state machine and are now performed by the `driver` module. -For this reason: -- `L22` is covered by `Proposal(v, -1) and `InvalidProposal(v, -1)` -- `L28` is covered by `ProposalAndPolkaPrevious(v, vr)` and `InvalidProposalAndPolkaPrevious(v, vr)` -- `L36` and `L49` are only called with valid proposal - -TODO - show the full algorithm with all the changes - -##### Asynchronous getValue() and ProposeValue(v) -The original algorithm is modified to allow for asynchronous `getValue()`. The details are described below. - - - - - - - - - - - - -
arXiv paperAsync getValue()
- -``` -function StartRound(round) { - round_p ← round - step_p ← propose - if proposer(h_p, round_p) = p { - if validValue_p != nil { - proposal ← validValue_p - - - - } else { - proposal ← getValue() - - } - - - broadcast ⟨PROPOSAL, h_p, round_p, proposal, validRound_p⟩ - } else { - schedule OnTimeoutPropose(h_p,round_p) to - be executed after timeoutPropose(round_p) - } -} -``` - - - -``` -function StartRound(round) { - round_p ← round - step_p ← propose - if proposer(h_p, round_p) = p { - if validValue_p != nil { - proposal ← validValue_p - - broadcast ⟨PROPOSAL, h_p, round_p, proposal, validRound_p⟩ - - } else { - getValue() // async - schedule OnTimeoutPropose(h_p,round_p) to - be executed after timeoutPropose(round_p) - } - - - } else { - schedule OnTimeoutPropose(h_p,round_p) to - be executed after timeoutPropose(round_p) - } -} -``` - -
- -- New Rule added - - - - - - - - - - - - -
arXiv paperAsync getValue()
- -``` -``` - - - -``` -upon PROPOSEVALUE (h_p, round_p, v) { - proposal ← v - broadcast ⟨PROPOSAL, h_p, round_p, proposal, -1⟩ -} -``` - -
- - -#### Notes -Most of the state transitions represented in the previous tables consider message and -events referring to the node's current round `r`. -In the pseudocode this current round of a node is referred as `round_p`. - -There are however exceptions that have to be handled properly: -- the transition `L28` requires the node to have access to `PREVOTE` messages from a previous round `r' < r`. -- the transition `L49` requires the node to have access to `PRECOMMIT` messages from different round `r' != r`. -- the transition `L55` requires the node to have access to all messages from a future round `r' > r`. - -## References - -* ["The latest gossip on BFT consensus"](https://arxiv.org/pdf/1807.04938.pdf), by _Buchman, Kwon, Milosevic_. 2018. \ No newline at end of file diff --git a/specs/english/consensus/README.md b/specs/english/consensus/README.md new file mode 100644 index 000000000..e28586e72 --- /dev/null +++ b/specs/english/consensus/README.md @@ -0,0 +1,560 @@ +# Malachite Documentation + +Malachite is an implementation of the [Tendermint consensus algorithm][arxiv] in Rust. +It comes together with an executable specification in [Quint][quint-spec]. We use +model-based testing to make sure that the implementation corresponds to the +specification. + +Tendermint consensus algorithm works by a set of validator nodes exchanging messages over a +network, and the local consensus instances act on the incoming messages if +certain conditions are met (e.g., if a threshold number of specific messages is +received a state transition should happen). +The architecture of Malachite separates: + +- counting messages in a *vote keeper* ([Quint][quint-votekeeper]), +- creating consensus inputs in a *driver* ([Quint][quint-driver]), e.g., if a threshold is reached +- doing the state transition depending on the consensus input in the *state machine* ([Quint][quint-sm]) + +A detailed executable specification of these functionalities are given in Quint. +In this (English) document we discuss some underlying principles, namely, + +- [Message handling](#message-handling): How to treat incoming messages. Which messages to store, +and on what conditions to generate consensus inputs. + +- [Round state machine](#round-state-machine): How to change state depending on the +current state and a consensus input. + + +## Message Handling + +The consensus state-machine operates on complex Events that reflect the +reception of one or multiple Messages, combined with state elements and the +interaction with other modules. + +The Tendermint consensus algorithm defines three message types, each type +associated to a round step: + +- `PROPOSAL`: broadcast by the proposer of a round at the `propose` round step. + Carries the value `v` proposed for the current height of consensus. + Only proposed values can be decided. +- `PREVOTE`: broadcast by validators at the `prevote` round step. + Carries either the unique identifier `id(v)` of a proposed value `v`, + in the event that the proposed value was accepted, + or the special `nil` value, otherwise + (i.e., when the proposed value has not been received + or when it was received but not accepted). +- `PRECOMMIT`: broadcast by validators at the `precommit` round step. + Carries either the unique identifier `id(v)` of a proposed value `v`, + in the event that the proposed value was accepted by `2f + 1` validators, + or the special `nil` value, otherwise. + A value is decided when it receives `2f + 1` precommits. 
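+
+As an illustration only, the three message types and the values they carry
+could be sketched as a Rust enum (the type and field names below are our own
+assumptions for exposition, not the actual Malachite definitions):
+
+```rust
+/// Unique identifier `id(v)` of a proposed value `v`.
+type ValueId = u64;
+
+/// Tendermint consensus messages for height `h` and round `r`.
+/// `None` in a vote stands for the special `nil` value.
+enum ConsensusMsg {
+    /// Sent by the proposer of round `r`; carries the full value `v`
+    /// and the valid round `vr` (`-1` if there is none).
+    Proposal { h: u64, r: i64, v: Vec<u8>, vr: i64 },
+    /// Sent at the `prevote` round step; carries `id(v)` or `nil`.
+    Prevote { h: u64, r: i64, value_id: Option<ValueId> },
+    /// Sent at the `precommit` round step; carries `id(v)` or `nil`.
+    Precommit { h: u64, r: i64, value_id: Option<ValueId> },
+}
+```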
+ +This section overviews how messages should be handled at different stages of +the protocol. + +### Proposals + +Messages `⟨PROPOSAL, h, r, v, vr⟩` are generically called proposals. +They are produced at the `propose` round step and are inputs for all round +steps. +General assumptions regarding proposal messages: + +- Proposal messages are produced, signed and broadcast by a validator, + referred the message's *sender*. +- The sender of a proposal message of round `(h, r)` must be the proposer of + round `(h, r)`. The proposer of a round is deterministically computed from + the round identifier `(h, r)` and the validator set of height `h`. +- Proposal messages are the only messages carrying a (full) proposed value `v`. + Knowing the proposed value `v` of a round `(h, r)` is a requirement for + voting for `v` and for deciding `v` in round `(h, r)`. +- Correct validators only broadcast a proposal message in a round if they are + the proposer of that round. + A correct proposer broadcasts a single proposal message, carrying a single + value `v`, per round. +- Byzantine validators may broadcast proposal messages in rounds where they + are not the round's proposer. + - Correct validators can easily identify this attack and ignore such + invalid proposal messages. +- A Byzantine validator that is the proposer of a round may broadcast multiple + proposal messages, carrying distinct proposed values, in that round. + This behaviour constitutes an equivocation attack. + - A correct validator could in theory only consider the first proposal + message received for a round, say it proposes `v`. + The problem of this approach is that `2f + 1` validators might accept, or + even decide, a different value `v' != v`. + By ignoring the equivocating proposal for `v'`, the validator will not be + able to vote for or decide `v'`, which in Tendermint consensus algorithm + may compromise liveness. + + **Note:** in contrast to algorithms from theoretical papers, a node running Tendermint consensus terminates$ + a consensus instance after it has decided; it will no longer react on messages from that instance or send + messages for that instance (if it is a validator). In contrast, in theoretical algorithms, even after deciding, validators keep on + participating and sending messages. In the theoretical setting these validators will help the validator that + has only considered to first proposal from a faulty proposer, to make progress. In Tendermint consensus, this + help is not there. Thus, there is above discussed liveness issue. + - Storing multiple proposal messages for the same round is, by itself, an + attack vector. Validators must thus restrict the number of proposal + messages stored in rounds where multiple proposals are produced. + +### Counting votes + +Messages `⟨PREVOTE, h, r, *⟩` and `⟨PRECOMMIT, h, r, *⟩` are generically called votes. +They refer to a round step `(h, r, s)` of consensus, where `s` is defined by +the vote type, either `prevote` or `precommit`. + +The processing of _individual_ vote messages doesn't produce events relevant for +the consensus state machine. +But when the number of unique vote messages referring to a given round step +`(h, r, s)` reaches a given _threshold_, relevant events are produced; +the produced event depends on the value carried by such votes. + +General assumptions regarding vote messages: + +- Vote messages are produced, signed and broadcast by a validator, + referred the message's *sender*. 
+ - The sender of a vote message must be part of the current *validator set*. + To define whether a vote message from height `h` is valid, the validator + set for height `h` must be known. +- To each validator in the validator set of a height `h` is associated a *voting power*. + - Thresholds are computed from the voting power associated to the + sender of each vote message. +- A vote message carries either the unique identifier `id(v)` of a proposed + value `v`, or the special `nil` value. +- Correct validators broadcast at most one vote message per round step: + carrying either a `id(v)` or `nil`. +- Byzantine validators may broadcast multiple distinct vote messages for the same + round step: equivocation attack. Equivocating vote messages differ on the + value they carry: `nil`, `id(v)`, `id(v')` with `v' != v`. + - A correct validator could "in theory" only consider the first vote message + received from a sender per round step, say it carries `id(v)`. + The problem of this approach is that `2f + 1` validators might only + consider a different vote message from the same sender and round step, + carrying `id(v')` with `v' != v`. This may lead other validators to decide `v'`. + By ignoring the equivocating voting message carrying `id(v')`, the + validator might not be able to decide `v'`, which may compromise + liveness of the consensus algorithm. + + **Note**: the consequences on liveness are the same discussed in the note for Proposal messages. + - Storing multiple vote messages from the same sender and referring to the + same round step is, by itself, an attack vector. Validators must thus + restrict the number of votes stored per sender and round step. + +#### `f + 1` threshold + +This threshold represents that vote messages referring to a round step were +received from a enough number of unique senders, so that it is guaranteed that +_at least one_ of the senders is a correct validator. + +The rationale here is that the cumulative voting power of Byzantine validators +cannot exceed `f`, so that at least one of the considered vote messages must +have been produced by a correct validator. + +#### `2f + 1` threshold + +This threshold represents that vote messages referring to a round step were +received from a enough number of unique senders, so that it is guaranteed that +the voting power of senders that are correct validators exceeds the voting +power of senders that might be Byzantine validators. +In a simplified setup, where validators have the same voting power, the +`2f + 1` threshold guarantees that _the majority_ of the senders are correct +validators. + +The rationale here is that the cumulative voting power of Byzantine validators +cannot exceed `f`, so that the subset of considered vote messages that must +have been produced by correct validators have a cumulative voting power of at +least `f + 1`, which is strictly greater than `f`. + +### Different rounds + +Messages matching the current height and round of a validator produce most of +the relevant events for the consensus state machine. +Messages from different rounds, however, also trigger relevant events. + +This section assumes that a validator is at round `r` of height `h` of +consensus, or in short, at round `(h, r)`. 
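+
+Before looking at messages from other rounds, here is a rough illustration of
+how the `f + 1` and `2f + 1` thresholds above (the former is also used for
+round skipping below) are commonly checked against voting power. The sketch
+uses the "strictly more than 1/3" and "strictly more than 2/3 of the total
+voting power" formulation, which coincides with `f + 1` and `2f + 1` when the
+total voting power is exactly `3f + 1`; the function names are ours, and the
+authoritative rules are in the vote keeper [spec in Quint][quint-votekeeper]:
+
+```rust
+/// Voting power accumulated from a set of unique senders.
+type Weight = u64;
+
+/// `f + 1` threshold: at least one of the senders is a correct validator.
+fn meets_honest_threshold(weight: Weight, total: Weight) -> bool {
+    3 * weight > total
+}
+
+/// `2f + 1` threshold: correct senders outweigh any possible Byzantine senders.
+fn meets_quorum_threshold(weight: Weight, total: Weight) -> bool {
+    3 * weight > 2 * total
+}
+
+fn main() {
+    let total = 100;
+    assert!(!meets_honest_threshold(33, total));
+    assert!(meets_honest_threshold(34, total)); // strictly more than 1/3
+    assert!(!meets_quorum_threshold(66, total));
+    assert!(meets_quorum_threshold(67, total)); // strictly more than 2/3
+}
+```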
+ +#### Previous rounds + +The consensus state machine has events requiring messages from previous rounds +`(h, r')` with `r' < r`: + +- `PREVOTE` messages may be required to produce a Proof of Lock (POL or Polka) for a + value `v` needed for accepting a `PROPOSAL(h, r, v, vr)` message, with + `0 ≤ vr < r`, of the current round (L28). + - A Polka for `v` at round `vr` is a `2f + 1` threshold of `⟨PREVOTE, h, vr, id(v)⟩` messages. +- `PROPOSAL` messages from previous rounds can be required to decide a value + (L49), see more details below. +- `PRECOMMIT` messages can produce a `2f + 1` threshold of `⟨PRECOMMIT, h, r', id(v)⟩` + messages which, together with a `PROPOSAL(h, r', v, *)` message, + leads to the decision of `v` at round `r'` (L49). + +As a result, a validator needs to keep track of messages from previous +rounds to produce the enumerated events: + +1. `PROPOSAL` messages should be maintained when a validator moves to higher rounds, + as well as new `PROPOSAL` messages from previous rounds should be stored. + - Reason I: a `2f + 1` threshold of `⟨PRECOMMIT, h, r', id(v)⟩` messages + could still be obtained, and an existing proposal message for `v` in the + previous round `r' < r` enables the validator to decide `v`. + - Reason II: a `2f + 1` threshold of `⟨PRECOMMIT, h, r', id(v)⟩` messages + was already obtained, but the proposal message for `v` at round `r'` + is missing. Once received, the validator can decide `v`. +2. `PREVOTE` messages should be maintained when a validator moves to higher rounds, + as well as new `PREVOTE` messages from previous rounds should be stored. + - Reason I: a `PROPOSAL(h, r, v, vr)` with `0 ≤ vr < r` can be received in + the current round, requiring an existing `2f + 1` threshold of `⟨PREVOTE, h, vr, id(v)⟩` messages. + - Reason II: a `2f + 1` threshold of `⟨PREVOTE, h, vr, id(v)⟩` messages + can still be obtained and unblock the processing of `PROPOSAL(h, r, v, vr)` + received in the current round. + - Observe that `PREVOTE` messages for `nil` do not need to be maintained for previous rounds. +3. `PRECOMMIT` messages should be maintained when a validator moves to higher rounds, + as well as new `PRECOMMIT` messages from previous rounds should be stored. + - Reason I: a `2f + 1` threshold of `⟨PRECOMMIT, h, r', id(v)⟩` messages + can be obtained, and there is a proposal message for `v` in round + `r'`, leading the validator to decide `v`. + - Reason II: a `2f + 1` threshold of `⟨PRECOMMIT, h, r', id(v)⟩` messages + can be obtained, but there is no proposal message for `v` in round + `r'`. This enables Reason II of 1., i.e., receiving a late proposal. + - Observe that `PRECOMMIT` messages for `nil` do not need to be maintained for previous rounds. + +#### Future rounds + +The consensus state machine requires receiving and processing messages from +future rounds `(h, r')` with `r' > r` for enabling the _round skipping_ mechanism. +This mechanism is defined in the pseudocode as follows: + +``` +55: upon f + 1 ⟨∗, hp, round, ∗, ∗⟩ with round > roundp do +56: StartRound(round) +``` + +The definition is ambiguous and the event triggering round skipping can be +interpreted in two main ways: + +1. Messages of any type and round `r' > r` are received so that the + `f + 1` threshold is reached. +2. Messages of a given type and round `r' > r` are received so that the + `f + 1` threshold is reached. 
+ +Since proposal messages for a round have a single sender, the round's proposer, +in both interpretations the vote messages are the ones that really count +towards the `f + 1` threshold. +The question then is whether we count the senders of `PREVOTE` and `PRECOMMIT` +messages separately (i.e., one set per vote type) or together. + +According to the vote keeper [spec in Quint][quint-votekeeper], the +first interpretation has been adopted. +Namely, the senders of both `PREVOTE` and `PRECOMMIT` messages of a round `r' > r` +are counted together towards the `f + 1` threshold. + +#### Attack vectors + +In addition to the attack vectors induced by equivocating validators, +for [proposal messages](#proposals) and [vote messages](#counting-votes), +the need of storing message referring to previous or future rounds introduces +new attack vectors. + +In the case of messages of [previous rounds](#previous-rounds), the attack +vectors are the same as for messages matching the current round, as the +validator is supposed in any case to store all messages of previous rounds. +A possible mitigation is the observation that vote messages for `nil` have no +use when they refer to previous rounds. + +In the case of messages of [future rounds](#future-rounds) `r' > r`, +in addition to tracking message senders to enable round skipping, +a validator _must_ store the (early) received messages so that they can be +processed and produce relevant events once the validator starts the future +round `r'`. +This constitutes an important attack vector, as Byzantine validators could +broadcast messages referring to an arbitrary number of future rounds. + +There is no trivial solution for preventing the attack derived from the need of +storing messages of future rounds. +However, the following approaches, individually or combined, can mitigate the +effects of this attack: + +1. Store messages only for a limited number future rounds, say future rounds + `r'` where `r < r' ≤ r_max`. + - For instance, CometBFT only tracks messages of a single future round, + i.e., `r_max = r + 1`. +2. Assume that the communication subsystem (p2p) is able to retrieve messages + from a future round `r' > r` once the validator reaches round `r'`. + Since validators keep track of messages of both the current and previous + rounds, they should be able to transmit those messages to their lagging peers. + +### Different heights + +Heights in Tendermint consensus algorithm are communication-closed. +This means that if a validator is at height `h`, messages from either `h' < h` +(past) or `h' > h` (future) heights have no effect on the operation of height `h`. + +However, due to asynchrony, different validators can be at different heights. +More specifically, assuming a lock-step operation for heights (i.e., a +validator only starts height `h + 1` once height `h` is decided), some +validators can be trying to decide a value for height `h` while others have +already transitioned to heights `h' > h`. + +An open question is whether the consensus protocol should be in charge of +handling lagging validators. +This is probably easier to be implement by a separate or auxiliary component, +which implements a syncing protocol. + +#### Past heights + +The consensus state machine is not affected by messages from past heights. +However, the reception of such messages from a peer indicates that the peer may +lagging behind in the protocol, and need to be caught up. 
+ +To catchup a peer that is behind in the protocol (previous heights) it would be +enough to provide the peer with the `Proposal` for the decided value `v` and +a `2f + 1` threshold of `Precommit` messages of the decision round for `id(v)`. +These messages, forming a _decision certificate_, should be stored for a given +number of previous heights. + +#### Future heights + +The consensus state machine is not able to process message from future heights +in a proper way, as the validator set for for a future height may not be known +until the future height is started. +However, once the validator reaches the future height, messages belonging to +that height that were early received are **required** for proper operation. + +An additional complication when handling messages from future heights is that, +contrarily to what happens with messages of [future rounds](#future-rounds), +there is no mechanism that allows the validator to switch to the future height +when it receives a given set of messages from that height. +In fact, considering the lock-step operation of the consensus algorithm, a +node can only start height `h` once height `h - 1` is decided. +Moreover, messages of future heights `h' > h` do not enable, in any way, a +node to reach a decision in its current height `h`. + +#### Attack vectors + +In addition to the attack vectors induced by equivocating validators, +for [proposal messages](#proposals) and [vote messages](#counting-votes), +the need of storing message referring to previous or future heights introduces +new attack vectors. + +If messages from [previous heights](#previous-heights) from a peer trigger a different node to +execute procedures for trying to catch up that peer, a Byzantine peer may +indefinitely claim to be stuck in a previous height, or that it is behind by +several heights. +In both cases the node will consume resources to catchup a peer that possibly +does not need to be caught up. + +The fact that a validator needs to store messages from [future heights](#future-heights), +so that they can be processed and produce relevant events once the validator +eventually starts the corresponding heights, +constitutes a very important attack vector, as Byzantine validators could +broadcast messages referring to an arbitrary number of future heights. + +There is no trivial solution for preventing the attack derived from the need of +storing messages of future heights. +However, the following approaches, individually or combined, can mitigate the +effects of this attack: + +1. Buffer messages for a limited number of future heights, say heights + `h'` where `h < h' ≤ h_max`. +2. Assume that the communication subsystem (p2p) is able to retrieve messages + from future heights `h' > h` once the validator reaches height `h'`. + Notice that this option implies that validators keep a minimal set of + consensus messages from [previous heights](#past-heights) so that to enable +peers lagging behind to decide a past height. + + +## Round state machine + +The consensus state-machine operates on complex `Event`s that reflect the +reception of one or multiple `Message`s, combined with state elements and the +interaction with other modules. + +The state machine represents the operation of consensus at a single `Height(h)` and `Round(r)`. +The diagram below offers a visual representation of the state machine. It shows the input events, using green for simple inputs (e.g. timeouts, proposal) +and red for the complex events (e.g. 
`ProposalAndPolkaCurrent` is sent to the state machine when a valid proposal and a polka of prevotes have been received). +The actions are shown in italics (blue) and the output messages are shown in blue. + +![Consensus SM Diagram](assets/sm_diagram.jpeg) + +The set of states can be summarized as: + +- `Unstarted` + - Initial state + - Can be used to store messages early received for this round + - In the algorithm when `roundp < r`, where `roundp` is the node's current round +- InProgress (`Propose`, `Prevote`, `Precommit`) + - Actual consensus single-round execution + - In the algorithm when `roundp == r` +- `Commit` + - Final state for a successful round + +### Exit transitions + +The table below summarizes the major state transitions in the `Round(r)` state machine. +The transactions from state `InProgress` consider that node can be at any of +the `Propose`, `Prevote`, `Precommit` states. +The `Ref` column refers to the line of the pseudocode where the events can be found. + +| From | To | Ev Name | Event Details | Action | Ref | +| ---------- |------------|------------------------------|-------------------------------------------------------------------|-----------------------------------| --- | +| InProgress | InProgress | PrecommitAny | `2f + 1 ⟨PRECOMMIT, h, r, *⟩`
for the first time | schedule `TimeoutPrecommit(h, r)` | L47 | +| InProgress | Unstarted | TimeoutPrecommit | `TimeoutPrecommit(h, r)` | `next_round(r+1)` | L65 | +| InProgress | Unstarted | SkipRound(r') | `f + 1 ⟨*, h, r', *, *⟩` with `r' > r` | `next_round(r')` | L55 | +| InProgress | Commit | ProposalAndPrecommitValue(v) | `⟨PROPOSAL, h, r', v, *⟩`
`2f + 1 ⟨PRECOMMIT, h, r', id(v)⟩` | `commit(v)` | L49 | + +### InProgress round + +The table below summarizes the state transitions within the `InProgress` state +of the `Round(r)` state machine. +The following state transitions represent the core of the consensus algorithm. +The `Ref` column refers to the line of the pseudocode where the events can be found. + +| From | To | Event | Details | Actions and Return | Ref | +|-----------|-----------|----------------------------------------|----------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------|-----| +| Unstarted | Propose | NewRound(proposer) | `StartRound` with `proposer(h, r) = p` | **async `getValue()` and schedule `TimeoutPropose(h, r)`** | L19 | +| Unstarted | Propose | NewRound(non-proposer) | `StartRound` with `proposer(h, r) != p` (optional restriction) | schedule `TimeoutPropose(h, r)` | L21 | +| **Propose** | **Propose** | **ProposeValue(v)** | `getValue()` returned | broadcast `⟨PROPOSAL, h, r, v, validRound⟩` | L19 | +| Propose | Prevote | Proposal(v, -1) | `⟨PROPOSAL, h, r, v, −1⟩` | broadcast `⟨PREVOTE, h, r, {id(v), nil}⟩` | L23 | +| Propose | Prevote | **InvalidProposal**(v, -1) | `⟨PROPOSAL, h, r, v, −1⟩` | broadcast `⟨PREVOTE, h, r, nil⟩` | L32 | +| Propose | Prevote | ProposalAndPolkaPrevious(v, vr) | `⟨PROPOSAL, h, r, v, vr⟩`
`2f + 1 ⟨PREVOTE, h, vr, id(v)⟩` with `vr < r` | broadcast `⟨PREVOTE, h, r, {id(v), nil}⟩` | L30 | +| Propose | Prevote | **InvalidProposalAndPolkaPrevious**(v, vr) | `⟨PROPOSAL, h, r, v, vr⟩`
`2f + 1 ⟨PREVOTE, h, vr, id(v)⟩` with `vr < r` | broadcast `⟨PREVOTE, h, r, nil⟩` | L32 | +| Propose | Prevote | TimeoutPropose | `TimeoutPropose(h, r)` | broadcast `⟨PREVOTE, h, r, nil⟩` | L57 | +| Prevote | Prevote | PolkaAny | `2f + 1 ⟨PREVOTE, h, r, *⟩`
for the first time | schedule `TimeoutPrevote(h, r)` | L34 | +| Prevote | Precommit | ProposalAndPolkaCurrent(v) | `⟨PROPOSAL, h, r, v, ∗⟩`
`2f + 1 ⟨PREVOTE, h, r, id(v)⟩`
for the first time | update `lockedValue, lockedRound, validValue, validRound`,
broadcast `⟨PRECOMMIT, h, r, id(v)⟩` | L36 | +| Prevote | Precommit | PolkaNil | `2f + 1 ⟨PREVOTE, h, r, nil⟩` | broadcast `⟨PRECOMMIT, h, r, nil⟩` | L44 | +| Prevote | Precommit | TimeoutPrevote | `TimeoutPrevote(h, r)` | broadcast `⟨PRECOMMIT, h, r, nil⟩` | L61 | +| Precommit | Precommit | PolkaValue(v) | `⟨PROPOSAL, h, r, v, ∗⟩`
`2f + 1 ⟨PREVOTE, h, r, id(v)⟩`
for the first time | update `validValue, validRound` | L36 | + +The ordinary operation of a round of consensus consists on the sequence of +round steps `Propose`, `Prevote`, and `Precommit`, represented in the table. +The conditions for concluding a round of consensus, therefore for leaving the +`InProgress` state, are presented in the previous subsection. + +#### Validity Checks + +The pseudocode of the algorithm includes validity checks for the messages. These checks have been moved out of the state machine and are now performed by the `driver` module. +For this reason: +- `L22` is covered by `Proposal(v, -1) and `InvalidProposal(v, -1)` +- `L28` is covered by `ProposalAndPolkaPrevious(v, vr)` and `InvalidProposalAndPolkaPrevious(v, vr)` +- `L36` and `L49` are only called with valid proposal + +TODO - show the full algorithm with all the changes + +#### Asynchronous getValue() and ProposeValue(v) + +The original algorithm is modified to allow for asynchronous `getValue()`. The details are described below. + + + + + + + + + + + + +
arXiv paperAsync getValue()
+ +``` +function StartRound(round) { + round_p ← round + step_p ← propose + if proposer(h_p, round_p) = p { + if validValue_p != nil { + proposal ← validValue_p + + + + } else { + proposal ← getValue() + + } + + + broadcast ⟨PROPOSAL, h_p, round_p, proposal, validRound_p⟩ + } else { + schedule OnTimeoutPropose(h_p,round_p) to + be executed after timeoutPropose(round_p) + } +} +``` + + + +``` +function StartRound(round) { + round_p ← round + step_p ← propose + if proposer(h_p, round_p) = p { + if validValue_p != nil { + proposal ← validValue_p + + broadcast ⟨PROPOSAL, h_p, round_p, proposal, validRound_p⟩ + + } else { + getValue() // async + schedule OnTimeoutPropose(h_p,round_p) to + be executed after timeoutPropose(round_p) + } + + + } else { + schedule OnTimeoutPropose(h_p,round_p) to + be executed after timeoutPropose(round_p) + } +} +``` + +
+ +- New Rule added + + + + + + + + + + + + +
arXiv paperAsync getValue()
+ +``` +``` + + + +``` +upon PROPOSEVALUE (h_p, round_p, v) { + proposal ← v + broadcast ⟨PROPOSAL, h_p, round_p, proposal, -1⟩ +} +``` + +
+ + +### Notes + +Most of the state transitions represented in the previous tables consider message and +events referring to the node's current round `r`. +In the pseudocode this current round of a node is referred as `round_p`. + +There are however exceptions that have to be handled properly: +- the transition `L28` requires the node to have access to `PREVOTE` messages from a previous round `r' < r`. +- the transition `L49` requires the node to have access to `PRECOMMIT` messages from different round `r' != r`. +- the transition `L55` requires the node to have access to all messages from a future round `r' > r`. + +## References + +* ["The latest gossip on BFT consensus"][arxiv], by _Buchman, Kwon, Milosevic_. 2018. + +[arxiv]: https://arxiv.org/pdf/1807.04938.pdf +[quint-spec]: ../quint/README.md +[quint-votekeeper]: ../quint/specs/votekeeper.qnt +[quint-driver]: ../quint/specs/driver.qnt +[quint-sm]: ../quint/specs/consensus.qnt diff --git a/specs/english/forced-updates/README.md b/specs/english/forced-updates/README.md new file mode 100644 index 000000000..ca90577fa --- /dev/null +++ b/specs/english/forced-updates/README.md @@ -0,0 +1,167 @@ +# Starknet Forced Staking Updates Specification + +We consider a composition of three components +- L1. a smart contract on Ethereum +- L2. distributed system of full nodes and validators running a BFT consensus engine +- PR. nodes running prover software (potentially on the same machines as the full nodes/validators). That produce proofs to be sent to L1 (proofs that are stored on L2 are handled somewhere else, TODO: add pointer) + +#### Outline of the protocol +The rough idea of the staking protocol is a follows: +- The stake is managed on the L1 staking registry. When a new staking event, called registration, happens, a message is sent from L1 to L2 +- This message: + - results in a deferred update of the L2 validator set based on Starknet Validator Epochs SVE. If a registration is received by L2 in epoch _E_, it will affect the validator set of epoch _E+2_. + - must be confirmed within a timeout. The timeout is defined with respect to Ethereum Validator Epochs EVE on L1. Think of them in the order of a day. If a registration times out, a reset must happen (details follow below). + + +## Overview + +L2 uses L1 for security. This has two aspects: +1. proofs of L2 block production are submitted to and verified on L1. Once an L1 block with a valid proof becomes finalized, the L2 state becomes finalized. Thus, if clients wait for finalization of L1, they get the full security of L1 +2. before L2 blocks are finalized, they are secured by a proof-of-stake mechanism for L2. By bonding stake on L1, validators have the incentive to follow the protocol. + - **the goal of this protocol is to enforce the adoption of a new validator set, produced by L1** + - for this to work, every change in the bonded stake on L1, so-called registrations, need to be reliably transmitted to L2 + - this is enforced by a timeout mechanism based on EVE epoch (say a day) and Point 1.: intuitively, L1 will only accept proofs for a L2 block B, if all registration from two days ago have been included in the L2 blocks up to B; if a timeout has expired, L1 enforces an L2 reset, by requiring a proof for a specific block that contains all registrations from two days ago, and a new forkID. + +Notice, however, that there is no explicit signalization from L1 to start the reset protocol. 
Instead, validators that remain in the reset validator set and nodes that become validators in the reset validator set are expected to initiate the Fork Protocol, once they realize that it is needed. It is assumed that nodes joining the validator set of a fork have access to all state they need to produce and validate blocks (i.e., make progress) in that fork. + +For all effects, it can be useful to consider the first block of a new fork as it was a genesis state or block. + +If L2 made progress until height 1000, but the last accepted proof on L1 was for height 900, on L2 this effectively means that correct validators need to roll-back to the state of 900 for the reset, and dismiss the previously produced blocks. + +**Requirement.** In addition to ensure safety (every proof accepted on L1 contains all sufficiently old registrations), the protocol should ensure progress in favorable situations, that is: If at the end of an EVE epoch the validator set defined by L1 registrations contains a quorum of honest validators that are alive for sufficiently long, new blocks should be added to L2, and if there are alive provers, proofs should be added to L1. + + +## Central aspects of the composition +The validity property of consensus (which determines whether a specific block can be decided on in L2), is defined by L1 and PR: **A block _b_ produced by L2 is valid iff L1 can successfully verify _PR(b)_** +- _PR(b)_ actually stands for a combined proof of multiple L2 blocks. In practice, not every block is proven individually to L1 +- validity is dependent on time; in particular the time on Ethereum. A block that is valid now, can become invalid if it takes too long to get a proof on L1. (This is due to stale registrations introduced below) + +### Proofs +L1 accepts proofs for the block generation function. This function, roughly speaking, has two branches: +1. normal block production (no error condition) +2. production of an initial block of a fork after reset + +#### Normal block production: +_PR(b)_ is a proof that _b_ was produced properly, including: +- the state transition encoded in _b_ is consistent with the transactions in the block, and the complete history of transaction in the prefix of the blockchain (iteratively, that is, one can apply a proof of a block to the proof of the prefix) +- other meta data consistency is met (the staged and unstaged validator set changes are consistent with the received registrations; same forkID as previous block; lastblockID is hash of last block, etc.) +- if the block contains transactions, it must also contain a proof (TODO: more details to come out of proof specification work that happens in parallel) +- a quorum of validators has signed the block. "Quorum" is defined by the history of the blockchain and the epoched validator set changes (we can write this more precisely) + +**Observation** assumption/design decision: full nodes (validators) can check this kind of validity by observing only L2 (this doesn't mean that this is the validity that L1 is going to use in case there is a fork). + +#### Fork block production: +Similar to above but: +- different meta data constraints, e.g. 
the new forkID comes from the stale registrations of L1 +- the new validator set is defined by data from L1 and L2 + - the last block of L2 proved to L1 (validator set, staged and unstaged updates; TODO: clarify with Starkware) + - stale registrations from L1; + - they must appear as transactions in the L2 block (so that L1 can verify they have been handled; TODO: verify with Starknet), + - in contrast to the normal flow, they must be applied instantaneously (to the metadata, that is, the validator set) + +**Observation** assumption/design decision: full nodes (validators) need to observe L1 (stale registrations, last proven block) and L2 for this. + + +### Registrations +The "required validators" is information that originates from L1, via so called registrations, and is enforced by L1 +- L1 uses L1->L2 messaging (with acknowledgements) to make sure that L2 is aware of all registrations +- if acknowledgements time out (in terms of EVE epochs), a reset happens (L2 validator nodes observe that and take action) + - a reset means, that L1 stops accepting "normal block production proofs" and requires specific "fork block production proofs" + - as these specific proofs **enforce** the first block to contain timed-out registrations and a new validator set (and corresponding signatures), and a new forkID, **validity enforces a reconfiguration** +- intuitively, L1 observes (via results that come with proofs) whether all its registrations are mirrored on L2. Then the existence of a proof of block production implies that the correct validator set as defined by the registration is used (and there are enough signatures) + + +### L1->L2 messaging +L1->L2 messaging is done by an oracle flow (not the IBC way of cryptographic proofs): the proposer sees a message to be sent on L1. When it can be sure that the other validators also have seen the message it puts it into the proposal, and the validators vote on it. This means, for validating a proposal, a validator needs to closely follow what happens on L1. + +## Formalizing the protocol in Quint + +We have formalized the reset protocol in Quint. To do so, we abstracted away many details not relevant to the understanding of the protocol. The specification includes: + +- protocol functionality: how data inside blocks is computed and validated +- state machine consisting of L1, L2, and a set collecting registrations +- invariants (that have been preliminarily tested) and temporal formulas (that are just written but have not been investigated further) + +### Protocol functionality + +This contains mainly the following functions (and their auxiliary functions): +- `pure def newL1Block (prev: L1Block, regs: Set[Registration], proof: L2Proof, delay: Time) : L1Block` + - this returns a new L1 block, based on the previous block, newly added registrations, potentially a submitted proof for several L2 blocks, and a delay parameter the defines the time difference between the new block and the old one, to model progress in time + - this function uses the crucial function `proofOK` to check whether the submitted proof can be verified. This captures central functionality for the rest protocol, namely whether the proof + - is for the right heights and forkID, and + - has all required unfulfilled updates. 
+- `pure def newL2Block (chain: List[L2Block], regs: Set[Registration]) : L2Block` + - this returns a new L2 block during normal operation, based on the previous block and newly added registrations (that should be thought of having received via L1->L2 messaging) + - it contains a branch with the following cases: + - a new block within an SVE epoch or + - a new block for a new SVE epoch +- `pure def forkBlock (prev: L2Block, regs: Set[Registration], h: Height, fID: ForkID) : L2Block` + - this returns a new L2 block in the case of a reset. In addition to the "normal" parameters, it needs the last provenHeight and the new forkID which is information that the validators need to obtain from data on L1 + +- `pure def makeProof (l2: List[L2Block], to_height: Height, l1: List[L1Block]) : L2Proof` + - This returns our abstraction of a proof of multiple L2 blocks. `L2Proof` is a sum-type to allow invalid and absent proofs + - The function needs + - data from L2 to compute the result of confirmed registration + - data from L1 namely the provenHeight + +### State Machine + +The state machine contains the following variables: +``` +var L1: List[L1Block] +var L2: List[L2Block] +var envRegs: Set[Registration] +``` + +In addition to several parameterized actions that we can use to control the creation of specific scenarios, we have the following actions to generate random traces: + +- `addRegistration` +- `addL1Block` +- `addL2Block` +- `reset` + +#### addRegistration +The addRegistration action creates a new registration with random content that is added to the state variable envRegs. This action represents the submission of registration from an external actor. The registration is not yet added to a L1 block. + +#### addL1Block +The addL1Block action appends a new L1Block to the L1 blockchain (the state variable L1). The new L1 block includes all submitted registrations stored in the state variable envRegs, added to the addL1Block fields newRegistrations and unfulfilled_updates. The unfulfilled_updates field contains the submitted but not yet confirmed (i.e., pending) registrations. + +#### addL2Block +The addL2Block action appends a new L2Block to the L2 blockchain (the state variable L2). The new L2 block includes a random subset regs of the unfulfilled_updates field of the latest L1 block. Note that regs can be empty or its registrations may not follow the registration total order. The action uses the function `newL2Block` to compute the new block + +#### reset +The reset action produces forks in the L2 blockchain when L2 fails to prove the inclusion in L2 blocks of the registrations produced by L1. There is a deadline, given in terms of L1 epochs, for each registration produced by L1 to be committed by L2. When the deadline for a registration is reached and the registration is still pending, i.e., it was not yet confirmed by L2, we say that the registration is stale. When there is a stale registration in L1, a fork should be produced in L2 as a way to enforce that all stale registrations are reflected in the validator set adopted by L2. + +The reset action checks whether there are stale registrations in L1 by considering the last block appended to L1, and checking if there is any registration in pendingRegistrations whose submission epoch is older than two L1 epochs, considering as the reference, current epoch, the epoch of the last block appended to L1. If this is a case a fork is produced. + +To produce a fork, the L2 blockchain is rolled-back to the latest (highest) L2's provenHeight. 
+All L2 blocks with height higher than the latest L2's provenHeight are thus dropped. (In the Quint specification we currently store dropped blocks in the state variable `prevDeletedL2blocks` to inspect the reset scenarios completely) + +Once L2 is rolled-back to latest L2's provenHeight, say h, a new block is appended to L2 with height h+1. This is a fork block produced by the forkBlock function. + +### Invariants and temporal formulas + +For details we refer to the Quint file, and the upcoming analysis documentation (Points (c) and (d) in the SoW. TODO). + +## Issues + +### Transfer registrations instead of valsets + +QUESTION: As there is epoched staking, I wonder why registrations are sent one-by-one. In principle they could be sent as a batch at the end of an EVE epoch. + +- This will lead to slightly different behavior on L2, as the Starknet epochs are not synchronized with EVE +- this would potentially simplify ordering of messages in L1->L2? +- not sure whether number of L1->L2 messages is a concern. I think in Interchain staking they are not happy with so many transfers (we need to confirm with the hub team) -- but I think Starknet will do batches? +- as mentioned on Slack L1->L2 messaging from the past + +### Lightclients + +L2 Light clients are a concern. However, one needs to accept that they have reduced security compared to full nodes. In particular, we need to figure out whether and how a light client should figure out that there is a reset, and what to do in this case. + +If height _f_ is a fork block, then checking the "validity" based on block _f-1_ requires a different function -> implies complexity for light clients that read L2; CONFIRM: are L2 light clients a concern? (i.e., validate state from L2) + +### Re-using some proofs on L2 + +In general these proofs are handled somewhere else. But this point came up in discussions: + +- Follow-up: If there is a new fork, some of the proofs that have been done for the old fork are still usable (the proofs always point to the past). Are we thinking about storing and re-proposing them? diff --git a/specs/english/proofs_scheduling.md b/specs/english/proofs_scheduling.md new file mode 100644 index 000000000..8fca450a7 --- /dev/null +++ b/specs/english/proofs_scheduling.md @@ -0,0 +1,320 @@ +# Proofs Scheduling + +The Starket architecture includes the figure of a [prover][starkprover], a node +that produces **proofs** for blocks committed to the blockchain, in order to attest +the correct processing of the transactions included in that block. + + +## Overview + +Since **producing proofs is slow**, we should expect the proof for a block to +take several blocks to be produced. +So once a block is committed at height `H` of the blockchain, a prover is +expected to take `L` time to produce a proof of block `H`. +Meanwhile, several blocks may have been committed to the blockchain, to that +the proof of block `H` will only be available at the time when a block is being +produced and proposed at a height `H' > H`. + +Since **production proofs is expensive**, we should avoid having multiple +provers spending resources to proof the same block. +The need for a **scheduling protocol** derives from this requirement. +Of course, in bad scenarios, we would need multiple provers for a single block, +but this situation should be avoided whenever possible. + +**Proofs are included in blocks** and are committed to the blockchain. 
+If fact, the content of block is only _final_ when: +(i) it is committed to the blockchain, +(ii) another block including its proof is also committed to the blockchain, +and (iii) the proof of the original block is sent to and validate by L1, the +Ethereum blockchain. + +Ideally, each proposed block should include a proof of a single, previously +committed block. +However, we cannot guarantee that a proof of a previously committed block is +available whenever a new block is proposed. As a result, some blocks may not +include any proof, and some other blocks will need to include proofs of +multiple, previously committed blocks. +Or, more precisely, a proof attesting the proper execution of multiple blocks. + +## Strands + +The proposed solution is to adopt a **static scheduling** protocol. +The blockchain is virtually split into a number of **strands**, +so that proofs of blocks belonging to a strand are included in blocks belonging +to the same strand. + +Using `K` to denote the number of strands in the blockchain, +the mapping of blocks to strands is as follows: + +- A block at height `H` of the blockchain belongs to the strand: `strand(H) = H mod K`. + +The constant `K` should be defined considering a conservative upper bound for +the latency `L` to produce a proof for a block and expected block latency +(i.e., the expected interval between committing successive blocks). +The goal is to ensure, with high probability, that no more than `K` blocks are +produced and committed in `L` time units, +so that the proof of a block committed at height `H` is available when height +`H' = H + K` is started. + +### Scheduling + +The static strand-based scheduling is represented as follows. + +Lets `proof(H)` be the proof of the block committed at height `H`, then: + +- `proof(H)` is included in a block committed at height `H' = H + i * K`, with `i > 0`. + +This is a **safety** property, stating that proofs of blocks and the proven +blocks are committed in the same strand. +In fact, since `strand(H) == strand(H + i * K)`, for any integer `i`, proofs +and blocks are in the same strand. + +In the ideal, best-case scenario we have `i == 1`, meaning that the proof of the +block committed at height `H` is included in block `H' = H + K`. +If, for any reason, `proof(H)` is not available to the proposer of block `H'` +when it produces the block to propose in height `H'`, then the +inclusion of `proof(H)` is shifted to the next block in the same strand +`strand(H)`, which would be `H" = H' + K = H + 2 * K`. +This undesired scenario can be observed multiple times, resulting in another +shift by `K` on the block height where `proof(H)` is included. + +We want to limit the number of blocks in the strand `strand(H)` that do not include the proof of block `H`. +So we define a constant `P` and state the following **liveness** property: + +- `proof(H)` must be included in a block committed up to height `H* = H + (P + 1) * K`. + +So, if `proof(H)` is not included in blocks committed at heights `H' = H + i * K`, +with `1 <= i <= P`, then height `H*` cannot be concluded until the proposed +block that ends up being committed includes `proof(H)`. + +## Context + +Before detailing the proofs scheduling protocol implementation, we introduce +some minimal context. + +### Consensus + +The block committed to the height `H` of the blockchain is the value decided in +the instance `H` of the consensus protocol. +An instance of consensus consists of one or multiple rounds `R`, always +starting from round `R = 0`. 
+We expect most heights to be decided in the first round, so the scheduling +protocol focuses on this scenario. + +The instance `H` of the consensus protocol is run by a set of validators +`valset(H)`, which is known by all nodes. +The same validator set is adopted in all rounds of a height, but the validator +set may change over heights. +Nodes must know `valset(H)` before starting their participation in the +instance `H` of the consensus protocol. + +There is a deterministic function `proposer(H,R)` that defines from `valset(H)` +the validator that should propose a block in the round `R` of the instance `H` +of the consensus protocol. +We define, for the sake of the scheduling protocol, the **primary proposer** of +height `H` as the proposer of its first round, i.e., `proposer(H,0)`. + +### Blocks + +Blocks proposed in a round of the consensus protocol and eventually committed +to the blockchain are formed by: + +- A `header` field, containing consensus and blockchain related information +- A `proof` field, possibly empty, containing a proof of a set of previous blocks +- A `payload` field, possibly empty, consisting of a set transactions submitted by users + +For the sake of the scheduling protocol, we distinguish between two kind of blocks: + +- **Full blocks** carry transactions, i.e., have a non-empty `payload`. + The protocol requires full blocks to include a non-empty `proof` field. + Full blocks are the typical and relevant blocks in the blockchain. +- **Empty blocks** do not carry transactions, i.e., have an empty `payload`. + The protocol may force the production of empty blocks, which are undesired, + when their proposers do not have a proof to include in the block. + + +## Protocol + +The proofs scheduling protocol specifies the behaviour of the **proposers** of +rounds of the consensus protocol. + +### Overview + +A proposer is expected to include in its proposed block at height `H` a +`proof` for **all unproven blocks** committed to the same strand as height `H`. +A block is unproven when its proof was not yet committed to the blockchain. + +If a proposer of height `H` **has received**, from the designated provers, a +`proof` for all unproven blocks belonging to `strand(H)`, then it is allowed to +produce and propose a **full block**, i.e., a block containing transactions. + +But if the proposer of height `H` **has not received**, from the designated provers, +a `proof` for all unproven blocks belonging to `strand(H)`, then it is forced +to propose an **empty block**, i.e., a block without transactions, and with an +empty `proof` field. +Notice that the proposer may have received an _incomplete_ proof, proving only +part of the unproven blocks in the current strand, but only _full_ proofs can +be included in proposed blocks. + +The reason for forcing the production of **empty blocks** when a proof for +**all unproven blocks** is **not available** is to discourage the production of +blocks with an empty `proof` field. +There are rewards for proposers that produce blocks that end-up committed, +associated to the transactions included in the block. +Producing an empty block is therefore not interesting for a proposer, that +should do its best to include _full_ proofs in the proposed blocks. + +There is a second reason for enforcing this behavior, which is the fact that +producing a **proof for an empty block** should be **faster** and less +expensive than producing a proof for a full block. 
+Thus, if a block has an empty `proof` field, therefore does not contributes to +the proving mechanism, it should at least be easier to prove. + +### Formalization + +First, lets define what it is meant by unproven blocks in a strand `s` at a +given state of the blockchain: + +- `unproven(s)` is a set of heights `H` with `strand(H) == s` and whose + `proof(H)` was not yet committed. + +Then, lets extend the definition of `proof(H)` to consider proofs for multiple +blocks, from a set `S` of heights: + +- `proofs(S)` is a proof that includes a `proof(H)` for every height `H` in the set + `S` of heights. + +Finally, lets define the expected proof to be included in the block at +height `H`: + + expected_proof(H) = proofs(unproven(strand(H))) + +So, lets `s = strand(H)`, the proof included in block `H` should prove all blocks +in `proofs(unproven(s))`. + +From the roles presented to the operation of a proposer of height `H`, we can +define the following **invariant**: + + block(H).payload != Ø => block(H).proof == expected_proof(H) + +Namely, if the block carries a payload (transactions), then it must include the +full expected proof for its height. + +### Properties + +The first property shows that, except for a corner scenario, there are always +proofs to be included in a new block: + +- For all heights `H >= K`, there are always blocks to proof, i.e., `expected_proof(H) != Ø`. + +This happens because the previous height in the same strand `strand(H)`, height +`H - K >= 0`, has not yet been proven, as there is not height between `H - K` +and `H` belonging to the same strand as height `H`. +As a corollary: + +- For every strand `s`, either it has no blocks (i.e., blockchain height `< K`) + or `unproven(s) != Ø`. + +Considering now strands instead of heights, for every strand `s` we have: + +1. The first (lowest) height `Hmin` in `unproven(s)` is of a block that + contains an non-empty `proof` field. +2. Every other height `H' > Hmin` in `unproven(s)` is of an **empty block** + with an empty `proof` field. +3. There are no gaps in `unproven(s)`, namely for every integer `i` with + `0 <= i < |unproven(s)|`, the height `H(i) = Hmin + i * K` is + present in `unproven(s)` and, of course, `strand(H(i)) == s`. +4. There is at most `P` heights of **empty blocks** in `unproven(s)`, + by the [strand scheduling](#scheduling) definition. + +These properties can be proved by induction on `unproven(s)` and the +strand-based static scheduling protocol. + +The intuition is that when producing a new block on a strand `s`, say block +`H`, we have two possibilities: +(i) the proposer of block `H` includes in the block all unproven blocks on +strand `s`, therefore resetting `unproven(s)` to empty, +or (ii) produces an empty block with no proofs, thus leaving `unproven(s)` +unchanged. +Since new block `H` is not yet proven, as just committed, it is appended to +`unproven(s)`. + +## Implementation + +This section presents an implementation for the previously described protocol. +More specifically, it defines the behaviour of **provers** and how they are +supposed to produce proofs for committed blocks. + +When block `H` is committed to the blockchain, the **prover** of the next height in +strand `strand(H)` is expected to start generating a proof of block `H`. +Notice that the produced proof should be sent to `proposer(H + K, 0)`. + +To generate the proof of block `H`, the prover needs the proof of the previous +block in strand `strand(H)`, whose height is `H - K`. 
+In the favorable scenario, `proof(H - K)` is included in block `H`, so the +production of `proof(H)` can start immediately. +Otherwise, the prover needs to compute `unproven(strand(H))` and follow the steps: + +1. Go back to the block with the lowest height `Hmin` in + `unproven(strand(H))`, which must include a non-empty `proof` field (by property 1.), + and use `block(Hmin).proof` and `block(Hmin)` to produce `proof(Hmin)`; + - Notice that in the favorable scenario `H == Hmin`, and the process is done here. +2. Go to the block `Hmin + K` and use `proof(Hmin)` and `block(Hmin + K)` to + produce `proof(Hmin + K)`. This operation should be faster because + `block(Hmin + K)` must be empty (by property 2.). +3. If `Hmin + K == H`, the process is done. Otherwise, set `Hmin = Hmin + K` + and repeat step 2. + +At the end of the process, the prover has produced a single proof attesting the +proper execution of **one full block**, at height `Hmin`, +and possibly, in the case of `|unproven(strand(H))| > 1`, also of the execution of +**some empty blocks**. +The produced proof is targeted to be included in the `proof` field of the block +proposed at height `H + K`. + +### Proposers and Provers + +The proofs produced by provers are expected to be included in committed blocks. +A committed block must have been produced by the **proposer** of a round of +consensus, possibly including a `proof` produced by a **prover**. + +There is therefore a relation between provers and proposers. +The simpler way to define this relation is to assume that prover and proposer +are roles that are implemented by the same nodes. +So, once the block at height `H` is committed, the primary proposer of height +`H + K` starts producing `expected_proof(H + K)`. +If it is produced by the time height `H + K` starts, it is included in +the `proof` field of the block produced for that height. + +We may consider, however, more complex setups where provers and proposers are +distinct nodes. +In this case, it has to be defined how the prover assigned to produce +`expected_proof(H + K)` interacts with the proposers of height `H + K`, +in particular with its primary proposer, the node defined by `proposer(H + K, 0)`. + +The relation between provers and proposers is particularly relevant in the +scenario where multiple empty blocks, with empty `proof` fields, are +committed to a strand `s`. +Recall that there is a limit for the number of such blocks in a strand, at most +`P` can be produced. +So, if `|unproven(s)| > P`, then the proposer of **any round** of a height `H`, +where `strand(H) == s`, can only produce and propose a block if it includes +`expected_proof(H)`. + +## Issues to Address + +- [#245](https://github.com/informalsystems/malachite/issues/245): refers to the last + scenario described in this [section](#proposers-and-provers), when a block + can only be committed if it includes the expected proof. + If the proof is not available, and has to start being computed during the + height, we should expect a `L` latency for the height, which can be in the + order of minutes. +- [#246](https://github.com/informalsystems/malachite/issues/246): the validator set + may change at the end of each epoch. The validator set of the next epoch is + know, although not yet installed. The validator set is the input used to + compute the proposer for each round and height, therefore needs to be known a + priori. + We must discuss the relation between the constants `E`, the epoch length, and `K`, the + number of strands. 
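+
+As a small aid for that discussion, the strand arithmetic defined above can be
+summarized in a short sketch (illustrative only; the concrete values chosen
+for `K` and `P` below are placeholders, not decisions):
+
+```rust
+/// Number of strands (see "Strands").
+const K: u64 = 8;
+/// Maximum number of blocks in a strand committed without `proof(H)`
+/// before liveness forces its inclusion (see "Scheduling").
+const P: u64 = 3;
+
+/// Strand to which the block at height `h` belongs: `strand(H) = H mod K`.
+fn strand(h: u64) -> u64 {
+    h % K
+}
+
+/// Heights `H + i * K`, with `i > 0`, at which `proof(h)` may be included,
+/// up to the liveness bound `H* = H + (P + 1) * K`.
+fn proof_inclusion_heights(h: u64) -> impl Iterator<Item = u64> {
+    (1..=P + 1).map(move |i| h + i * K)
+}
+
+fn main() {
+    let h = 42;
+    // Proofs are committed in the same strand as the block they prove.
+    assert!(proof_inclusion_heights(h).all(|h2| strand(h2) == strand(h)));
+    // In the best case, `proof(h)` appears already at height `h + K`.
+    assert_eq!(proof_inclusion_heights(h).next(), Some(h + K));
+}
+```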
+ +[starkprover]: https://docs.starknet.io/architecture-and-concepts/network-architecture/starknet-architecture-overview/#provers diff --git a/specs/quint/specs/driver.qnt b/specs/quint/specs/driver.qnt index 2b1b3f4d0..f6dc85c4d 100644 --- a/specs/quint/specs/driver.qnt +++ b/specs/quint/specs/driver.qnt @@ -193,7 +193,7 @@ module driver { if (receivedCommit) // we have a commit that matches the proposal. We don't need to compare against // es.cs.round - callConsensus(newES, newES.bk, ProposalAndCommitAndValidCInput(Val(prop.proposal))) + callConsensus(newES, newES.bk, ProposalAndCommitAndValidCInput(Val(prop.proposal))) // FIXME: shouldn't be an ID here? else if (es.cs.round != prop.round or es.cs.height != prop.height) // the proposal is from the right proposer and valid, but not for this round // keep the proposal, do nothing else @@ -215,7 +215,7 @@ module driver { (newES, NoConsensusOutput) else if (newES.cs.step == PrevoteStep or newES.cs.step == PrecommitStep) val receivedCommitCurrentVal = checkThreshold(newES.bk, newES.cs.round, Precommit, th) - if (receivedCommitCurrentVal) + if (receivedCommitCurrentVal) // FIXME: is this even reachable? receivedCommit is true here, right? // here we need to call both, Commit and Polka. // We do commit and append polka to pending val pending = (ProposalAndPolkaAndValidCInput(propId), newES.cs.height, newES.cs.round) diff --git a/specs/quint/specs/reset/resetSystem.qnt b/specs/quint/specs/reset/resetSystem.qnt index 871901543..de9c6f52c 100644 --- a/specs/quint/specs/reset/resetSystem.qnt +++ b/specs/quint/specs/reset/resetSystem.qnt @@ -403,7 +403,7 @@ def valid (b: L2Block) : bool = L1.last().l2forkID, L1.last().unfulfilled_updates, bp, - L1.last().time) // whether a proof is accepted is time-dependend + L1.last().time) // whether a proof is accepted is time-dependent } diff --git a/specs/quint/specs/types.qnt b/specs/quint/specs/types.qnt index fd70ef64b..b63b58383 100644 --- a/specs/quint/specs/types.qnt +++ b/specs/quint/specs/types.qnt @@ -54,7 +54,7 @@ module types { // ************************************************************************* type Proposal = { - srcAddress: Address, + srcAddress: Address, // TODO: rename to sender height: Height, round: Round, proposal: NonNilValue, // an actual value. All other values are id(proposal) @@ -67,7 +67,7 @@ module types { type VoteType = Prevote | Precommit type Vote = { voteType: VoteType, - srcAddress: Address, + srcAddress: Address, // TODO: rename to sender height: Height, round: Round, valueId: ValueId, diff --git a/specs/quint/specs/votekeeper.qnt b/specs/quint/specs/votekeeper.qnt index 5f340b33c..20a9b8567 100644 --- a/specs/quint/specs/votekeeper.qnt +++ b/specs/quint/specs/votekeeper.qnt @@ -218,7 +218,7 @@ module votekeeper { // - It first adds the vote and then computes a threshold. // - If there exist a threshold and has not emitted before, the function returns the corresponding VoteKeeperOutput. // - Otherwise, the function returns a no-threshold output. - // - Note that if there is no threshold after adding the vote, the function checks if there is a skip threshold. + // - Note that if there is no threshold after adding the vote, the function checks if there is a skip threshold. // FIXME: not matching // TO DISCUSS: // - There might be a problem if we generalize from single-shot to multi-shot: the keeper only keeps the totalWeight // of the current height; I wonder if we need to keep the totalWeight for every Height that we may receive a vote for. 
@@ -238,12 +238,12 @@ module votekeeper { else roundVotes.votesAddressesWeights.mapSafeSet(vote.srcAddress, weight) - // Combined weight of all validators at this height + // Combined weight of all validators at this height // FIXME: height or round? val combinedWeight = updatedVotesAddressesWeights.mapSumValues() val finalOutput = if (vote.round > currentRound and isSkip(combinedWeight, keeper.totalWeight)) - SkipVKOutput(vote.round) + SkipVKOutput(vote.round) // FIXME: can we produce multiple events of this type? else val threshold = computeThreshold(updatedVoteCount, vote.valueId) val output = toVoteKeeperOutput(vote.round, vote.voteType, threshold) diff --git a/specs/quint/tests/consensus/consensusTest.qnt b/specs/quint/tests/consensus/consensusTest.qnt index d3ef69743..a4142ad35 100644 --- a/specs/quint/tests/consensus/consensusTest.qnt +++ b/specs/quint/tests/consensus/consensusTest.qnt @@ -31,8 +31,8 @@ module consensusTest { } action step = - nondet h = 1 //oneOf(1.to(4)) - nondet r = 0 //oneOf(1.to(4)) + nondet h = oneOf(1.to(4)) + nondet r = oneOf(1.to(4)) nondet v = oneOf(Set("A", "B", "C")) nondet vr = oneOf(Set(-1, 1, 2, 3, 4)) any {