diff --git a/.github/workflows/domain-genesis-storage-snapshot-build.yml b/.github/workflows/domain-genesis-storage-snapshot-build.yml index c2c3787ce7..7a59fe20d7 100644 --- a/.github/workflows/domain-genesis-storage-snapshot-build.yml +++ b/.github/workflows/domain-genesis-storage-snapshot-build.yml @@ -18,6 +18,9 @@ jobs: packages: write steps: + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - name: Build node image id: build uses: docker/build-push-action@4f58ea79222b3b9dc2c8bbdd6debcef730109a75 # v6.9.0 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f9d83c7157..bbf5507c27 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -4,13 +4,7 @@ on: push: branches: - main - paths-ignore: - - "**.md" - - ".github/CODEOWNERS" pull_request: - paths-ignore: - - "**.md" - - ".github/CODEOWNERS" workflow_dispatch: merge_group: diff --git a/.github/workflows/rustsec-audit.yml b/.github/workflows/rustsec-audit.yml index 00c5876033..19eb1e81e4 100644 --- a/.github/workflows/rustsec-audit.yml +++ b/.github/workflows/rustsec-audit.yml @@ -14,7 +14,7 @@ on: - cron: "40 13 * * 0" jobs: security_audit: - runs-on: ${{ fromJson(github.repository_owner == 'autonomys' && '["self-hosted", "ubuntu-22.04-x86-64"]' || '"ubuntu-22.04"') }} + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 - uses: rustsec/audit-check@69366f33c96575abad1ee0dba8212993eecbe998 #v2.0.0 diff --git a/.github/workflows/snapshot-build.yml b/.github/workflows/snapshot-build.yml index 7420f0fa7c..dba04fe5b8 100644 --- a/.github/workflows/snapshot-build.yml +++ b/.github/workflows/snapshot-build.yml @@ -33,6 +33,9 @@ jobs: - image: node base-artifact: subspace-node upload-executables: true + - image: gateway + base-artifact: subspace-gateway + upload-executables: false - image: bootstrap-node base-artifact: subspace-bootstrap-node upload-executables: false @@ -104,7 +107,6 @@ jobs: cd executables IMAGE="${{ fromJSON(steps.meta.outputs.json).tags[0] }}" ARTIFACT="${{ matrix.build.base-artifact }}" - docker run --rm --platform linux/amd64 --entrypoint /bin/cat $IMAGE /$ARTIFACT > $ARTIFACT-ubuntu-x86_64-skylake-${{ github.ref_name }} # TODO: Pull is a workaround for https://github.com/moby/moby/issues/48197#issuecomment-2472265028 docker pull --platform linux/amd64/v2 $IMAGE diff --git a/Cargo.lock b/Cargo.lock index 96499371b5..02f0843753 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "Inflector" @@ -38,6 +38,7 @@ dependencies = [ "actix-codec", "actix-rt", "actix-service", + "actix-tls", "actix-utils", "ahash", "base64 0.22.1", @@ -131,6 +132,25 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "actix-tls" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389" +dependencies = [ + "actix-rt", + "actix-service", + "actix-utils", + "futures-core", + "impl-more", + "pin-project-lite", + "tokio", + "tokio-rustls 0.23.4", + "tokio-util", + "tracing", + "webpki-roots 0.22.6", +] + [[package]] name = "actix-utils" version = "3.0.1" @@ -154,6 +174,7 @@ dependencies = [ "actix-rt", "actix-server", "actix-service", + "actix-tls", "actix-utils", "actix-web-codegen", "ahash", @@ -760,15 +781,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-mutex" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479db852db25d9dbf6204e6cb6253698f175c15726470f78af0d918e99d6156e" -dependencies = [ - "event-listener 2.5.3", -] - [[package]] name = "async-nats" version = "0.37.0" @@ -2429,15 +2441,6 @@ dependencies = [ "dirs-sys-next", ] -[[package]] -name = "dirs" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" -dependencies = [ - "dirs-sys", -] - [[package]] name = "dirs-sys" version = "0.4.1" @@ -2617,6 +2620,7 @@ dependencies = [ "pallet-transporter", "parity-scale-codec", "parking_lot 0.12.3", + "rand", "sc-cli", "sc-client-api", "sc-consensus", @@ -4236,7 +4240,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f2f12607f92c69b12ed746fabf9ca4f5c482cba46679c1a75b874ed7c26adb" dependencies = [ "futures-io", - "rustls 0.23.13", + "rustls 0.23.18", "rustls-pki-types", ] @@ -4869,6 +4873,7 @@ dependencies = [ "pin-project-lite", "smallvec", "tokio", + "want", ] [[package]] @@ -4887,6 +4892,24 @@ dependencies = [ "tokio-rustls 0.24.1", ] +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.4.1", + "hyper-util", + "rustls 0.23.18", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", + "webpki-roots 0.26.6", +] + [[package]] name = "hyper-util" version = "0.1.9" @@ -4894,13 +4917,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" dependencies = [ "bytes", + "futures-channel", "futures-util", "http 1.1.0", "http-body 1.0.1", "hyper 1.4.1", "pin-project-lite", + "socket2 0.5.7", "tokio", "tower-service", + "tracing", ] [[package]] @@ -5282,7 +5308,7 @@ dependencies = [ "http 1.1.0", "jsonrpsee-core", "pin-project", - "rustls 0.23.13", + "rustls 0.23.18", "rustls-pki-types", "rustls-platform-verifier", "soketto", @@ -6090,7 +6116,7 @@ dependencies = [ "quinn 0.11.5", "rand", "ring 0.17.8", - "rustls 0.23.13", + "rustls 0.23.18", "socket2 0.5.7", "thiserror 1.0.64", "tokio", @@ -6287,7 +6313,7 @@ dependencies = [ "libp2p-identity", "rcgen 0.11.3", "ring 0.17.8", - "rustls 0.23.13", + "rustls 0.23.18", "rustls-webpki 0.101.7", "thiserror 1.0.64", "x509-parser 0.16.0", @@ -9000,7 +9026,7 @@ dependencies = 
[ "quinn-proto 0.11.8", "quinn-udp 0.5.5", "rustc-hash 2.0.0", - "rustls 0.23.13", + "rustls 0.23.18", "socket2 0.5.7", "thiserror 1.0.64", "tokio", @@ -9052,7 +9078,7 @@ dependencies = [ "rand", "ring 0.17.8", "rustc-hash 2.0.0", - "rustls 0.23.13", + "rustls 0.23.18", "slab", "thiserror 1.0.64", "tinyvec", @@ -9345,6 +9371,48 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "reqwest" +version = "0.12.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.4.1", + "hyper-rustls 0.27.3", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn 0.11.5", + "rustls 0.23.18", + "rustls-pemfile 2.2.0", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots 0.26.6", + "windows-registry", +] + [[package]] name = "resolv-conf" version = "0.7.0" @@ -9582,6 +9650,7 @@ version = "0.20.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" dependencies = [ + "log", "ring 0.16.20", "sct", "webpki", @@ -9615,9 +9684,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.13" +version = "0.23.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dabaac7466917e566adb06783a81ca48944c6898a1b08b9374106dd671f4c8" +checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f" dependencies = [ "log", "once_cell", @@ -9673,9 +9742,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-platform-verifier" @@ -9688,7 +9757,7 @@ dependencies = [ "jni", "log", "once_cell", - "rustls 0.23.13", + "rustls 0.23.18", "rustls-native-certs 0.7.3", "rustls-platform-verifier-android", "rustls-webpki 0.102.8", @@ -10443,7 +10512,7 @@ dependencies = [ "futures", "futures-timer", "hyper 0.14.30", - "hyper-rustls", + "hyper-rustls 0.24.2", "log", "num_cpus", "once_cell", @@ -12520,7 +12589,7 @@ dependencies = [ name = "subspace-data-retrieval" version = "0.1.0" dependencies = [ - "async-lock 3.4.0", + "anyhow", "async-trait", "futures", "parity-scale-codec", @@ -12615,9 +12684,11 @@ dependencies = [ "ss58-registry", "static_assertions", "subspace-core-primitives", + "subspace-data-retrieval", "subspace-erasure-coding", "subspace-farmer-components", "subspace-kzg", + "subspace-logging", "subspace-metrics", "subspace-networking", "subspace-proof-of-space", @@ -12625,14 +12696,12 @@ dependencies = [ "subspace-rpc-primitives", "subspace-verification", "substrate-bip39", - "supports-color", "tempfile", "thiserror 2.0.0", "thread-priority", "tokio", "tokio-stream", "tracing", - "tracing-subscriber", "ulid", "zeroize", ] @@ -12643,7 +12712,6 @@ version = "0.1.0" dependencies = [ "anyhow", 
"async-lock 3.4.0", - "async-trait", "backoff", "bitvec", "criterion", @@ -12660,6 +12728,7 @@ dependencies = [ "static_assertions", "subspace-archiving", "subspace-core-primitives", + "subspace-data-retrieval", "subspace-erasure-coding", "subspace-kzg", "subspace-proof-of-space", @@ -12674,7 +12743,9 @@ dependencies = [ name = "subspace-gateway" version = "0.1.0" dependencies = [ + "actix-web", "anyhow", + "async-lock 3.4.0", "async-trait", "clap", "fdlimit", @@ -12682,20 +12753,21 @@ dependencies = [ "hex", "jsonrpsee", "mimalloc", - "parking_lot 0.12.3", + "reqwest", + "serde", + "serde_json", "subspace-core-primitives", "subspace-data-retrieval", "subspace-erasure-coding", "subspace-gateway-rpc", "subspace-kzg", + "subspace-logging", "subspace-networking", "subspace-rpc-primitives", "subspace-verification", "supports-color", - "thiserror 2.0.0", "tokio", "tracing", - "tracing-subscriber", ] [[package]] @@ -12728,6 +12800,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "subspace-logging" +version = "0.0.1" +dependencies = [ + "supports-color", + "tracing", + "tracing-subscriber", +] + [[package]] name = "subspace-malicious-operator" version = "0.1.0" @@ -12808,13 +12889,12 @@ dependencies = [ name = "subspace-networking" version = "0.1.0" dependencies = [ - "async-mutex", + "async-lock 3.4.0", "async-trait", "backoff", "bytes", "clap", "derive_more 1.0.0", - "either", "event-listener-primitives", "fs2", "futures", @@ -12834,12 +12914,12 @@ dependencies = [ "serde", "serde_json", "subspace-core-primitives", + "subspace-logging", "subspace-metrics", "thiserror 2.0.0", "tokio", "tokio-stream", "tracing", - "tracing-subscriber", "unsigned-varint 0.8.0", "void", ] @@ -12852,7 +12932,6 @@ dependencies = [ "bip39", "clap", "cross-domain-message-gossip", - "dirs", "domain-client-message-relayer", "domain-client-operator", "domain-eth-service", @@ -12894,11 +12973,11 @@ dependencies = [ "sp-core", "sp-domain-digests", "sp-domains", - "sp-domains-fraud-proof", "sp-keystore", "sp-messenger", "sp-runtime", "subspace-core-primitives", + "subspace-logging", "subspace-metrics", "subspace-networking", "subspace-proof-of-space", @@ -12907,13 +12986,11 @@ dependencies = [ "subspace-service", "substrate-build-script-utils", "substrate-prometheus-endpoint", - "supports-color", "tempfile", "thiserror 2.0.0", "tokio", "tokio-stream", "tracing", - "tracing-subscriber", ] [[package]] @@ -13054,8 +13131,10 @@ dependencies = [ name = "subspace-service" version = "0.1.0" dependencies = [ + "anyhow", "array-bytes", "async-channel 1.9.0", + "async-lock 3.4.0", "async-trait", "cross-domain-message-gossip", "domain-runtime-primitives", @@ -13083,9 +13162,7 @@ dependencies = [ "sc-executor", "sc-informant", "sc-network", - "sc-network-light", "sc-network-sync", - "sc-network-transactions", "sc-offchain", "sc-proof-of-time", "sc-rpc", @@ -13098,7 +13175,6 @@ dependencies = [ "sc-tracing", "sc-transaction-pool", "sc-transaction-pool-api", - "sc-utils", "schnellru", "schnorrkel", "sp-api", @@ -13125,6 +13201,7 @@ dependencies = [ "static_assertions", "subspace-archiving", "subspace-core-primitives", + "subspace-data-retrieval", "subspace-erasure-coding", "subspace-kzg", "subspace-networking", @@ -13451,6 +13528,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = 
"synstructure" version = "0.12.6" @@ -13729,6 +13815,17 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "tokio-rustls" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +dependencies = [ + "rustls 0.20.9", + "tokio", + "webpki", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -13745,7 +13842,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.13", + "rustls 0.23.18", "rustls-pki-types", "tokio", ] @@ -14701,6 +14798,15 @@ dependencies = [ "untrusted 0.9.0", ] +[[package]] +name = "webpki-roots" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" +dependencies = [ + "webpki", +] + [[package]] name = "webpki-roots" version = "0.25.4" @@ -14803,6 +14909,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.42.0" diff --git a/README.md b/README.md index fe7fed60c3..12c275d4d1 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Latest Release](https://img.shields.io/github/v/release/autonomys/subspace?display_name=tag&style=flat-square)](https://github.com/autonomys/subspace/releases) [![Downloads Latest](https://img.shields.io/github/downloads/autonomys/subspace/latest/total?style=flat-square)](https://github.com/autonomys/subspace/releases/latest) -[![Rust](https://img.shields.io/github/actions/workflow/status/autonomys/subspace/rust.yml?branch=main)](https://github.com/autonomys/subspace/actions/workflows/rust.yaml) +[![Rust](https://img.shields.io/github/actions/workflow/status/autonomys/subspace/rust.yml?branch=main)](https://github.com/autonomys/subspace/actions/workflows/rust.yml) [![Rust Docs](https://img.shields.io/github/actions/workflow/status/autonomys/subspace/rustdoc.yml?branch=main)](https://autonomys.github.io/subspace) This is a mono repository for [Subspace Network](https://subspace.network/) implementation, primarily containing diff --git a/crates/pallet-domains/src/benchmarking.rs b/crates/pallet-domains/src/benchmarking.rs index ec25b92816..cee5e4cc4f 100644 --- a/crates/pallet-domains/src/benchmarking.rs +++ b/crates/pallet-domains/src/benchmarking.rs @@ -25,7 +25,6 @@ use frame_support::traits::fungible::{Inspect, Mutate}; use frame_support::traits::Hooks; use frame_system::{Pallet as System, RawOrigin}; use sp_core::crypto::{Ss58Codec, UncheckedFrom}; -use sp_core::ByteArray; use sp_domains::{ dummy_opaque_bundle, DomainId, 
ExecutionReceipt, OperatorAllowList, OperatorId, OperatorPublicKey, OperatorRewardSource, OperatorSignature, PermissionedActionAllowedBy, @@ -577,26 +576,9 @@ mod benchmarks { let domain_id = register_domain::(); let operator_id = NextOperatorId::::get(); - - // TODO: the `(key, signature)` is failed to verify in `cargo test --features runtime-benchmarks` but it - // will pass when doing the actual benchmark with `subspace-node benchmark pallet ...`, need more investigations. - let (key, signature) = { - let key = OperatorPublicKey::from_ss58check( - "5Gv1Uopoqo1k7125oDtFSCmxH4DzuCiBU7HBKu2bF1GZFsEb", - ) - .unwrap(); - - // signature data included operator_account since result from `account` with same - // input is always deterministic - let sig = OperatorSignature::from_slice(&[ - 88, 91, 154, 118, 137, 117, 109, 164, 232, 186, 101, 199, 94, 12, 91, 47, 228, 198, - 61, 146, 200, 227, 152, 191, 205, 114, 81, 127, 192, 158, 48, 96, 211, 199, 237, - 121, 170, 38, 118, 109, 3, 44, 198, 54, 155, 133, 240, 77, 200, 117, 107, 34, 248, - 238, 144, 101, 200, 146, 20, 94, 180, 98, 40, 134, - ]) - .unwrap(); - (key, sig) - }; + let key = + OperatorPublicKey::from_ss58check("5Gv1Uopoqo1k7125oDtFSCmxH4DzuCiBU7HBKu2bF1GZFsEb") + .unwrap(); let operator_config = OperatorConfig { signing_key: key, minimum_nominator_stake: T::MinNominatorStake::get(), @@ -609,7 +591,6 @@ mod benchmarks { domain_id, T::MinOperatorStake::get(), operator_config.clone(), - signature, ); assert_eq!(NextOperatorId::::get(), operator_id + 1); @@ -986,7 +967,6 @@ mod benchmarks { domain_id, T::MinOperatorStake::get(), operator_config.clone(), - None, )); assert_eq!( OperatorIdOwner::::get(operator_id), diff --git a/crates/pallet-domains/src/bundle_storage_fund.rs b/crates/pallet-domains/src/bundle_storage_fund.rs index 76376668e3..214196a4de 100644 --- a/crates/pallet-domains/src/bundle_storage_fund.rs +++ b/crates/pallet-domains/src/bundle_storage_fund.rs @@ -131,9 +131,7 @@ pub fn refund_storage_fee( } // Drop any dust and deregistered/slashed operator's bundle storage fee to the treasury - if !remaining_fee.is_zero() { - mint_into_treasury::(remaining_fee).ok_or(Error::MintBalance)?; - } + mint_into_treasury::(remaining_fee).map_err(|_| Error::MintBalance)?; Ok(()) } diff --git a/crates/pallet-domains/src/lib.rs b/crates/pallet-domains/src/lib.rs index 9e769eb012..80b2373522 100644 --- a/crates/pallet-domains/src/lib.rs +++ b/crates/pallet-domains/src/lib.rs @@ -231,8 +231,7 @@ mod pallet { use sp_domains::{ BundleDigest, DomainBundleSubmitted, DomainId, DomainSudoCall, DomainsTransfersTracker, EpochIndex, GenesisDomain, OnChainRewards, OnDomainInstantiated, OperatorAllowList, - OperatorId, OperatorPublicKey, OperatorRewardSource, OperatorSignature, RuntimeId, - RuntimeObject, RuntimeType, + OperatorId, OperatorRewardSource, RuntimeId, RuntimeObject, RuntimeType, }; use sp_domains_fraud_proof::fraud_proof_runtime_interface::domain_runtime_call; use sp_domains_fraud_proof::storage_proof::{self, FraudProofStorageKeyProvider}; @@ -485,12 +484,6 @@ mod pallet { pub(super) type OperatorIdOwner = StorageMap<_, Identity, OperatorId, T::AccountId, OptionQuery>; - /// Indexes operator signing key against OperatorId. 
- #[pallet::storage] - #[pallet::getter(fn operator_signing_key)] - pub(super) type OperatorSigningKey = - StorageMap<_, Identity, OperatorPublicKey, OperatorId, OptionQuery>; - #[pallet::storage] #[pallet::getter(fn domain_staking_summary)] pub(super) type DomainStakingSummary = @@ -1357,18 +1350,12 @@ mod pallet { domain_id: DomainId, amount: BalanceOf, config: OperatorConfig>, - signing_key_proof_of_ownership: OperatorSignature, ) -> DispatchResult { let owner = ensure_signed(origin)?; - let (operator_id, current_epoch_index) = do_register_operator::( - owner, - domain_id, - amount, - config, - Some(signing_key_proof_of_ownership), - ) - .map_err(Error::::from)?; + let (operator_id, current_epoch_index) = + do_register_operator::(owner, domain_id, amount, config) + .map_err(Error::::from)?; Self::deposit_event(Event::OperatorRegistered { operator_id, @@ -1854,8 +1841,6 @@ mod pallet { domain_id, operator_stake, operator_config, - // safe to not check the signing key ownership during genesis - None, ) .expect("Genesis operator registration must succeed"); diff --git a/crates/pallet-domains/src/staking.rs b/crates/pallet-domains/src/staking.rs index c0ef23714f..8e650c6bc3 100644 --- a/crates/pallet-domains/src/staking.rs +++ b/crates/pallet-domains/src/staking.rs @@ -6,8 +6,8 @@ extern crate alloc; use crate::bundle_storage_fund::{self, deposit_reserve_for_storage_fund}; use crate::pallet::{ Deposits, DomainRegistry, DomainStakingSummary, HeadDomainNumber, NextOperatorId, - NominatorCount, OperatorIdOwner, OperatorSigningKey, Operators, PendingSlashes, - PendingStakingOperationCount, Withdrawals, + NominatorCount, OperatorIdOwner, Operators, PendingSlashes, PendingStakingOperationCount, + Withdrawals, }; use crate::staking_epoch::{mint_funds, mint_into_treasury}; use crate::{ @@ -21,12 +21,9 @@ use frame_support::{ensure, PalletError}; use frame_system::pallet_prelude::BlockNumberFor; use scale_info::TypeInfo; use sp_core::{sr25519, Get}; -use sp_domains::{ - DomainId, EpochIndex, OperatorId, OperatorPublicKey, OperatorRewardSource, OperatorSignature, - OperatorSigningKeyProofOfOwnershipData, -}; +use sp_domains::{DomainId, EpochIndex, OperatorId, OperatorPublicKey, OperatorRewardSource}; use sp_runtime::traits::{CheckedAdd, CheckedSub, Zero}; -use sp_runtime::{Perbill, Percent, Perquintill, RuntimeAppPublic, Saturating}; +use sp_runtime::{Perbill, Percent, Perquintill, Saturating}; use sp_std::collections::btree_map::BTreeMap; use sp_std::collections::btree_set::BTreeSet; use sp_std::collections::vec_deque::VecDeque; @@ -184,8 +181,6 @@ pub struct Operator { pub nomination_tax: Percent, /// Total active stake of combined nominators under this operator. pub current_total_stake: Balance, - /// Total rewards this operator received this current epoch. - pub current_epoch_rewards: Balance, /// Total shares of all the nominators under this operator. 
pub current_total_shares: Share, /// The status of the operator, it may be stale due to the `OperatorStatus::PendingSlash` is @@ -230,7 +225,6 @@ impl Operator( domain_id: DomainId, amount: BalanceOf, config: OperatorConfig>, - maybe_signing_key_proof_of_ownership: Option, ) -> Result<(OperatorId, EpochIndex), Error> { note_pending_staking_operation::(domain_id)?; @@ -331,24 +321,6 @@ pub fn do_register_operator( Error::InvalidOperatorSigningKey ); - ensure!( - !OperatorSigningKey::::contains_key(config.signing_key.clone()), - Error::DuplicateOperatorSigningKey - ); - - if let Some(signing_key_proof_of_ownership) = maybe_signing_key_proof_of_ownership { - let signing_key_signature_data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_owner.clone(), - }; - ensure!( - config.signing_key.verify( - &signing_key_signature_data.encode(), - &signing_key_proof_of_ownership, - ), - Error::InvalidSigningKeySignature - ); - } - ensure!( config.minimum_nominator_stake >= T::MinNominatorStake::get(), Error::MinimumNominatorStake @@ -398,7 +370,6 @@ pub fn do_register_operator( minimum_nominator_stake, nomination_tax, current_total_stake: Zero::zero(), - current_epoch_rewards: Zero::zero(), current_total_shares: Zero::zero(), partial_status: OperatorStatus::Registered, // sum total deposits added during this epoch. @@ -407,7 +378,6 @@ pub fn do_register_operator( total_storage_fee_deposit: new_deposit.storage_fee_deposit, }; Operators::::insert(operator_id, operator); - OperatorSigningKey::::insert(signing_key, operator_id); // update stake summary to include new operator for next epoch domain_stake_summary.next_operators.insert(operator_id); // update pending transfers @@ -1132,16 +1102,7 @@ pub(crate) fn do_unlock_nominator( ); let mut total_shares = operator.current_total_shares; - // take any operator current epoch rewards to include in total stake and set to zero. - let operator_current_epoch_rewards = operator.current_epoch_rewards; - operator.current_epoch_rewards = Zero::zero(); - - // calculate total stake of operator. - let mut total_stake = operator - .current_total_stake - .checked_add(&operator_current_epoch_rewards) - .ok_or(Error::BalanceOverflow)?; - + let mut total_stake = operator.current_total_stake; let share_price = SharePrice::new::(total_shares, total_stake); let mut total_storage_fee_deposit = operator.total_storage_fee_deposit; @@ -1279,7 +1240,7 @@ pub(crate) fn do_unlock_nominator( && !Deposits::::contains_key(operator_id, operator_owner); if cleanup_operator { - do_cleanup_operator::(operator_id, total_stake, operator.signing_key.clone())? + do_cleanup_operator::(operator_id, total_stake)? 
} else { // set update total shares, total stake and total storage fee deposit for operator operator.current_total_shares = total_shares; @@ -1297,21 +1258,17 @@ pub(crate) fn do_unlock_nominator( pub(crate) fn do_cleanup_operator( operator_id: OperatorId, total_stake: BalanceOf, - operator_signing_key: OperatorPublicKey, ) -> Result<(), Error> { // transfer any remaining storage fund to treasury bundle_storage_fund::transfer_all_to_treasury::(operator_id) .map_err(Error::BundleStorageFund)?; // transfer any remaining amount to treasury - mint_into_treasury::(total_stake).ok_or(Error::MintBalance)?; + mint_into_treasury::(total_stake)?; // remove OperatorOwner Details OperatorIdOwner::::remove(operator_id); - // remove operator signing key - OperatorSigningKey::::remove(operator_signing_key); - // remove operator epoch share prices let _ = OperatorEpochSharePrice::::clear_prefix(operator_id, u32::MAX, None); @@ -1328,6 +1285,9 @@ pub(crate) fn do_reward_operators( operators: IntoIter, rewards: BalanceOf, ) -> Result<(), Error> { + if rewards.is_zero() { + return Ok(()); + } DomainStakingSummary::::mutate(domain_id, |maybe_stake_summary| { let stake_summary = maybe_stake_summary .as_mut() @@ -1338,38 +1298,25 @@ pub(crate) fn do_reward_operators( let operator_weights = operators.into_iter().fold( BTreeMap::::new(), |mut acc, operator_id| { - let total_weight = match acc.get(&operator_id) { - None => 1, - Some(weight) => weight + 1, - }; - acc.insert(operator_id, total_weight); + acc.entry(operator_id) + .and_modify(|weight| *weight += 1) + .or_insert(1); acc }, ); let mut allocated_rewards = BalanceOf::::zero(); - let mut weight_balance_cache = BTreeMap::>::new(); for (operator_id, weight) in operator_weights { - let operator_reward = match weight_balance_cache.get(&weight) { - None => { - let distribution = Perquintill::from_rational(weight, total_count); - let operator_reward = distribution.mul_floor(rewards); - weight_balance_cache.insert(weight, operator_reward); - operator_reward - } - Some(operator_reward) => *operator_reward, - }; - - let total_reward = match stake_summary.current_epoch_rewards.get(&operator_id) { - None => operator_reward, - Some(rewards) => rewards - .checked_add(&operator_reward) - .ok_or(Error::BalanceOverflow)?, + let operator_reward = { + let distribution = Perquintill::from_rational(weight, total_count); + distribution.mul_floor(rewards) }; stake_summary .current_epoch_rewards - .insert(operator_id, total_reward); + .entry(operator_id) + .and_modify(|rewards| *rewards = rewards.saturating_add(operator_reward)) + .or_insert(operator_reward); Pallet::::deposit_event(Event::OperatorRewarded { source: source.clone(), @@ -1388,7 +1335,6 @@ pub(crate) fn do_reward_operators( .checked_sub(&allocated_rewards) .ok_or(Error::BalanceUnderflow)?, ) - .ok_or(Error::MintBalance) }) } @@ -1421,9 +1367,14 @@ pub(crate) fn do_mark_operators_as_slashed( .as_mut() .ok_or(Error::DomainNotInitialized)?; - // slash and remove operator from next epoch set + // slash and remove operator from next and current epoch set operator.update_status(OperatorStatus::Slashed); + stake_summary.current_operators.remove(operator_id); stake_summary.next_operators.remove(operator_id); + stake_summary.current_total_stake = stake_summary + .current_total_stake + .checked_sub(&operator.current_total_stake) + .ok_or(Error::BalanceUnderflow)?; pending_slashes.insert(*operator_id); PendingSlashes::::insert(operator.current_domain_id, pending_slashes); @@ -1450,24 +1401,21 @@ pub(crate) mod tests { use 
crate::staking::{ do_convert_previous_epoch_withdrawal, do_mark_operators_as_slashed, do_nominate_operator, do_reward_operators, do_unlock_funds, do_withdraw_stake, Error as StakingError, Operator, - OperatorConfig, OperatorSigningKeyProofOfOwnershipData, OperatorStatus, StakingSummary, - WithdrawStake, + OperatorConfig, OperatorStatus, StakingSummary, WithdrawStake, }; use crate::staking_epoch::{do_finalize_domain_current_epoch, do_slash_operator}; use crate::tests::{new_test_ext, ExistentialDeposit, RuntimeOrigin, Test}; use crate::{ bundle_storage_fund, BalanceOf, Error, NominatorId, SlashedReason, MAX_NOMINATORS_TO_SLASH, }; - use codec::Encode; use frame_support::traits::fungible::Mutate; use frame_support::traits::Currency; use frame_support::weights::Weight; use frame_support::{assert_err, assert_ok}; - use sp_core::crypto::UncheckedFrom; use sp_core::{sr25519, Pair, U256}; use sp_domains::{ DomainId, OperatorAllowList, OperatorId, OperatorPair, OperatorPublicKey, - OperatorRewardSource, OperatorSignature, + OperatorRewardSource, }; use sp_runtime::traits::Zero; use sp_runtime::{PerThing, Perbill}; @@ -1488,7 +1436,6 @@ pub(crate) mod tests { operator_stake: BalanceOf, minimum_nominator_stake: BalanceOf, signing_key: OperatorPublicKey, - signature: OperatorSignature, mut nominators: BTreeMap, (BalanceOf, BalanceOf)>, ) -> (OperatorId, OperatorConfig>) { nominators.insert(operator_account, (operator_free_balance, operator_stake)); @@ -1545,7 +1492,6 @@ pub(crate) mod tests { domain_id, operator_stake, operator_config.clone(), - signature, ); assert_ok!(res); @@ -1589,7 +1535,6 @@ pub(crate) mod tests { domain_id, Default::default(), operator_config, - OperatorSignature::unchecked_from([1u8; 64]), ); assert_err!( res, @@ -1612,17 +1557,11 @@ pub(crate) mod tests { nomination_tax: Default::default(), }; - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); - let res = Domains::register_operator( RuntimeOrigin::signed(operator_account), domain_id, Default::default(), operator_config, - signature, ); assert_err!( res, @@ -1635,7 +1574,7 @@ pub(crate) mod tests { fn test_register_operator() { let domain_id = DomainId::new(0); let operator_account = 1; - let operator_free_balance = 1500 * SSC; + let operator_free_balance = 2500 * SSC; let operator_total_stake = 1000 * SSC; let operator_stake = 800 * SSC; let operator_storage_fee_deposit = 200 * SSC; @@ -1643,10 +1582,6 @@ pub(crate) mod tests { let mut ext = new_test_ext(); ext.execute_with(|| { - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let (operator_id, mut operator_config) = register_operator( domain_id, operator_account, @@ -1654,7 +1589,6 @@ pub(crate) mod tests { operator_total_stake, SSC, pair.public(), - signature.clone(), BTreeMap::new(), ); @@ -1673,7 +1607,6 @@ pub(crate) mod tests { minimum_nominator_stake: SSC, nomination_tax: Default::default(), current_total_stake: operator_stake, - current_epoch_rewards: 0, current_total_shares: operator_stake, partial_status: OperatorStatus::Registered, deposits_in_epoch: 0, @@ -1691,32 +1624,23 @@ pub(crate) mod tests { operator_free_balance - operator_total_stake - ExistentialDeposit::get() ); - // cannot register with same operator key + // registering with same operator key is allowed let res = Domains::register_operator( RuntimeOrigin::signed(operator_account), domain_id, operator_stake, 
operator_config.clone(), - signature.clone(), - ); - assert_err!( - res, - Error::::Staking(crate::staking::Error::DuplicateOperatorSigningKey) ); + assert_ok!(res); // cannot use the locked funds to register a new operator let new_pair = OperatorPair::from_seed(&U256::from(1u32).into()); operator_config.signing_key = new_pair.public(); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = new_pair.sign(&data.encode()); let res = Domains::register_operator( RuntimeOrigin::signed(operator_account), domain_id, operator_stake, operator_config, - signature, ); assert_err!( res, @@ -1737,10 +1661,6 @@ pub(crate) mod tests { let operator_stake = 800 * SSC; let operator_storage_fee_deposit = 200 * SSC; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let nominator_account = 2; let nominator_free_balance = 150 * SSC; @@ -1757,7 +1677,6 @@ pub(crate) mod tests { operator_total_stake, 10 * SSC, pair.public(), - signature, BTreeMap::from_iter(vec![( nominator_account, (nominator_free_balance, nominator_total_stake), @@ -1855,10 +1774,6 @@ pub(crate) mod tests { let operator_stake = 200 * SSC; let operator_free_balance = 250 * SSC; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let mut ext = new_test_ext(); ext.execute_with(|| { let (operator_id, _) = register_operator( @@ -1868,7 +1783,6 @@ pub(crate) mod tests { operator_stake, SSC, pair.public(), - signature, BTreeMap::new(), ); @@ -1980,10 +1894,6 @@ pub(crate) mod tests { let domain_id = DomainId::new(0); let operator_account = 0; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let mut total_balance = nominators.iter().map(|n| n.1).sum::>() + operator_reward + maybe_deposit.unwrap_or(0); @@ -2006,7 +1916,6 @@ pub(crate) mod tests { operator_stake, minimum_nominator_stake, pair.public(), - signature, nominators, ); @@ -2621,10 +2530,6 @@ pub(crate) mod tests { let operator_free_balance = 250 * SSC; let operator_stake = 200 * SSC; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let nominator_account = 2; let nominator_free_balance = 150 * SSC; let nominator_stake = 100 * SSC; @@ -2647,7 +2552,6 @@ pub(crate) mod tests { operator_stake, 10 * SSC, pair.public(), - signature, BTreeMap::from_iter(nominators), ); @@ -2752,10 +2656,6 @@ pub(crate) mod tests { let operator_stake = 200 * SSC; let operator_extra_deposit = 40 * SSC; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let nominator_account = 2; let nominator_free_balance = 150 * SSC; let nominator_stake = 100 * SSC; @@ -2785,7 +2685,6 @@ pub(crate) mod tests { operator_stake, 10 * SSC, pair.public(), - signature, BTreeMap::from_iter(nominators), ); @@ -2908,10 +2807,6 @@ pub(crate) mod tests { let operator_stake = 200 * SSC; let operator_extra_deposit = 40 * SSC; let pair = 
OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let nominator_accounts: Vec = (2..22).collect(); let nominator_free_balance = 150 * SSC; @@ -2949,7 +2844,6 @@ operator_stake, 10 * SSC, pair.public(), - signature, BTreeMap::from_iter(nominators), ); @@ -3086,21 +2980,6 @@ let pair_2 = OperatorPair::from_seed(&U256::from(1u32).into()); let pair_3 = OperatorPair::from_seed(&U256::from(2u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account_1, - }; - let signature_1 = pair_1.sign(&data.encode()); - - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account_2, - }; - let signature_2 = pair_2.sign(&data.encode()); - - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account_3, - }; - let signature_3 = pair_3.sign(&data.encode()); - let mut ext = new_test_ext(); ext.execute_with(|| { let (operator_id_1, _) = register_operator( @@ -3110,7 +2989,6 @@ operator_stake, 10 * SSC, pair_1.public(), - signature_1, Default::default(), ); @@ -3121,7 +2999,6 @@ operator_stake, 10 * SSC, pair_2.public(), - signature_2, Default::default(), ); @@ -3132,7 +3009,6 @@ operator_stake, 10 * SSC, pair_3.public(), - signature_3, Default::default(), ); @@ -3233,10 +3109,6 @@ let operator_stake = 80 * SSC; let operator_storage_fee_deposit = 20 * SSC; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let nominator_account = 2; let mut ext = new_test_ext(); @@ -3248,7 +3120,6 @@ operator_total_stake, SSC, pair.public(), - signature, BTreeMap::default(), ); diff --git a/crates/pallet-domains/src/staking_epoch.rs b/crates/pallet-domains/src/staking_epoch.rs index 969158b562..2f32541ee0 100644 --- a/crates/pallet-domains/src/staking_epoch.rs +++ b/crates/pallet-domains/src/staking_epoch.rs @@ -67,6 +67,7 @@ pub(crate) fn operator_take_reward_tax_and_stake( ) -> Result { let mut rewarded_operator_count = 0; DomainStakingSummary::::try_mutate(domain_id, |maybe_domain_stake_summary| { + let mut to_treasury = BalanceOf::::zero(); let stake_summary = maybe_domain_stake_summary .as_mut() .ok_or(TransitionError::DomainNotInitialized)?; @@ -74,9 +75,17 @@ while let Some((operator_id, reward)) = stake_summary.current_epoch_rewards.pop_first() { Operators::::try_mutate(operator_id, |maybe_operator| { let operator = match maybe_operator.as_mut() { - // it is possible that operator may have de registered by the time they got rewards - // if not available, skip the operator - None => return Ok(()), + // It is possible that the operator may have deregistered and unlocked by the time they + // got rewards; in this case, move the reward to the treasury + None => { + to_treasury += reward; + return Ok(()) + } + // Move the rewards of slashed and pending-slash operators to the treasury + Some(operator) if matches!(*operator.status::(operator_id), OperatorStatus::Slashed | OperatorStatus::PendingSlash) => { + to_treasury += reward; + return Ok(()) + } Some(operator) => operator, }; @@ -125,13 +134,14 @@ pub(crate) fn
operator_take_reward_tax_and_stake( }); } - // add remaining rewards to nominators to be distributed during the epoch transition + // Add the remaining rewards to the operator's `current_total_stake`, which increases the + // share price of the staking pool as a way to distribute the reward to the nominators let rewards = reward .checked_sub(&operator_tax_amount) .ok_or(TransitionError::BalanceUnderflow)?; - operator.current_epoch_rewards = operator - .current_epoch_rewards + operator.current_total_stake = operator + .current_total_stake .checked_add(&rewards) .ok_or(TransitionError::BalanceOverflow)?; @@ -141,6 +151,8 @@ pub(crate) fn operator_take_reward_tax_and_stake( })?; } + mint_into_treasury::(to_treasury)?; + Ok(()) }) .map_err(Error::OperatorRewardStaking)?; @@ -228,51 +240,38 @@ pub(crate) fn do_finalize_operator_epoch_staking( // if there are no deposits, withdrawls, and epoch rewards for this operator // then short-circuit and return early. - if operator.deposits_in_epoch.is_zero() - && operator.withdrawals_in_epoch.is_zero() - && operator.current_epoch_rewards.is_zero() - { + if operator.deposits_in_epoch.is_zero() && operator.withdrawals_in_epoch.is_zero() { return Ok((operator.current_total_stake, false)); } - let total_stake = operator - .current_total_stake - .checked_add(&operator.current_epoch_rewards) - .ok_or(TransitionError::BalanceOverflow)?; - - let total_shares = operator.current_total_shares; - + let mut total_stake = operator.current_total_stake; + let mut total_shares = operator.current_total_shares; let share_price = SharePrice::new::(total_shares, total_stake); // calculate and subtract total withdrew shares from previous epoch - let (total_stake, total_shares) = if !operator.withdrawals_in_epoch.is_zero() { + if !operator.withdrawals_in_epoch.is_zero() { let withdraw_stake = share_price.shares_to_stake::(operator.withdrawals_in_epoch); - let total_stake = total_stake + total_stake = total_stake .checked_sub(&withdraw_stake) .ok_or(TransitionError::BalanceUnderflow)?; - let total_shares = total_shares + total_shares = total_shares .checked_sub(&operator.withdrawals_in_epoch) .ok_or(TransitionError::ShareUnderflow)?; operator.withdrawals_in_epoch = Zero::zero(); - (total_stake, total_shares) - } else { - (total_stake, total_shares) }; // calculate and add total deposits from the previous epoch - let (total_stake, total_shares) = if !operator.deposits_in_epoch.is_zero() { + if !operator.deposits_in_epoch.is_zero() { let deposited_shares = share_price.stake_to_shares::(operator.deposits_in_epoch); - let total_stake = total_stake + total_stake = total_stake .checked_add(&operator.deposits_in_epoch) .ok_or(TransitionError::BalanceOverflow)?; - let total_shares = total_shares + total_shares = total_shares .checked_add(&deposited_shares) .ok_or(TransitionError::ShareOverflow)?; + operator.deposits_in_epoch = Zero::zero(); - (total_stake, total_shares) - } else { - (total_stake, total_shares) }; // update operator pool epoch share price @@ -288,7 +287,6 @@ // update operator state operator.current_total_shares = total_shares; operator.current_total_stake = total_stake; - operator.current_epoch_rewards = Zero::zero(); Operators::::set(operator_id, Some(operator)); Ok((total_stake, true)) @@ -306,23 +304,26 @@ pub(crate) fn mint_funds( Ok(()) } -pub(crate) fn mint_into_treasury(amount: BalanceOf) -> Option<()> { - let existing_funds = AccumulatedTreasuryFunds::::get(); - let total_funds =
existing_funds.checked_add(&amount)?; - if total_funds.is_zero() { - return Some(()); +pub(crate) fn mint_into_treasury(amount: BalanceOf) -> Result<(), TransitionError> { + if amount.is_zero() { + return Ok(()); } + let total_funds = AccumulatedTreasuryFunds::::get() + .checked_add(&amount) + .ok_or(TransitionError::BalanceOverflow)?; + match T::Currency::can_deposit(&T::TreasuryAccount::get(), total_funds, Provenance::Minted) { // Deposit is possible, so we mint the funds into treasury. DepositConsequence::Success => { - T::Currency::mint_into(&T::TreasuryAccount::get(), total_funds).ok()?; + T::Currency::mint_into(&T::TreasuryAccount::get(), total_funds) + .map_err(|_| TransitionError::MintBalance)?; AccumulatedTreasuryFunds::::kill(); } // Deposit cannot be done to treasury, so hold the funds until we can. _ => AccumulatedTreasuryFunds::::set(total_funds), } - Some(()) + Ok(()) } /// Slashes any pending slashed operators. @@ -354,12 +355,7 @@ pub(crate) fn do_slash_operator( let staked_hold_id = T::HoldIdentifier::staking_staked(); - let mut total_stake = operator - .current_total_stake - .checked_add(&operator.current_epoch_rewards) - .ok_or(TransitionError::BalanceOverflow)?; - - operator.current_epoch_rewards = Zero::zero(); + let mut total_stake = operator.current_total_stake; let mut total_shares = operator.current_total_shares; let share_price = SharePrice::new::(total_shares, total_stake); @@ -441,7 +437,7 @@ pub(crate) fn do_slash_operator( .checked_sub(&amount_to_slash_in_holding) .ok_or(TransitionError::BalanceUnderflow)?; - mint_into_treasury::(nominator_reward).ok_or(TransitionError::MintBalance)?; + mint_into_treasury::(nominator_reward)?; total_stake = total_stake.saturating_sub(nominator_staked_amount); total_shares = total_shares.saturating_sub(nominator_shares); @@ -493,7 +489,7 @@ pub(crate) fn do_slash_operator( nominator_count == 0 && !Deposits::::contains_key(operator_id, operator_owner); if cleanup_operator { - do_cleanup_operator::(operator_id, total_stake, operator.signing_key)?; + do_cleanup_operator::(operator_id, total_stake)?; if slashed_operators.is_empty() { PendingSlashes::::remove(domain_id); } else { @@ -516,7 +512,7 @@ mod tests { use crate::bundle_storage_fund::STORAGE_FEE_RESERVE; use crate::pallet::{ Deposits, DomainStakingSummary, HeadDomainNumber, LastEpochStakingDistribution, - NominatorCount, OperatorIdOwner, OperatorSigningKey, Operators, Withdrawals, + NominatorCount, OperatorIdOwner, Operators, Withdrawals, }; use crate::staking::tests::{register_operator, Share}; use crate::staking::{ @@ -530,13 +526,10 @@ mod tests { use crate::{BalanceOf, Config, HoldIdentifier, NominatorId}; #[cfg(not(feature = "std"))] use alloc::vec; - use codec::Encode; use frame_support::assert_ok; use frame_support::traits::fungible::InspectHold; use sp_core::{Pair, U256}; - use sp_domains::{ - DomainId, OperatorPair, OperatorRewardSource, OperatorSigningKeyProofOfOwnershipData, - }; + use sp_domains::{DomainId, OperatorPair, OperatorRewardSource}; use sp_runtime::traits::Zero; use sp_runtime::{PerThing, Percent}; use std::collections::BTreeMap; @@ -554,10 +547,6 @@ mod tests { let domain_id = DomainId::new(0); let operator_account = 1; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let minimum_free_balance = 10 * SSC; let mut nominators = BTreeMap::from_iter( nominators @@ -586,7 +575,6 @@ mod tests { 
operator_stake, 10 * SSC, pair.public(), - signature, BTreeMap::from_iter(nominators.clone()), ); @@ -649,7 +637,6 @@ mod tests { assert_eq!(Operators::::get(operator_id), None); assert_eq!(OperatorIdOwner::::get(operator_id), None); - assert_eq!(OperatorSigningKey::::get(pair.public()), None); assert_eq!(NominatorCount::::get(operator_id), 0); }); } @@ -692,10 +679,6 @@ mod tests { let domain_id = DomainId::new(0); let operator_account = 0; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let FinalizeDomainParams { total_deposit, rewards, @@ -729,7 +712,6 @@ mod tests { operator_stake, 10 * SSC, pair.public(), - signature, BTreeMap::from_iter(nominators), ); @@ -771,7 +753,6 @@ mod tests { operator.current_total_stake + operator.total_storage_fee_deposit, total_updated_stake ); - assert_eq!(operator.current_epoch_rewards, Zero::zero()); let domain_stake_summary = DomainStakingSummary::::get(domain_id).unwrap(); assert_eq!( @@ -808,10 +789,6 @@ mod tests { let domain_id = DomainId::new(0); let operator_account = 1; let pair = OperatorPair::from_seed(&U256::from(0u32).into()); - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: operator_account, - }; - let signature = pair.sign(&data.encode()); let operator_rewards = 10 * SSC; let mut nominators = BTreeMap::from_iter(vec![(1, (110 * SSC, 100 * SSC)), (2, (60 * SSC, 50 * SSC))]); @@ -827,7 +804,6 @@ mod tests { operator_stake, 10 * SSC, pair.public(), - signature, BTreeMap::from_iter(nominators), ); @@ -836,6 +812,7 @@ mod tests { // 10% tax let nomination_tax = Percent::from_parts(10); let mut operator = Operators::::get(operator_id).unwrap(); + let pre_total_stake = operator.current_total_stake; let pre_storage_fund_deposit = operator.total_storage_fee_deposit; operator.nomination_tax = nomination_tax; Operators::::insert(operator_id, operator); @@ -854,7 +831,7 @@ mod tests { let new_storage_fund_deposit = operator.total_storage_fee_deposit - pre_storage_fund_deposit; assert_eq!( - operator.current_epoch_rewards, + operator.current_total_stake - pre_total_stake, (10 * SSC - expected_operator_tax) ); diff --git a/crates/pallet-subspace/src/lib.rs b/crates/pallet-subspace/src/lib.rs index 5c68ababc0..b1dd52d7d4 100644 --- a/crates/pallet-subspace/src/lib.rs +++ b/crates/pallet-subspace/src/lib.rs @@ -371,8 +371,6 @@ pub mod pallet { pub enum Error { /// Solution range adjustment already enabled. SolutionRangeAdjustmentAlreadyEnabled, - /// Rewards already active. 
- RewardsAlreadyEnabled, /// Iterations are not multiple of number of checkpoints times two NotMultipleOfCheckpoints, /// Proof of time slot iterations must increase as hardware improves @@ -1116,10 +1114,6 @@ impl Pallet { fn do_enable_rewards_at( enable_rewards_at: EnableRewardsAt>, ) -> DispatchResult { - if EnableRewards::::get().is_some() { - return Err(Error::::RewardsAlreadyEnabled.into()); - } - match enable_rewards_at { EnableRewardsAt::Height(block_number) => { // Enable rewards at a particular block height (default to the next block after diff --git a/crates/pallet-subspace/src/mock.rs b/crates/pallet-subspace/src/mock.rs index a696b63bc9..ca14de2c38 100644 --- a/crates/pallet-subspace/src/mock.rs +++ b/crates/pallet-subspace/src/mock.rs @@ -41,8 +41,8 @@ use subspace_core_primitives::pos::PosSeed; use subspace_core_primitives::pot::PotOutput; use subspace_core_primitives::sectors::SectorId; use subspace_core_primitives::segments::{ - ArchivedBlockProgress, ArchivedHistorySegment, HistorySize, LastArchivedBlock, - RecordedHistorySegment, SegmentCommitment, SegmentHeader, SegmentIndex, + ArchivedBlockProgress, HistorySize, LastArchivedBlock, RecordedHistorySegment, + SegmentCommitment, SegmentHeader, SegmentIndex, }; use subspace_core_primitives::solutions::{RewardSignature, Solution, SolutionRange}; use subspace_core_primitives::{BlockNumber, PublicKey, SlotNumber, REWARD_SIGNING_CONTEXT}; @@ -304,7 +304,7 @@ pub fn create_signed_vote( slot: Slot, proof_of_time: PotOutput, future_proof_of_time: PotOutput, - archived_history_segment: &ArchivedHistorySegment, + archived_history_segment: &NewArchivedSegment, reward_address: ::AccountId, solution_range: SolutionRange, vote_solution_range: SolutionRange, diff --git a/crates/pallet-subspace/src/tests.rs b/crates/pallet-subspace/src/tests.rs index 0b7c0f118c..611174f79c 100644 --- a/crates/pallet-subspace/src/tests.rs +++ b/crates/pallet-subspace/src/tests.rs @@ -350,7 +350,7 @@ fn vote_after_genesis() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -381,7 +381,7 @@ fn vote_too_low_height() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -412,7 +412,7 @@ fn vote_past_future_height() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -433,7 +433,7 @@ fn vote_past_future_height() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -463,7 +463,7 @@ fn vote_wrong_parent() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -504,7 +504,7 @@ fn vote_past_future_slot() { 2.into(), Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -529,7 +529,7 @@ fn vote_past_future_slot() { 4.into(), Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -553,7 +553,7 @@ fn vote_past_future_slot() { 2.into(), Default::default(), 
Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -595,7 +595,7 @@ fn vote_same_slot() { Subspace::current_slot(), Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -615,7 +615,7 @@ fn vote_same_slot() { Subspace::current_slot(), Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -645,7 +645,7 @@ fn vote_bad_reward_signature() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -676,7 +676,7 @@ fn vote_unknown_segment_commitment() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -710,7 +710,7 @@ fn vote_outside_of_solution_range() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, SolutionRange::MAX, @@ -752,7 +752,7 @@ fn vote_solution_quality_too_high() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -770,7 +770,7 @@ fn vote_solution_quality_too_high() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, SolutionRange::MIN, // Create vote for block level of quality @@ -835,7 +835,7 @@ fn vote_invalid_proof_of_time() { slot + 1, test_proof_of_time, Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -863,7 +863,7 @@ fn vote_invalid_proof_of_time() { slot + 1, test_proof_of_time, Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -881,7 +881,7 @@ fn vote_invalid_proof_of_time() { slot, test_proof_of_time, Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -909,7 +909,7 @@ fn vote_invalid_proof_of_time() { slot, test_proof_of_time, Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -937,7 +937,7 @@ fn vote_invalid_proof_of_time() { slot, test_proof_of_time, test_future_proof_of_time, - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -975,7 +975,7 @@ fn vote_correct_signature() { Subspace::current_slot() + 1, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, 1, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -1014,7 +1014,7 @@ fn vote_equivocation_current_block_plus_vote() { slot, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, reward_address, pallet::SolutionRanges::::get().current, 
pallet::SolutionRanges::::get().voting_current, @@ -1072,7 +1072,7 @@ fn vote_equivocation_parent_block_plus_vote() { slot, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, reward_address, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -1132,7 +1132,7 @@ fn vote_equivocation_current_voters_duplicate() { slot, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, reward_address, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, @@ -1212,7 +1212,7 @@ fn vote_equivocation_parent_voters_duplicate() { slot, Default::default(), Default::default(), - &archived_segment.pieces, + archived_segment, reward_address, pallet::SolutionRanges::::get().current, pallet::SolutionRanges::::get().voting_current, diff --git a/crates/sc-consensus-subspace/src/archiver.rs b/crates/sc-consensus-subspace/src/archiver.rs index 967bcf6f9b..3109104e5f 100644 --- a/crates/sc-consensus-subspace/src/archiver.rs +++ b/crates/sc-consensus-subspace/src/archiver.rs @@ -73,6 +73,7 @@ use sp_runtime::traits::{ use sp_runtime::Justifications; use std::error::Error; use std::future::Future; +use std::num::NonZeroU32; use std::slice; use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::Arc; @@ -365,8 +366,13 @@ pub struct ObjectMappingNotification { pub enum CreateObjectMappings { /// Start creating object mappings from this block number. /// - /// This can be lower than the latest archived block. - Block(BlockNumber), + /// This can be lower than the latest archived block, but must be greater than genesis. + /// + /// The genesis block doesn't have mappings, so starting mappings at genesis is pointless. + /// The archiver will fail if it can't get the data for this block, but snap sync doesn't store + /// the genesis data on disk. So avoiding genesis also avoids this error. + /// + Block(NonZeroU32), /// Create object mappings as archiving is happening. Yes, @@ -381,7 +387,7 @@ impl CreateObjectMappings { /// If there is no fixed block number, or mappings are disabled, returns None. fn block(&self) -> Option { match self { - CreateObjectMappings::Block(block) => Some(*block), + CreateObjectMappings::Block(block) => Some(block.get()), CreateObjectMappings::Yes => None, CreateObjectMappings::No => None, } @@ -610,18 +616,60 @@ where // If there is no path to this block from the tip due to snap sync, we'll start archiving from // an earlier segment, then start mapping again once archiving reaches this block. if let Some(block_number) = create_object_mappings.block() { + // There aren't any mappings in the genesis block, so starting there is pointless. + // (And causes errors on restart, because genesis block data is never stored during snap sync.) 
best_block_to_archive = best_block_to_archive.min(block_number); } if (best_block_to_archive..best_block_number) .any(|block_number| client.hash(block_number.into()).ok().flatten().is_none()) { - // If there are blocks missing blocks between best block to archive and best block of the + // If there are blocks missing headers between best block to archive and best block of the // blockchain it means newer block was inserted in some special way and as such is by // definition valid, so we can simply assume that is our best block to archive instead best_block_to_archive = best_block_number; } + // If the user chooses an object mapping start block we don't have data or state for, we can't + // create mappings for it, so the node must exit with an error. We ignore genesis here, because + // it doesn't have mappings. + if create_object_mappings.is_enabled() && best_block_to_archive >= 1 { + let Some(best_block_to_archive_hash) = client.hash(best_block_to_archive.into())? else { + let error = format!( + "Missing hash for mapping block {best_block_to_archive}, \ + try a higher block number, or wipe your node and restart with `--sync full`" + ); + return Err(sp_blockchain::Error::Application(error.into())); + }; + + let Some(best_block_data) = client.block(best_block_to_archive_hash)? else { + let error = format!( + "Missing data for mapping block {best_block_to_archive} \ + hash {best_block_to_archive_hash}, \ + try a higher block number, or wipe your node and restart with `--sync full`" + ); + return Err(sp_blockchain::Error::Application(error.into())); + }; + + // Similarly, state can be pruned, even if the data is present + client + .runtime_api() + .extract_block_object_mapping( + *best_block_data.block.header().parent_hash(), + best_block_data.block.clone(), + ) + .map_err(|error| { + sp_blockchain::Error::Application( + format!( + "Missing state for mapping block {best_block_to_archive} \ + hash {best_block_to_archive_hash}: {error}, \ + try a higher block number, or wipe your node and restart with `--sync full`" + ) + .into(), + ) + })?; + } + let maybe_last_archived_block = find_last_archived_block( client, segment_headers_store, diff --git a/crates/sc-subspace-chain-specs/res/chain-spec-raw-taurus.json b/crates/sc-subspace-chain-specs/res/chain-spec-raw-taurus.json index 29434c3b0d..3aabe0a6c9 100644 --- a/crates/sc-subspace-chain-specs/res/chain-spec-raw-taurus.json +++ b/crates/sc-subspace-chain-specs/res/chain-spec-raw-taurus.json @@ -16,7 +16,11 @@ ], "protocolId": "autonomys-taurus", "properties": { - "domainsBootstrapNodes": {}, + "domainsBootstrapNodes": { + "0": [ + "/dns/bootstrap-0.auto-evm.taurus.subspace.network/tcp/30334/p2p/12D3KooWKDhSnpoeyRPRQSNwnB2k1C4WRa8h3BQh5s5mtF9MJdTN" + ] + }, "dsnBootstrapNodes": [ "/dns/bootstrap-0.taurus.subspace.network/tcp/30533/p2p/12D3KooWFL8f47BBWcvF7LxNGucJhXoS4j1aSddUNmY7haFR6eUk", "/dns/bootstrap-1.taurus.subspace.network/tcp/30533/p2p/12D3KooWCtNAJ9dB1CpBNN6HrjyhZNY1fXodkXz6qQWCfxaM7w4A" diff --git a/crates/sp-domains-fraud-proof/src/fraud_proof.rs b/crates/sp-domains-fraud-proof/src/fraud_proof.rs index c5fcbaec88..eb7a397e14 100644 --- a/crates/sp-domains-fraud-proof/src/fraud_proof.rs +++ b/crates/sp-domains-fraud-proof/src/fraud_proof.rs @@ -336,18 +336,23 @@ pub struct FraudProof { #[allow(clippy::large_enum_variant)] #[derive(Debug, Decode, Encode, TypeInfo, PartialEq, Eq, Clone)] pub enum FraudProofVariant { + #[codec(index = 0)] InvalidStateTransition(InvalidStateTransitionProof), + #[codec(index = 1)] 
ValidBundle(ValidBundleProof), + #[codec(index = 2)] InvalidExtrinsicsRoot(InvalidExtrinsicsRootProof), + #[codec(index = 3)] InvalidBundles(InvalidBundlesProof), + #[codec(index = 4)] InvalidDomainBlockHash(InvalidDomainBlockHashProof), + #[codec(index = 5)] InvalidBlockFees(InvalidBlockFeesProof), + #[codec(index = 6)] InvalidTransfers(InvalidTransfersProof), - // Dummy fraud proof only used in test and benchmark - // - // NOTE: the `Dummy` must be the last variant, because the `#[cfg(..)]` will apply to - // all the variants after it. + /// Dummy fraud proof only used in tests and benchmarks #[cfg(any(feature = "std", feature = "runtime-benchmarks"))] + #[codec(index = 100)] Dummy, } diff --git a/crates/sp-domains/src/lib.rs b/crates/sp-domains/src/lib.rs index e0d84ff5bc..3f169729b8 100644 --- a/crates/sp-domains/src/lib.rs +++ b/crates/sp-domains/src/lib.rs @@ -1456,14 +1456,6 @@ pub fn operator_block_fees_final_key() -> Vec { .to_vec() } -/// Preimage to verify the proof of ownership of Operator Signing key. -/// Operator owner is used to ensure the signature is used by anyone except -/// the owner of the Signing key pair. -#[derive(Debug, Encode)] -pub struct OperatorSigningKeyProofOfOwnershipData { - pub operator_owner: AccountId, -} - /// Hook to handle chain rewards. pub trait OnChainRewards { fn on_chain_rewards(chain_id: ChainId, reward: Balance); @@ -1540,9 +1532,6 @@ sp_api::decl_runtime_apis! { /// Returns the current epoch and the next epoch operators of the given domain fn domain_operators(domain_id: DomainId) -> Option<(BTreeMap, Vec)>; - /// Get operator id by signing key - fn operator_id_by_signing_key(signing_key: OperatorPublicKey) -> Option; - /// Returns the execution receipt hash of the given domain and domain block number fn receipt_hash(domain_id: DomainId, domain_number: HeaderNumberFor) -> Option>; diff --git a/crates/subspace-core-primitives/src/lib.rs b/crates/subspace-core-primitives/src/lib.rs index fa84ccadc6..0b9d0eb7ea 100644 --- a/crates/subspace-core-primitives/src/lib.rs +++ b/crates/subspace-core-primitives/src/lib.rs @@ -59,22 +59,16 @@ pub const REWARD_SIGNING_CONTEXT: &[u8] = b"subspace_reward"; /// Type of randomness. #[derive( - Debug, - Default, - Copy, - Clone, - Eq, - PartialEq, - From, - Into, - Deref, - Encode, - Decode, - TypeInfo, - MaxEncodedLen, + Default, Copy, Clone, Eq, PartialEq, From, Into, Deref, Encode, Decode, TypeInfo, MaxEncodedLen, )] pub struct Randomness([u8; Randomness::SIZE]); +impl fmt::Debug for Randomness { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -156,7 +150,6 @@ pub type BlockWeight = u128; /// A Ristretto Schnorr public key as bytes produced by `schnorrkel` crate. 
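Pinning each `FraudProofVariant` with `#[codec(index = …)]`, as the hunk above does, is what lets the `#[cfg]`-gated `Dummy` variant sit anywhere in the enum without breaking the wire format: SCALE's derived `Encode` otherwise numbers variants by position, so a variant compiled in only for tests would shift every discriminant after it. A small self-contained illustration (not the real proof types), assuming `parity-scale-codec` with its `derive` feature:

```rust
use parity_scale_codec::Encode;

// With implicit indices, removing or cfg-gating a variant renumbers the
// ones after it; explicit indices keep the encoding stable across builds.
#[derive(Encode)]
#[allow(dead_code)]
enum WithExplicitIndices {
    #[codec(index = 0)]
    A(u8),
    #[codec(index = 1)]
    B(u8),
    // Test-only variant parked far away from the production index range.
    #[codec(index = 100)]
    Dummy,
}

fn main() {
    // The first byte of a SCALE-encoded enum is the variant index.
    assert_eq!(WithExplicitIndices::B(7).encode(), vec![1, 7]);
    assert_eq!(WithExplicitIndices::Dummy.encode(), vec![100]);
}
```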
#[derive( - Debug, Default, Copy, Clone, @@ -174,6 +167,12 @@ pub type BlockWeight = u128; )] pub struct PublicKey([u8; PublicKey::SIZE]); +impl fmt::Debug for PublicKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -239,7 +238,6 @@ impl PublicKey { /// Single BLS12-381 scalar with big-endian representation, not guaranteed to be valid #[derive( - Debug, Default, Copy, Clone, @@ -262,6 +260,12 @@ impl PublicKey { #[cfg_attr(feature = "serde", serde(transparent))] pub struct ScalarBytes([u8; ScalarBytes::FULL_BYTES]); +impl fmt::Debug for ScalarBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + impl ScalarBytes { /// How many full bytes can be stored in BLS12-381 scalar (for instance before encoding). It is /// actually 254 bits, but bits are much harder to work with and likely not worth it. diff --git a/crates/subspace-core-primitives/src/pieces.rs index 5f2f6eb91d..e22a9349d2 100644 --- a/crates/subspace-core-primitives/src/pieces.rs +++ b/crates/subspace-core-primitives/src/pieces.rs @@ -252,10 +252,16 @@ impl PieceOffset { /// Raw record contained within recorded history segment before archiving is applied. /// /// NOTE: This is a stack-allocated data structure and can cause stack overflow! -#[derive(Debug, Copy, Clone, Eq, PartialEq, Deref, DerefMut)] +#[derive(Copy, Clone, Eq, PartialEq, Deref, DerefMut)] #[repr(transparent)] pub struct RawRecord([[u8; ScalarBytes::SAFE_BYTES]; Self::NUM_CHUNKS]); +impl fmt::Debug for RawRecord { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0.as_flattened())) + } +} + impl Default for RawRecord { #[inline] fn default() -> Self { @@ -407,10 +413,16 @@ impl RawRecord { /// Record contained within a piece. /// /// NOTE: This is a stack-allocated data structure and can cause stack overflow! -#[derive(Debug, Copy, Clone, Eq, PartialEq, Deref, DerefMut)] +#[derive(Copy, Clone, Eq, PartialEq, Deref, DerefMut)] #[repr(transparent)] pub struct Record([[u8; ScalarBytes::FULL_BYTES]; Self::NUM_CHUNKS]); +impl fmt::Debug for Record { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0.as_flattened())) + } +} + impl Default for Record { #[inline] fn default() -> Self { @@ -601,7 +613,6 @@ impl Record { /// Record commitment contained within a piece. #[derive( - Debug, Copy, Clone, Eq, @@ -618,6 +629,12 @@ impl Record { )] pub struct RecordCommitment([u8; RecordCommitment::SIZE]); +impl fmt::Debug for RecordCommitment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -731,7 +748,6 @@ impl RecordCommitment { /// Record witness contained within a piece.
#[derive( - Debug, Copy, Clone, Eq, @@ -748,6 +764,12 @@ impl RecordCommitment { )] pub struct RecordWitness([u8; RecordWitness::SIZE]); +impl fmt::Debug for RecordWitness { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -859,12 +881,17 @@ impl RecordWitness { pub const SIZE: usize = 48; } -#[derive(Debug)] enum CowBytes { Shared(Bytes), Owned(BytesMut), } +impl fmt::Debug for CowBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.as_ref())) + } +} + impl PartialEq for CowBytes { fn eq(&self, other: &Self) -> bool { self.as_ref().eq(other.as_ref()) @@ -1162,12 +1189,16 @@ impl Piece { /// Internally piece contains a record and corresponding witness that together with segment /// commitment of the segment this piece belongs to can be used to verify that a piece belongs to /// the actual archival history of the blockchain. -#[derive( - Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Deref, DerefMut, AsRef, AsMut, -)] +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Deref, DerefMut, AsRef, AsMut)] #[repr(transparent)] pub struct PieceArray([u8; Piece::SIZE]); +impl fmt::Debug for PieceArray { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + impl Default for PieceArray { #[inline] fn default() -> Self { diff --git a/crates/subspace-core-primitives/src/pos.rs b/crates/subspace-core-primitives/src/pos.rs index 691003ede8..e7b9567ecb 100644 --- a/crates/subspace-core-primitives/src/pos.rs +++ b/crates/subspace-core-primitives/src/pos.rs @@ -1,6 +1,7 @@ //! Proof of space-related data structures. use crate::hashes::{blake3_hash, Blake3Hash}; +use core::fmt; use derive_more::{Deref, DerefMut, From, Into}; use parity_scale_codec::{Decode, Encode, MaxEncodedLen}; use scale_info::TypeInfo; @@ -12,9 +13,15 @@ use serde::{Deserializer, Serializer}; use serde_big_array::BigArray; /// Proof of space seed. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Deref, From, Into)] +#[derive(Copy, Clone, Eq, PartialEq, Deref, From, Into)] pub struct PosSeed([u8; PosSeed::SIZE]); +impl fmt::Debug for PosSeed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + impl PosSeed { /// Size of proof of space seed in bytes. pub const SIZE: usize = 32; @@ -22,22 +29,16 @@ impl PosSeed { /// Proof of space proof bytes. #[derive( - Debug, - Copy, - Clone, - Eq, - PartialEq, - Deref, - DerefMut, - From, - Into, - Encode, - Decode, - TypeInfo, - MaxEncodedLen, + Copy, Clone, Eq, PartialEq, Deref, DerefMut, From, Into, Encode, Decode, TypeInfo, MaxEncodedLen, )] pub struct PosProof([u8; PosProof::SIZE]); +impl fmt::Debug for PosProof { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] diff --git a/crates/subspace-core-primitives/src/pot.rs b/crates/subspace-core-primitives/src/pot.rs index cdbe703d4b..c6020d66b4 100644 --- a/crates/subspace-core-primitives/src/pot.rs +++ b/crates/subspace-core-primitives/src/pot.rs @@ -15,7 +15,6 @@ use serde::{Deserializer, Serializer}; /// Proof of time key(input to the encryption). 
#[derive( - Debug, Default, Copy, Clone, @@ -33,6 +32,12 @@ use serde::{Deserializer, Serializer}; )] pub struct PotKey([u8; Self::SIZE]); +impl fmt::Debug for PotKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -98,7 +103,6 @@ impl PotKey { /// Proof of time seed #[derive( - Debug, Default, Copy, Clone, @@ -117,6 +121,12 @@ impl PotKey { )] pub struct PotSeed([u8; Self::SIZE]); +impl fmt::Debug for PotSeed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -187,7 +197,6 @@ impl PotSeed { /// Proof of time output, can be intermediate checkpoint or final slot output #[derive( - Debug, Default, Copy, Clone, @@ -206,6 +215,12 @@ impl PotSeed { )] pub struct PotOutput([u8; Self::SIZE]); +impl fmt::Debug for PotOutput { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] diff --git a/crates/subspace-core-primitives/src/segments.rs b/crates/subspace-core-primitives/src/segments.rs index c259b59ad9..274a4d587c 100644 --- a/crates/subspace-core-primitives/src/segments.rs +++ b/crates/subspace-core-primitives/src/segments.rs @@ -9,6 +9,7 @@ use crate::BlockNumber; #[cfg(not(feature = "std"))] use alloc::boxed::Box; use core::array::TryFromSliceError; +use core::fmt; use core::iter::Step; use core::num::NonZeroU64; use derive_more::{ @@ -134,7 +135,6 @@ impl SegmentIndex { /// Segment commitment contained within segment header. #[derive( - Debug, Copy, Clone, Eq, @@ -152,6 +152,12 @@ impl SegmentIndex { #[repr(transparent)] pub struct SegmentCommitment([u8; SegmentCommitment::SIZE]); +impl fmt::Debug for SegmentCommitment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -422,10 +428,17 @@ impl SegmentHeader { /// Recorded history segment before archiving is applied. /// /// NOTE: This is a stack-allocated data structure and can cause stack overflow! 
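All of these byte-array newtypes follow one pattern: drop the derived `Debug` (which prints a long decimal byte list) and hand-write one that hex-encodes the contents; the multi-megabyte `RecordedHistorySegment` just below instead gets a non-exhaustive placeholder so debug logs stay bounded. A minimal sketch of the pattern with a stand-in type, assuming the same `hex` crate the diff already uses:

```rust
use core::fmt;

// Stand-in newtype; the real types wrap fixed-size arrays such as
// `[u8; PublicKey::SIZE]`.
struct Commitment([u8; 4]);

impl fmt::Debug for Commitment {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Hex is both shorter and copy-pasteable compared to the derived
        // `Commitment([222, 173, 190, 239])` rendering.
        write!(f, "{}", hex::encode(self.0))
    }
}

fn main() {
    println!("{:?}", Commitment([0xde, 0xad, 0xbe, 0xef])); // prints: deadbeef
}
```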
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Deref, DerefMut)] +#[derive(Copy, Clone, Eq, PartialEq, Deref, DerefMut)] #[repr(transparent)] pub struct RecordedHistorySegment([RawRecord; Self::NUM_RAW_RECORDS]); +impl fmt::Debug for RecordedHistorySegment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RecordedHistorySegment") + .finish_non_exhaustive() + } +} + impl Default for RecordedHistorySegment { #[inline] fn default() -> Self { diff --git a/crates/subspace-core-primitives/src/solutions.rs b/crates/subspace-core-primitives/src/solutions.rs index 21d4a8465e..aac35f5a4d 100644 --- a/crates/subspace-core-primitives/src/solutions.rs +++ b/crates/subspace-core-primitives/src/solutions.rs @@ -6,6 +6,7 @@ use crate::sectors::SectorIndex; use crate::segments::{HistorySize, SegmentIndex}; use crate::{PublicKey, ScalarBytes}; use core::array::TryFromSliceError; +use core::fmt; use derive_more::{AsMut, AsRef, Deref, DerefMut, From, Into}; use num_traits::WrappingSub; use parity_scale_codec::{Decode, Encode, MaxEncodedLen}; @@ -64,23 +65,16 @@ const_assert!(solution_range_to_pieces(pieces_to_solution_range(5, (1, 6)), (1, /// A Ristretto Schnorr signature as bytes produced by `schnorrkel` crate. #[derive( - Debug, - Copy, - Clone, - PartialEq, - Eq, - Ord, - PartialOrd, - Hash, - Encode, - Decode, - TypeInfo, - Deref, - From, - Into, + Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, Encode, Decode, TypeInfo, Deref, From, Into, )] pub struct RewardSignature([u8; RewardSignature::SIZE]); +impl fmt::Debug for RewardSignature { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] @@ -135,7 +129,6 @@ impl RewardSignature { /// Witness for chunk contained within a record. #[derive( - Debug, Copy, Clone, Eq, @@ -153,6 +146,12 @@ impl RewardSignature { #[repr(transparent)] pub struct ChunkWitness([u8; ChunkWitness::SIZE]); +impl fmt::Debug for ChunkWitness { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", hex::encode(self.0)) + } +} + #[cfg(feature = "serde")] #[derive(Serialize, Deserialize)] #[serde(transparent)] diff --git a/crates/subspace-fake-runtime-api/src/lib.rs b/crates/subspace-fake-runtime-api/src/lib.rs index 3cb36c652a..30a2b1bad9 100644 --- a/crates/subspace-fake-runtime-api/src/lib.rs +++ b/crates/subspace-fake-runtime-api/src/lib.rs @@ -32,6 +32,7 @@ use sp_domains_fraud_proof::storage_proof::FraudProofStorageKeyRequest; use sp_messenger::messages::{ BlockMessagesWithStorageKey, ChainId, ChannelId, CrossDomainMessage, MessageId, MessageKey, }; +use sp_messenger::{ChannelNonce, XdmId}; use sp_runtime::traits::{Block as BlockT, NumberFor}; use sp_runtime::transaction_validity::{TransactionSource, TransactionValidity}; use sp_runtime::{ApplyExtrinsicResult, ExtrinsicInclusionMode}; @@ -253,10 +254,6 @@ sp_api::impl_runtime_apis! { unreachable!() } - fn operator_id_by_signing_key(_signing_key: OperatorPublicKey) -> Option { - unreachable!() - } - fn receipt_hash(_domain_id: DomainId, _domain_number: DomainNumber) -> Option { unreachable!() } @@ -365,6 +362,14 @@ sp_api::impl_runtime_apis! 
{ fn domain_chains_allowlist_update(_domain_id: DomainId) -> Option{ unreachable!() } + + fn xdm_id(_ext: &::Extrinsic) -> Option { + unreachable!() + } + + fn channel_nonce(_chain_id: ChainId, _channel_id: ChannelId) -> Option { + unreachable!() + } } impl sp_messenger::RelayerApi::Hash> for Runtime { diff --git a/crates/subspace-farmer-components/Cargo.toml b/crates/subspace-farmer-components/Cargo.toml index b5cab6e101..6e6e64de7b 100644 --- a/crates/subspace-farmer-components/Cargo.toml +++ b/crates/subspace-farmer-components/Cargo.toml @@ -18,7 +18,6 @@ bench = false [dependencies] anyhow = "1.0.89" async-lock = "3.4.0" -async-trait = "0.1.83" backoff = { version = "0.4.0", features = ["futures", "tokio"] } bitvec = "1.0.1" # TODO: Switch to fs4 once https://github.com/al8n/fs4-rs/issues/15 is resolved @@ -35,6 +34,7 @@ serde = { version = "1.0.110", features = ["derive"] } static_assertions = "1.1.0" subspace-archiving = { version = "0.1.0", path = "../subspace-archiving" } subspace-core-primitives = { version = "0.1.0", path = "../subspace-core-primitives" } +subspace-data-retrieval = { version = "0.1.0", path = "../../shared/subspace-data-retrieval" } subspace-erasure-coding = { version = "0.1.0", path = "../subspace-erasure-coding" } subspace-kzg = { version = "0.1.0", path = "../../shared/subspace-kzg" } subspace-proof-of-space = { version = "0.1.0", path = "../subspace-proof-of-space", features = ["parallel"] } diff --git a/crates/subspace-farmer-components/benches/auditing.rs b/crates/subspace-farmer-components/benches/auditing.rs index 42eb8adb1b..4fed809148 100644 --- a/crates/subspace-farmer-components/benches/auditing.rs +++ b/crates/subspace-farmer-components/benches/auditing.rs @@ -67,8 +67,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { .archived_segments .into_iter() .next() - .unwrap() - .pieces; + .unwrap(); let farmer_protocol_info = FarmerProtocolInfo { history_size: HistorySize::from(NonZeroU64::new(1).unwrap()), diff --git a/crates/subspace-farmer-components/benches/plotting.rs b/crates/subspace-farmer-components/benches/plotting.rs index b39e8aac28..0ce11286dc 100644 --- a/crates/subspace-farmer-components/benches/plotting.rs +++ b/crates/subspace-farmer-components/benches/plotting.rs @@ -55,8 +55,7 @@ fn criterion_benchmark(c: &mut Criterion) { .archived_segments .into_iter() .next() - .unwrap() - .pieces; + .unwrap(); let farmer_protocol_info = FarmerProtocolInfo { history_size: HistorySize::from(NonZeroU64::new(1).unwrap()), diff --git a/crates/subspace-farmer-components/benches/proving.rs b/crates/subspace-farmer-components/benches/proving.rs index 3d5bb3d6f1..edc4211eaa 100644 --- a/crates/subspace-farmer-components/benches/proving.rs +++ b/crates/subspace-farmer-components/benches/proving.rs @@ -75,8 +75,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { .archived_segments .into_iter() .next() - .unwrap() - .pieces; + .unwrap(); let farmer_protocol_info = FarmerProtocolInfo { history_size: HistorySize::from(NonZeroU64::new(1).unwrap()), diff --git a/crates/subspace-farmer-components/benches/reading.rs b/crates/subspace-farmer-components/benches/reading.rs index 3e2f6b1133..6a1a602a7c 100644 --- a/crates/subspace-farmer-components/benches/reading.rs +++ b/crates/subspace-farmer-components/benches/reading.rs @@ -66,8 +66,7 @@ pub fn criterion_benchmark(c: &mut Criterion) { .archived_segments .into_iter() .next() - .unwrap() - .pieces; + .unwrap(); let farmer_protocol_info = FarmerProtocolInfo { history_size: 
HistorySize::from(NonZeroU64::new(1).unwrap()), diff --git a/crates/subspace-farmer-components/src/lib.rs b/crates/subspace-farmer-components/src/lib.rs index 757c32b74b..9a2c5a15e8 100644 --- a/crates/subspace-farmer-components/src/lib.rs +++ b/crates/subspace-farmer-components/src/lib.rs @@ -24,88 +24,13 @@ pub mod sector; mod segment_reconstruction; use crate::file_ext::FileExt; -use async_trait::async_trait; -use futures::stream::FuturesUnordered; -use futures::Stream; use parity_scale_codec::{Decode, Encode}; use serde::{Deserialize, Serialize}; use static_assertions::const_assert; use std::fs::File; use std::future::Future; use std::io; -use std::sync::Arc; -use subspace_core_primitives::pieces::{Piece, PieceIndex}; -use subspace_core_primitives::segments::{ArchivedHistorySegment, HistorySize}; - -/// Trait representing a way to get pieces -#[async_trait] -pub trait PieceGetter { - /// Get piece by index - async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result>; - - /// Get pieces with provided indices. - /// - /// Number of elements in returned stream is the same as number of unique `piece_indices`. - async fn get_pieces<'a, PieceIndices>( - &'a self, - piece_indices: PieceIndices, - ) -> anyhow::Result< - Box>)> + Send + Unpin + 'a>, - > - where - PieceIndices: IntoIterator + Send + 'a; -} - -#[async_trait] -impl PieceGetter for Arc -where - T: PieceGetter + Send + Sync, -{ - async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result> { - self.as_ref().get_piece(piece_index).await - } - - async fn get_pieces<'a, PieceIndices>( - &'a self, - piece_indices: PieceIndices, - ) -> anyhow::Result< - Box>)> + Send + Unpin + 'a>, - > - where - PieceIndices: IntoIterator + Send + 'a, - { - self.as_ref().get_pieces(piece_indices).await - } -} - -#[async_trait] -impl PieceGetter for ArchivedHistorySegment { - async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result> { - let position = usize::try_from(u64::from(piece_index))?; - - Ok(self.pieces().nth(position)) - } - - async fn get_pieces<'a, PieceIndices>( - &'a self, - piece_indices: PieceIndices, - ) -> anyhow::Result< - Box>)> + Send + Unpin + 'a>, - > - where - PieceIndices: IntoIterator + Send + 'a, - { - Ok(Box::new( - piece_indices - .into_iter() - .map(|piece_index| async move { - let result = self.get_piece(piece_index).await; - (piece_index, result) - }) - .collect::>(), - ) as Box<_>) - } -} +use subspace_core_primitives::segments::HistorySize; /// Enum to encapsulate the selection between [`ReadAtSync`] and [`ReadAtAsync]` variants #[derive(Debug, Copy, Clone)] diff --git a/crates/subspace-farmer-components/src/plotting.rs b/crates/subspace-farmer-components/src/plotting.rs index 69a5fa8af4..b408cf90b4 100644 --- a/crates/subspace-farmer-components/src/plotting.rs +++ b/crates/subspace-farmer-components/src/plotting.rs @@ -11,7 +11,7 @@ use crate::sector::{ SectorContentsMap, SectorMetadata, SectorMetadataChecksummed, }; use crate::segment_reconstruction::recover_missing_piece; -use crate::{FarmerProtocolInfo, PieceGetter}; +use crate::FarmerProtocolInfo; use async_lock::{Mutex as AsyncMutex, Semaphore}; use backoff::future::retry; use backoff::{Error as BackoffError, ExponentialBackoff}; @@ -31,6 +31,7 @@ use subspace_core_primitives::pos::PosSeed; use subspace_core_primitives::sectors::{SBucket, SectorId, SectorIndex}; use subspace_core_primitives::segments::HistorySize; use subspace_core_primitives::{PublicKey, ScalarBytes}; +use subspace_data_retrieval::piece_getter::PieceGetter; use 
subspace_erasure_coding::ErasureCoding; use subspace_kzg::{Kzg, Scalar}; use subspace_proof_of_space::{Table, TableGenerator}; diff --git a/crates/subspace-farmer-components/src/segment_reconstruction.rs b/crates/subspace-farmer-components/src/segment_reconstruction.rs index 2fd8f0bbc8..e2d6915320 100644 --- a/crates/subspace-farmer-components/src/segment_reconstruction.rs +++ b/crates/subspace-farmer-components/src/segment_reconstruction.rs @@ -1,28 +1,25 @@ -use crate::PieceGetter; -use futures::StreamExt; use subspace_archiving::piece_reconstructor::{PiecesReconstructor, ReconstructorError}; use subspace_core_primitives::pieces::{Piece, PieceIndex}; -use subspace_core_primitives::segments::{ArchivedHistorySegment, RecordedHistorySegment}; +use subspace_data_retrieval::piece_getter::PieceGetter; +use subspace_data_retrieval::segment_downloading::{ + download_segment_pieces, SegmentDownloadingError, +}; use subspace_erasure_coding::ErasureCoding; use subspace_kzg::Kzg; use thiserror::Error; use tokio::task::JoinError; -use tracing::{debug, error, info}; +use tracing::{error, info}; #[derive(Debug, Error)] pub(crate) enum SegmentReconstructionError { - /// Not enough pieces to reconstruct a segment - #[error("Not enough pieces to reconstruct a segment")] - NotEnoughPiecesAcquired, + /// Segment downloading failed + #[error("Segment downloading failed: {0}")] + SegmentDownloadingFailed(#[from] SegmentDownloadingError), /// Internal piece retrieval process failed #[error("Piece reconstruction failed: {0}")] ReconstructionFailed(#[from] ReconstructorError), - /// Internal piece retrieval process failed - #[error("Pieces retrieval failed: {0}")] - PieceRetrievalFailed(#[from] anyhow::Error), - /// Join error #[error("Join error: {0}")] JoinError(#[from] JoinError), @@ -41,50 +38,7 @@ where let segment_index = missing_piece_index.segment_index(); let position = missing_piece_index.position(); - let required_pieces_number = RecordedHistorySegment::NUM_RAW_RECORDS; - let mut received_pieces = 0_usize; - - let mut segment_pieces = vec![None::; ArchivedHistorySegment::NUM_PIECES]; - - let mut pieces_iter = segment_index.segment_piece_indexes().into_iter(); - - // Download in batches until we get enough or exhaust available pieces - while !pieces_iter.is_empty() && received_pieces != required_pieces_number { - let piece_indices = pieces_iter - .by_ref() - .take(required_pieces_number - received_pieces); - - let mut received_segment_pieces = piece_getter.get_pieces(piece_indices).await?; - - while let Some((piece_index, result)) = received_segment_pieces.next().await { - match result { - Ok(Some(piece)) => { - received_pieces += 1; - segment_pieces - .get_mut(piece_index.position() as usize) - .expect("Piece position is by definition within segment; qed") - .replace(piece); - } - Ok(None) => { - debug!(%piece_index, "Piece was not found"); - } - Err(error) => { - debug!(%error, %piece_index, "Failed to get piece"); - } - } - } - } - - if received_pieces < required_pieces_number { - error!( - %missing_piece_index, - %received_pieces, - %required_pieces_number, - "Recovering missing piece failed." 
- ); - - return Err(SegmentReconstructionError::NotEnoughPiecesAcquired); - } + let segment_pieces = download_segment_pieces(segment_index, piece_getter).await?; let result = tokio::task::spawn_blocking(move || { let reconstructor = PiecesReconstructor::new(kzg, erasure_coding); diff --git a/crates/subspace-farmer/Cargo.toml b/crates/subspace-farmer/Cargo.toml index 766d38b23b..bff753a07f 100644 --- a/crates/subspace-farmer/Cargo.toml +++ b/crates/subspace-farmer/Cargo.toml @@ -51,10 +51,12 @@ serde = { version = "1.0.110", features = ["derive"] } serde_json = "1.0.128" static_assertions = "1.1.0" ss58-registry = "1.51.0" +subspace-core-primitives = { version = "0.1.0", path = "../subspace-core-primitives" } +subspace-data-retrieval = { version = "0.1.0", path = "../../shared/subspace-data-retrieval" } subspace-erasure-coding = { version = "0.1.0", path = "../subspace-erasure-coding" } subspace-farmer-components = { version = "0.1.0", path = "../subspace-farmer-components" } -subspace-core-primitives = { version = "0.1.0", path = "../subspace-core-primitives" } subspace-kzg = { version = "0.1.0", path = "../../shared/subspace-kzg" } +subspace-logging = { version = "0.0.1", path = "../../shared/subspace-logging", optional = true } subspace-metrics = { version = "0.1.0", path = "../../shared/subspace-metrics", optional = true } subspace-networking = { version = "0.1.0", path = "../subspace-networking" } subspace-proof-of-space = { version = "0.1.0", path = "../subspace-proof-of-space" } @@ -62,14 +64,12 @@ subspace-proof-of-space-gpu = { version = "0.1.0", path = "../../shared/subspace subspace-rpc-primitives = { version = "0.1.0", path = "../subspace-rpc-primitives" } subspace-verification = { version = "0.1.0", path = "../subspace-verification" } substrate-bip39 = "0.6.0" -supports-color = { version = "3.0.1", optional = true } tempfile = "3.13.0" thiserror = "2.0.0" thread-priority = "1.1.0" tokio = { version = "1.40.0", features = ["macros", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] } tokio-stream = { version = "0.1.16", features = ["sync"] } tracing = "0.1.40" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"], optional = true } ulid = { version = "1.1.3", features = ["serde"] } zeroize = "1.8.1" @@ -96,6 +96,5 @@ binary = [ "dep:fdlimit", "dep:mimalloc", "dep:subspace-metrics", - "dep:supports-color", - "dep:tracing-subscriber", + "dep:subspace-logging", ] diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster.rs index a672edb164..cfd520d053 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster.rs @@ -18,11 +18,14 @@ use prometheus_client::registry::Registry; use std::env::current_exe; use std::mem; use std::net::SocketAddr; +use std::time::Duration; use subspace_farmer::cluster::nats_client::NatsClient; use subspace_farmer::utils::AsyncJoinOnDrop; use subspace_metrics::{start_prometheus_metrics_server, RegistryAdapter}; use subspace_proof_of_space::Table; +const REQUEST_RETRY_MAX_ELAPSED_TIME: Duration = Duration::from_mins(1); + /// Arguments for cluster #[derive(Debug, Parser)] pub(crate) struct ClusterArgs { @@ -101,7 +104,7 @@ where let nats_client = NatsClient::new( nats_servers, ExponentialBackoff { - max_elapsed_time: None, + max_elapsed_time: Some(REQUEST_RETRY_MAX_ELAPSED_TIME), ..ExponentialBackoff::default() }, ) diff --git 
a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/controller.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/controller.rs index 6fa95bc119..3e47037161 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/controller.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/controller.rs @@ -3,7 +3,7 @@ use crate::commands::cluster::farmer::FARMER_IDENTIFICATION_BROADCAST_INTERVAL; use crate::commands::shared::derive_libp2p_keypair; use crate::commands::shared::network::{configure_network, NetworkArgs}; use anyhow::anyhow; -use async_lock::RwLock as AsyncRwLock; +use async_lock::{RwLock as AsyncRwLock, Semaphore}; use backoff::ExponentialBackoff; use clap::{Parser, ValueHint}; use futures::stream::FuturesUnordered; @@ -20,7 +20,7 @@ use subspace_farmer::cluster::controller::controller_service; use subspace_farmer::cluster::controller::farms::{maintain_farms, FarmIndex}; use subspace_farmer::cluster::nats_client::NatsClient; use subspace_farmer::farm::plotted_pieces::PlottedPieces; -use subspace_farmer::farmer_cache::FarmerCache; +use subspace_farmer::farmer_cache::{FarmerCache, FarmerCaches}; use subspace_farmer::farmer_piece_getter::piece_validator::SegmentCommitmentPieceValidator; use subspace_farmer::farmer_piece_getter::{DsnCacheRetryPolicy, FarmerPieceGetter}; use subspace_farmer::node_client::caching_proxy_node_client::CachingProxyNodeClient; @@ -30,7 +30,7 @@ use subspace_farmer::single_disk_farm::identity::Identity; use subspace_farmer::utils::{run_future_in_dedicated_thread, AsyncJoinOnDrop}; use subspace_kzg::Kzg; use subspace_networking::utils::piece_provider::PieceProvider; -use tracing::info; +use tracing::{info, info_span, Instrument}; /// Get piece retry attempts number. const PIECE_GETTER_MAX_RETRIES: u16 = 7; @@ -38,6 +38,8 @@ const PIECE_GETTER_MAX_RETRIES: u16 = 7; const GET_PIECE_INITIAL_INTERVAL: Duration = Duration::from_secs(5); /// Defines max duration between get_piece calls. const GET_PIECE_MAX_INTERVAL: Duration = Duration::from_secs(40); +/// Multiplier on top of outgoing connections number for piece downloading purposes +const PIECE_PROVIDER_MULTIPLIER: usize = 10; /// Arguments for controller #[derive(Debug, Parser)] @@ -55,7 +57,7 @@ pub(super) struct ControllerArgs { /// It is strongly recommended to use alphanumeric values for cache group, the same cache group /// must be also specified on corresponding caches. #[arg(long, default_value = "default")] - cache_group: String, + cache_groups: Vec, /// Number of service instances. 
/// /// Increasing number of services allows to process more concurrent requests, but increasing @@ -84,7 +86,7 @@ pub(super) async fn controller( let ControllerArgs { base_path, node_rpc_url, - cache_group, + cache_groups, service_instances, mut network_args, dev, @@ -128,8 +130,11 @@ pub(super) async fn controller( let peer_id = keypair.public().to_peer_id(); let instance = peer_id.to_string(); - let (farmer_cache, farmer_cache_worker) = - FarmerCache::new(node_client.clone(), peer_id, Some(registry)); + let (farmer_caches, farmer_cache_workers) = cache_groups + .iter() + .map(|_cache_group| FarmerCache::new(node_client.clone(), peer_id, Some(registry))) + .unzip::<_, _, Vec<_>, Vec<_>>(); + let farmer_caches = Arc::from(farmer_caches.into_boxed_slice()); // TODO: Metrics @@ -137,6 +142,7 @@ pub(super) async fn controller( .await .map_err(|error| anyhow!("Failed to create caching proxy node client: {error}"))?; + let out_connections = network_args.out_connections; let (node, mut node_runner) = { if network_args.bootstrap_nodes.is_empty() { network_args @@ -151,7 +157,7 @@ pub(super) async fn controller( network_args, Arc::downgrade(&plotted_pieces), node_client.clone(), - farmer_cache.clone(), + FarmerCaches::from(Arc::clone(&farmer_caches)), Some(registry), ) .map_err(|error| anyhow!("Failed to configure networking: {error}"))? @@ -161,11 +167,58 @@ pub(super) async fn controller( let piece_provider = PieceProvider::new( node.clone(), SegmentCommitmentPieceValidator::new(node.clone(), node_client.clone(), kzg.clone()), + Arc::new(Semaphore::new( + out_connections as usize * PIECE_PROVIDER_MULTIPLIER, + )), ); + let farmer_cache_workers_fut = farmer_cache_workers + .into_iter() + .zip(&cache_groups) + .enumerate() + .map(|(index, (farmer_cache_worker, cache_group))| { + // Each farmer cache worker gets a customized piece getter that can leverage other + // caches than itself for sync purposes + let piece_getter = FarmerPieceGetter::new( + piece_provider.clone(), + FarmerCaches::from(Arc::from( + farmer_caches + .iter() + .enumerate() + .filter(|&(filter_index, _farmer_cache)| filter_index != index) + .map(|(_filter_index, farmer_cache)| farmer_cache.clone()) + .collect::>(), + )), + node_client.clone(), + Arc::clone(&plotted_pieces), + DsnCacheRetryPolicy { + max_retries: PIECE_GETTER_MAX_RETRIES, + backoff: ExponentialBackoff { + initial_interval: GET_PIECE_INITIAL_INTERVAL, + max_interval: GET_PIECE_MAX_INTERVAL, + // Try until we get a valid piece + max_elapsed_time: None, + multiplier: 1.75, + ..ExponentialBackoff::default() + }, + }, + ); + + let fut = farmer_cache_worker + .run(piece_getter.downgrade()) + .instrument(info_span!("", %cache_group)); + + async move { + let fut = + run_future_in_dedicated_thread(move || fut, format!("cache-worker-{index}")); + anyhow::Ok(fut?.await?) 
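The loop above gives each cache worker a piece getter built from every cache group except its own, so a group can sync from its peers without querying itself. A sketch of that exclusion filter in isolation (string names stand in for the real `FarmerCache` handles):

```rust
// For worker `index`, collect every other cache group as a sync source,
// mirroring the `filter(|&(filter_index, _)| filter_index != index)` step.
fn peers_for(index: usize, caches: &[&str]) -> Vec<String> {
    caches
        .iter()
        .enumerate()
        .filter(|&(other_index, _)| other_index != index)
        .map(|(_, cache)| cache.to_string())
        .collect()
}

fn main() {
    let groups = ["hot", "cold", "default"];
    // The "cold" worker may sync from "hot" and "default", never from itself.
    assert_eq!(peers_for(1, &groups), vec!["hot", "default"]);
}
```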
+ } + }) + .collect::>(); + let piece_getter = FarmerPieceGetter::new( piece_provider, - farmer_cache.clone(), + FarmerCaches::from(Arc::clone(&farmer_caches)), node_client.clone(), Arc::clone(&plotted_pieces), DsnCacheRetryPolicy { @@ -181,30 +234,28 @@ pub(super) async fn controller( }, ); - let farmer_cache_worker_fut = run_future_in_dedicated_thread( - { - let future = farmer_cache_worker.run(piece_getter.downgrade()); - - move || future - }, - "controller-cache-worker".to_string(), - )?; - let mut controller_services = (0..service_instances.get()) .map(|index| { let nats_client = nats_client.clone(); let node_client = node_client.clone(); let piece_getter = piece_getter.clone(); - let farmer_cache = farmer_cache.clone(); + let farmer_caches = Arc::clone(&farmer_caches); + let cache_groups = cache_groups.clone(); let instance = instance.clone(); AsyncJoinOnDrop::new( tokio::spawn(async move { + let farmer_caches = cache_groups + .iter() + .zip(farmer_caches.as_ref()) + .map(|(cache_group, farmer_cache)| (cache_group.as_str(), farmer_cache)) + .collect::>(); + controller_service( &nats_client, &node_client, &piece_getter, - &farmer_cache, + &farmer_caches, &instance, index == 0, ) @@ -238,39 +289,56 @@ pub(super) async fn controller( .await } }, - "controller-farms".to_string(), + "farms".to_string(), )?; - let caches_fut = run_future_in_dedicated_thread( - move || async move { - maintain_caches( - &cache_group, - &nats_client, - farmer_cache, - CACHE_IDENTIFICATION_BROADCAST_INTERVAL, - ) - .await - }, - "controller-caches".to_string(), - )?; + let caches_fut = farmer_caches + .iter() + .cloned() + .zip(cache_groups) + .enumerate() + .map(|(index, (farmer_cache, cache_group))| { + let nats_client = nats_client.clone(); + + async move { + let fut = run_future_in_dedicated_thread( + move || async move { + maintain_caches( + &cache_group, + &nats_client, + &farmer_cache, + CACHE_IDENTIFICATION_BROADCAST_INTERVAL, + ) + .await + }, + format!("caches-{index}"), + ); + anyhow::Ok(fut?.await?) + } + }) + .collect::>(); let networking_fut = run_future_in_dedicated_thread( move || async move { node_runner.run().await }, - "controller-networking".to_string(), + "networking".to_string(), )?; Ok(Box::pin(async move { // This defines order in which things are dropped let networking_fut = networking_fut; let farms_fut = farms_fut; - let caches_fut = caches_fut; - let farmer_cache_worker_fut = farmer_cache_worker_fut; + let mut caches_fut = caches_fut; + let caches_fut = caches_fut.next().map(|result| result.unwrap_or(Ok(Ok(())))); + let mut farmer_cache_workers_fut = farmer_cache_workers_fut; + let farmer_cache_workers_fut = farmer_cache_workers_fut + .next() + .map(|result| result.unwrap_or(Ok(()))); let controller_service_fut = controller_service_fut; let networking_fut = pin!(networking_fut); let farms_fut = pin!(farms_fut); let caches_fut = pin!(caches_fut); - let farmer_cache_worker_fut = pin!(farmer_cache_worker_fut); + let farmer_cache_workers_fut = pin!(farmer_cache_workers_fut); let controller_service_fut = pin!(controller_service_fut); select! 
{ @@ -290,7 +358,7 @@ pub(super) async fn controller( }, // Piece cache worker future - _ = farmer_cache_worker_fut.fuse() => { + _ = farmer_cache_workers_fut.fuse() => { info!("Farmer cache worker exited.") }, diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/farmer.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/farmer.rs index 506c864583..4d7aae6d1e 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/farmer.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/farmer.rs @@ -36,7 +36,6 @@ use subspace_farmer::utils::{ use subspace_farmer_components::reading::ReadSectorRecordChunksMode; use subspace_kzg::Kzg; use subspace_proof_of_space::Table; -use tokio::sync::{Barrier, Semaphore}; use tracing::{error, info, info_span, warn, Instrument}; const FARM_ERROR_PRINT_INTERVAL: Duration = Duration::from_secs(30); @@ -59,8 +58,7 @@ pub(super) struct FarmerArgs { /// `size` is max allocated size in human-readable format (e.g. 10GB, 2TiB) or just bytes that /// farmer will make sure to not exceed (and will pre-allocated all the space on startup to /// ensure it will not run out of space in runtime). Optionally, `record-chunks-mode` can be - /// set to `ConcurrentChunks` or `WholeSector` in order to avoid internal benchmarking during - /// startup. + /// set to `ConcurrentChunks` (default) or `WholeSector`. disk_farms: Vec, /// Address for farming rewards #[arg(long, value_parser = parse_ss58_reward_address)] @@ -256,9 +254,6 @@ where let farms = { let node_client = node_client.clone(); let info_mutex = &AsyncMutex::new(()); - let faster_read_sector_record_chunks_mode_barrier = - Arc::new(Barrier::new(disk_farms.len())); - let faster_read_sector_record_chunks_mode_concurrency = Arc::new(Semaphore::new(1)); let registry = &Mutex::new(registry); let mut farms = Vec::with_capacity(disk_farms.len()); @@ -272,10 +267,6 @@ where let erasure_coding = erasure_coding.clone(); let plotter = Arc::clone(&plotter); let global_mutex = Arc::clone(&global_mutex); - let faster_read_sector_record_chunks_mode_barrier = - Arc::clone(&faster_read_sector_record_chunks_mode_barrier); - let faster_read_sector_record_chunks_mode_concurrency = - Arc::clone(&faster_read_sector_record_chunks_mode_concurrency); async move { let farm_fut = SingleDiskFarm::new::<_, PosTable>( @@ -297,9 +288,8 @@ where max_plotting_sectors_per_farm, disable_farm_locking, read_sector_record_chunks_mode: disk_farm - .read_sector_record_chunks_mode, - faster_read_sector_record_chunks_mode_barrier, - faster_read_sector_record_chunks_mode_concurrency, + .read_sector_record_chunks_mode + .unwrap_or(ReadSectorRecordChunksMode::ConcurrentChunks), registry: Some(registry), create, }, diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs index 49af09578d..a62d0430a5 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs @@ -1,6 +1,6 @@ use crate::commands::shared::PlottingThreadPriority; use anyhow::anyhow; -use async_lock::Mutex as AsyncMutex; +use async_lock::{Mutex as AsyncMutex, Semaphore}; use clap::Parser; use prometheus_client::registry::Registry; use std::future::Future; @@ -9,6 +9,7 @@ use std::pin::Pin; use std::sync::Arc; use std::time::Duration; use subspace_core_primitives::pieces::Record; +use 
subspace_data_retrieval::piece_getter::PieceGetter; use subspace_erasure_coding::ErasureCoding; use subspace_farmer::cluster::controller::ClusterPieceGetter; use subspace_farmer::cluster::nats_client::NatsClient; @@ -25,10 +26,8 @@ use subspace_farmer::plotter::Plotter; use subspace_farmer::utils::{ create_plotting_thread_pool_manager, parse_cpu_cores_sets, thread_pool_core_indices, }; -use subspace_farmer_components::PieceGetter; use subspace_kzg::Kzg; use subspace_proof_of_space::Table; -use tokio::sync::Semaphore; use tracing::info; const PLOTTING_RETRY_INTERVAL: Duration = Duration::from_secs(5); @@ -134,6 +133,9 @@ pub(super) struct PlotterArgs { #[cfg(feature = "rocm")] #[clap(flatten)] rocm_plotting_options: RocmPlottingOptions, + /// Cache group to use if specified, otherwise all caches are usable by this plotter + #[arg(long)] + cache_group: Option, /// Additional cluster components #[clap(raw = true)] pub(super) additional_components: Vec, @@ -153,6 +155,7 @@ where cuda_plotting_options, #[cfg(feature = "rocm")] rocm_plotting_options, + cache_group, additional_components: _, } = plotter_args; @@ -162,7 +165,7 @@ where .expect("Not zero; qed"), ) .map_err(|error| anyhow!("Failed to instantiate erasure coding: {error}"))?; - let piece_getter = ClusterPieceGetter::new(nats_client.clone()); + let piece_getter = ClusterPieceGetter::new(nats_client.clone(), cache_group); let global_mutex = Arc::default(); @@ -398,7 +401,10 @@ where cuda_devices .into_iter() .map(|cuda_device| CudaRecordsEncoder::new(cuda_device, Arc::clone(&global_mutex))) - .collect(), + .collect::>() + .map_err(|error| { + anyhow::anyhow!("Failed to create CUDA records encoder: {error}") + })?, global_mutex, kzg, erasure_coding, @@ -477,7 +483,10 @@ where rocm_devices .into_iter() .map(|rocm_device| RocmRecordsEncoder::new(rocm_device, Arc::clone(&global_mutex))) - .collect(), + .collect::>() + .map_err(|error| { + anyhow::anyhow!("Failed to create ROCm records encoder: {error}") + })?, global_mutex, kzg, erasure_coding, diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs index a6736ed127..cb7b418929 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs @@ -2,7 +2,7 @@ use crate::commands::shared::network::{configure_network, NetworkArgs}; use crate::commands::shared::{derive_libp2p_keypair, DiskFarm, PlottingThreadPriority}; use crate::utils::shutdown_signal; use anyhow::anyhow; -use async_lock::{Mutex as AsyncMutex, RwLock as AsyncRwLock}; +use async_lock::{Mutex as AsyncMutex, RwLock as AsyncRwLock, Semaphore}; use backoff::ExponentialBackoff; use bytesize::ByteSize; use clap::{Parser, ValueHint}; @@ -20,10 +20,11 @@ use std::sync::Arc; use std::time::Duration; use subspace_core_primitives::pieces::Record; use subspace_core_primitives::PublicKey; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_erasure_coding::ErasureCoding; use subspace_farmer::farm::plotted_pieces::PlottedPieces; use subspace_farmer::farm::{PlottedSectors, SectorPlottingDetails, SectorUpdate}; -use subspace_farmer::farmer_cache::FarmerCache; +use subspace_farmer::farmer_cache::{FarmerCache, FarmerCaches}; use subspace_farmer::farmer_piece_getter::piece_validator::SegmentCommitmentPieceValidator; use subspace_farmer::farmer_piece_getter::{DsnCacheRetryPolicy, FarmerPieceGetter}; use 
subspace_farmer::node_client::caching_proxy_node_client::CachingProxyNodeClient; @@ -49,12 +50,10 @@ use subspace_farmer::utils::{ thread_pool_core_indices, AsyncJoinOnDrop, }; use subspace_farmer_components::reading::ReadSectorRecordChunksMode; -use subspace_farmer_components::PieceGetter; use subspace_kzg::Kzg; use subspace_metrics::{start_prometheus_metrics_server, RegistryAdapter}; use subspace_networking::utils::piece_provider::PieceProvider; use subspace_proof_of_space::Table; -use tokio::sync::{Barrier, Semaphore}; use tracing::{error, info, info_span, warn, Instrument}; /// Get piece retry attempts number. @@ -68,6 +67,8 @@ const GET_PIECE_MAX_INTERVAL: Duration = Duration::from_secs(40); const MAX_SPACE_PLEDGED_FOR_PLOT_CACHE_ON_WINDOWS: u64 = 7 * 1024 * 1024 * 1024 * 1024; const FARM_ERROR_PRINT_INTERVAL: Duration = Duration::from_secs(30); const PLOTTING_RETRY_INTERVAL: Duration = Duration::from_secs(5); +/// Multiplier on top of outgoing connections number for piece downloading purposes +const PIECE_PROVIDER_MULTIPLIER: usize = 10; type FarmIndex = u8; @@ -196,8 +197,7 @@ pub(crate) struct FarmingArgs { /// `size` is max allocated size in human-readable format (e.g. 10GB, 2TiB) or just bytes that /// farmer will make sure to not exceed (and will pre-allocated all the space on startup to /// ensure it will not run out of space in runtime). Optionally, `record-chunks-mode` can be - /// set to `ConcurrentChunks` or `WholeSector` in order to avoid internal benchmarking during - /// startup. + /// set to `ConcurrentChunks` (default) or `WholeSector`. disk_farms: Vec, /// WebSocket RPC URL of the Subspace node to connect to #[arg(long, value_hint = ValueHint::Url, default_value = "ws://127.0.0.1:9944")] @@ -426,11 +426,13 @@ where let (farmer_cache, farmer_cache_worker) = FarmerCache::new(node_client.clone(), peer_id, Some(&mut registry)); + let farmer_caches = FarmerCaches::from(farmer_cache.clone()); let node_client = CachingProxyNodeClient::new(node_client) .await .map_err(|error| anyhow!("Failed to create caching proxy node client: {error}"))?; + let out_connections = network_args.out_connections; let (node, mut node_runner) = { if network_args.bootstrap_nodes.is_empty() { network_args @@ -445,7 +447,7 @@ where network_args, Arc::downgrade(&plotted_pieces), node_client.clone(), - farmer_cache.clone(), + farmer_caches.clone(), should_start_prometheus_server.then_some(&mut registry), ) .map_err(|error| anyhow!("Failed to configure networking: {error}"))? 
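The hunks below thread a shared semaphore into `PieceProvider`, sized at `out_connections * PIECE_PROVIDER_MULTIPLIER`, so piece downloads can oversubscribe the connection pool by a bounded factor instead of without limit. A minimal sketch of that bound, assuming the `async-lock` semaphore the diff imports; the `out_connections` value here is a hypothetical `--out-connections` setting:

```rust
use async_lock::Semaphore;
use std::sync::Arc;

// Mirrors the constant added in farm.rs and controller.rs.
const PIECE_PROVIDER_MULTIPLIER: usize = 10;

fn main() {
    let out_connections: usize = 100; // hypothetical CLI value
    let permits = Arc::new(Semaphore::new(out_connections * PIECE_PROVIDER_MULTIPLIER));

    futures::executor::block_on(async {
        // Each in-flight piece download holds one permit; once all permits
        // are taken, further downloads wait instead of piling more work
        // onto the same outgoing connections.
        let _permit = permits.acquire().await;
        // ... download a piece while holding the permit ...
    });
}
```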
@@ -460,11 +462,14 @@ where let piece_provider = PieceProvider::new( node.clone(), SegmentCommitmentPieceValidator::new(node.clone(), node_client.clone(), kzg.clone()), + Arc::new(Semaphore::new( + out_connections as usize * PIECE_PROVIDER_MULTIPLIER, + )), ); let piece_getter = FarmerPieceGetter::new( piece_provider, - farmer_cache.clone(), + farmer_caches, node_client.clone(), Arc::clone(&plotted_pieces), DsnCacheRetryPolicy { @@ -567,9 +572,6 @@ where let (farms, plotting_delay_senders) = { let info_mutex = &AsyncMutex::new(()); - let faster_read_sector_record_chunks_mode_barrier = - Arc::new(Barrier::new(disk_farms.len())); - let faster_read_sector_record_chunks_mode_concurrency = Arc::new(Semaphore::new(1)); let (plotting_delay_senders, plotting_delay_receivers) = (0..disk_farms.len()) .map(|_| oneshot::channel()) .unzip::<_, _, Vec<_>, Vec<_>>(); @@ -587,10 +589,6 @@ where let erasure_coding = erasure_coding.clone(); let plotter = Arc::clone(&plotter); let global_mutex = Arc::clone(&global_mutex); - let faster_read_sector_record_chunks_mode_barrier = - Arc::clone(&faster_read_sector_record_chunks_mode_barrier); - let faster_read_sector_record_chunks_mode_concurrency = - Arc::clone(&faster_read_sector_record_chunks_mode_concurrency); async move { let farm_fut = SingleDiskFarm::new::<_, PosTable>( @@ -611,9 +609,8 @@ where max_plotting_sectors_per_farm, disable_farm_locking, read_sector_record_chunks_mode: disk_farm - .read_sector_record_chunks_mode, - faster_read_sector_record_chunks_mode_barrier, - faster_read_sector_record_chunks_mode_concurrency, + .read_sector_record_chunks_mode + .unwrap_or(ReadSectorRecordChunksMode::ConcurrentChunks), registry: Some(registry), create, }, @@ -1072,7 +1069,10 @@ where cuda_devices .into_iter() .map(|cuda_device| CudaRecordsEncoder::new(cuda_device, Arc::clone(&global_mutex))) - .collect(), + .collect::>() + .map_err(|error| { + anyhow::anyhow!("Failed to create CUDA records encoder: {error}") + })?, global_mutex, kzg, erasure_coding, @@ -1151,7 +1151,10 @@ where rocm_devices .into_iter() .map(|rocm_device| RocmRecordsEncoder::new(rocm_device, Arc::clone(&global_mutex))) - .collect(), + .collect::>() + .map_err(|error| { + anyhow::anyhow!("Failed to create ROCm records encoder: {error}") + })?, global_mutex, kzg, erasure_coding, diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/shared/network.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/shared/network.rs index 78815f6839..158a8edd31 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/shared/network.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/shared/network.rs @@ -9,7 +9,7 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::path::Path; use std::sync::{Arc, Weak}; use subspace_farmer::farm::plotted_pieces::PlottedPieces; -use subspace_farmer::farmer_cache::FarmerCache; +use subspace_farmer::farmer_cache::FarmerCaches; use subspace_farmer::node_client::NodeClientExt; use subspace_farmer::KNOWN_PEERS_CACHE_SIZE; use subspace_networking::libp2p::identity::Keypair; @@ -96,9 +96,9 @@ pub(in super::super) fn configure_network( }: NetworkArgs, weak_plotted_pieces: Weak>>, node_client: NC, - farmer_cache: FarmerCache, + farmer_caches: FarmerCaches, prometheus_metrics_registry: Option<&mut Registry>, -) -> Result<(Node, NodeRunner), anyhow::Error> +) -> Result<(Node, NodeRunner), anyhow::Error> where FarmIndex: Hash + Eq + Copy + fmt::Debug + Send + Sync + 'static, usize: From, @@ -119,7 +119,7 @@ where let default_config = 
Config::new( protocol_prefix, keypair, - farmer_cache.clone(), + farmer_caches.clone(), prometheus_metrics_registry, ); let config = Config { @@ -130,7 +130,7 @@ where request_response_protocols: vec![ { let maybe_weak_node = Arc::clone(&maybe_weak_node); - let farmer_cache = farmer_cache.clone(); + let farmer_caches = farmer_caches.clone(); CachedPieceByIndexRequestHandler::create(move |peer_id, request| { let CachedPieceByIndexRequest { @@ -140,14 +140,14 @@ where debug!(?piece_index, "Cached piece request received"); let maybe_weak_node = Arc::clone(&maybe_weak_node); - let farmer_cache = farmer_cache.clone(); + let farmer_caches = farmer_caches.clone(); let mut cached_pieces = Arc::unwrap_or_clone(cached_pieces); async move { let piece_from_cache = - farmer_cache.get_piece(piece_index.to_multihash()).await; + farmer_caches.get_piece(piece_index.to_multihash()).await; cached_pieces.truncate(CachedPieceByIndexRequest::RECOMMENDED_LIMIT); - let cached_pieces = farmer_cache.has_pieces(cached_pieces).await; + let cached_pieces = farmer_caches.has_pieces(cached_pieces).await; Some(CachedPieceByIndexResponse { result: match piece_from_cache { @@ -190,13 +190,14 @@ where debug!(?piece_index, "Piece request received. Trying cache..."); let weak_plotted_pieces = weak_plotted_pieces.clone(); - let farmer_cache = farmer_cache.clone(); + let farmer_caches = farmer_caches.clone(); let mut cached_pieces = Arc::unwrap_or_clone(cached_pieces); async move { - let piece_from_cache = farmer_cache.get_piece(piece_index.to_multihash()).await; + let piece_from_cache = + farmer_caches.get_piece(piece_index.to_multihash()).await; cached_pieces.truncate(PieceByIndexRequest::RECOMMENDED_LIMIT); - let cached_pieces = farmer_cache.has_pieces(cached_pieces).await; + let cached_pieces = farmer_caches.has_pieces(cached_pieces).await; if let Some(piece) = piece_from_cache { Some(PieceByIndexResponse { diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/main.rs b/crates/subspace-farmer/src/bin/subspace-farmer/main.rs index 440309fb91..7c9634f9e0 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/main.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/main.rs @@ -8,11 +8,9 @@ use std::path::PathBuf; use std::process::exit; use std::{fs, panic}; use subspace_farmer::single_disk_farm::{ScrubTarget, SingleDiskFarm}; +use subspace_logging::init_logger; use subspace_proof_of_space::chia::ChiaTable; use tracing::info; -use tracing_subscriber::filter::LevelFilter; -use tracing_subscriber::prelude::*; -use tracing_subscriber::{fmt, EnvFilter}; #[global_allocator] static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -77,23 +75,7 @@ async fn main() -> anyhow::Result<()> { exit(1); })); - tracing_subscriber::registry() - .with( - fmt::layer() - // TODO: Workaround for https://github.com/tokio-rs/tracing/issues/2214, also on - // Windows terminal doesn't support the same colors as bash does - .with_ansi(if cfg!(windows) { - false - } else { - supports_color::on(supports_color::Stream::Stderr).is_some() - }) - .with_filter( - EnvFilter::builder() - .with_default_directive(LevelFilter::INFO.into()) - .from_env_lossy(), - ), - ) - .init(); + init_logger(); utils::raise_fd_limit(); let command = Command::parse(); diff --git a/crates/subspace-farmer/src/cluster/controller.rs b/crates/subspace-farmer/src/cluster/controller.rs index fba5e02563..0c8eefa702 100644 --- a/crates/subspace-farmer/src/cluster/controller.rs +++ b/crates/subspace-farmer/src/cluster/controller.rs @@ -25,18 +25,23 @@ use 
futures::stream::FuturesUnordered; use futures::{select, stream, FutureExt, Stream, StreamExt}; use parity_scale_codec::{Decode, Encode}; use parking_lot::Mutex; +use rand::prelude::*; use std::collections::{HashMap, HashSet}; use std::pin::Pin; use std::sync::Arc; use std::task::Poll; use subspace_core_primitives::pieces::{Piece, PieceIndex}; use subspace_core_primitives::segments::{SegmentHeader, SegmentIndex}; -use subspace_farmer_components::PieceGetter; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_rpc_primitives::{ FarmerAppInfo, RewardSignatureResponse, RewardSigningInfo, SlotInfo, SolutionResponse, }; use tracing::{debug, error, trace, warn}; +/// Special "cache group" that all controllers subscribe to and that can be used to query any cache +/// group. The cache group for each query is chosen at random. +const GLOBAL_CACHE_GROUP: &str = "_"; + /// Broadcast sent by controllers requesting farmers to identify themselves #[derive(Debug, Copy, Clone, Encode, Decode)] pub struct ClusterControllerFarmerIdentifyBroadcast; @@ -152,7 +157,7 @@ struct ClusterControllerFindPieceInCacheRequest { } impl GenericRequest for ClusterControllerFindPieceInCacheRequest { - const SUBJECT: &'static str = "subspace.controller.find-piece-in-cache"; + const SUBJECT: &'static str = "subspace.controller.*.find-piece-in-cache"; type Response = Option<(PieceCacheId, PieceCacheOffset)>; } @@ -163,7 +168,7 @@ struct ClusterControllerFindPiecesInCacheRequest { } impl GenericStreamRequest for ClusterControllerFindPiecesInCacheRequest { - const SUBJECT: &'static str = "subspace.controller.find-pieces-in-cache"; + const SUBJECT: &'static str = "subspace.controller.*.find-pieces-in-cache"; /// Only pieces that were found are returned type Response = (PieceIndex, PieceCacheId, PieceCacheOffset); } @@ -195,6 +200,7 @@ impl GenericStreamRequest for ClusterControllerPiecesRequest { #[derive(Debug, Clone)] pub struct ClusterPieceGetter { nats_client: NatsClient, + cache_group: String, } #[async_trait] @@ -204,7 +210,7 @@ impl PieceGetter for ClusterPieceGetter { .nats_client .request( &ClusterControllerFindPieceInCacheRequest { piece_index }, - None, + Some(&self.cache_group), ) .await? { @@ -274,18 +280,14 @@ impl PieceGetter for ClusterPieceGetter { .await?) 
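The subject changes above make the controller's find-piece requests group-addressable: the `*` segment of the NATS subject is filled with a concrete cache group, and the special `_` group acts as a catch-all that every controller also serves. As a rough illustration of how such a wildcard subject is instantiated for a concrete group (the helper below is hypothetical, not part of `NatsClient`):

```rust
/// A minimal sketch (not the crate's actual routing code) of instantiating a
/// wildcard subject like "subspace.controller.*.find-piece-in-cache" for a
/// concrete cache group; `_` is the catch-all group every controller serves.
fn instantiate_subject(wildcard_subject: &str, cache_group: Option<&str>) -> String {
    // Fall back to the special global cache group when none is specified
    let group = cache_group.unwrap_or("_");
    wildcard_subject.replacen('*', group, 1)
}

fn main() {
    let subject = "subspace.controller.*.find-piece-in-cache";
    assert_eq!(
        instantiate_subject(subject, Some("group-a")),
        "subspace.controller.group-a.find-piece-in-cache"
    );
    assert_eq!(
        instantiate_subject(subject, None),
        "subspace.controller._.find-piece-in-cache"
    );
    println!("subject routing ok");
}
```

Queries built with `None` land on `_`, where the responder picks one of its cache groups at random, as the responders later in this diff do.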
} - async fn get_pieces<'a, PieceIndices>( + async fn get_pieces<'a>( &'a self, - piece_indices: PieceIndices, + piece_indices: Vec, ) -> anyhow::Result< Box>)> + Send + Unpin + 'a>, - > - where - PieceIndices: IntoIterator + Send + 'a, - { + > { let (tx, mut rx) = mpsc::unbounded(); - let piece_indices = piece_indices.into_iter().collect::>(); let piece_indices_to_get = Mutex::new(piece_indices.iter().copied().collect::>()); @@ -296,7 +298,7 @@ impl PieceGetter for ClusterPieceGetter { .nats_client .stream_request( &ClusterControllerFindPiecesInCacheRequest { piece_indices }, - None, + Some(&self.cache_group), ) .await?; @@ -432,8 +434,11 @@ impl PieceGetter for ClusterPieceGetter { impl ClusterPieceGetter { /// Create new instance #[inline] - pub fn new(nats_client: NatsClient) -> Self { - Self { nats_client } + pub fn new(nats_client: NatsClient, cache_group: Option) -> Self { + Self { + nats_client, + cache_group: cache_group.unwrap_or_else(|| GLOBAL_CACHE_GROUP.to_string()), + } } } @@ -609,7 +614,7 @@ pub async fn controller_service( nats_client: &NatsClient, node_client: &NC, piece_getter: &PG, - farmer_cache: &FarmerCache, + farmer_caches: &[(&str, &FarmerCache)], instance: &str, primary_instance: bool, ) -> anyhow::Result<()> @@ -640,10 +645,10 @@ where result = segment_headers_responder(nats_client, node_client).fuse() => { result }, - result = find_piece_responder(nats_client, farmer_cache).fuse() => { + result = find_piece_responder(nats_client, farmer_caches).fuse() => { result }, - result = find_pieces_responder(nats_client, farmer_cache).fuse() => { + result = find_pieces_responder(nats_client, farmer_caches).fuse() => { result }, result = piece_responder(nats_client, piece_getter).fuse() => { @@ -661,10 +666,10 @@ where result = segment_headers_responder(nats_client, node_client).fuse() => { result }, - result = find_piece_responder(nats_client, farmer_cache).fuse() => { + result = find_piece_responder(nats_client, farmer_caches).fuse() => { result }, - result = find_pieces_responder(nats_client, farmer_cache).fuse() => { + result = find_pieces_responder(nats_client, farmer_caches).fuse() => { result }, result = piece_responder(nats_client, piece_getter).fuse() => { @@ -907,32 +912,66 @@ where async fn find_piece_responder( nats_client: &NatsClient, - farmer_cache: &FarmerCache, + farmer_caches: &[(&str, &FarmerCache)], ) -> anyhow::Result<()> { - nats_client - .request_responder( - None, + futures::future::try_join( + farmer_caches + .iter() + .map(|(cache_group, farmer_cache)| { + nats_client.request_responder( + Some(cache_group), + Some("subspace.controller".to_string()), + move |ClusterControllerFindPieceInCacheRequest { piece_index }| async move { + Some(farmer_cache.find_piece(piece_index).await) + }, + ) + }) + .collect::>() + .next() + .map(|result| result.unwrap_or(Ok(()))), + nats_client.request_responder( + Some(GLOBAL_CACHE_GROUP), Some("subspace.controller".to_string()), |ClusterControllerFindPieceInCacheRequest { piece_index }| async move { + let (_cache_group, farmer_cache) = farmer_caches.iter().choose(&mut thread_rng())?; Some(farmer_cache.find_piece(piece_index).await) }, - ) - .await + ), + ) + .await + .map(|((), ())| ()) } async fn find_pieces_responder( nats_client: &NatsClient, - farmer_cache: &FarmerCache, + farmer_caches: &[(&str, &FarmerCache)], ) -> anyhow::Result<()> { - nats_client - .stream_request_responder( - None, + futures::future::try_join( + farmer_caches + .iter() + .map(|(cache_group, farmer_cache)| { + 
nats_client.stream_request_responder( + Some(cache_group), + Some("subspace.controller".to_string()), + move |ClusterControllerFindPiecesInCacheRequest { piece_indices }| async move { + Some(stream::iter(farmer_cache.find_pieces(piece_indices).await)) + }, + ) + }) + .collect::>() + .next() + .map(|result| result.unwrap_or(Ok(()))), + nats_client.stream_request_responder( + Some(GLOBAL_CACHE_GROUP), Some("subspace.controller".to_string()), |ClusterControllerFindPiecesInCacheRequest { piece_indices }| async move { + let (_cache_group, farmer_cache) = farmer_caches.iter().choose(&mut thread_rng())?; Some(stream::iter(farmer_cache.find_pieces(piece_indices).await)) }, - ) - .await + ), + ) + .await + .map(|((), ())| ()) } async fn piece_responder(nats_client: &NatsClient, piece_getter: &PG) -> anyhow::Result<()> diff --git a/crates/subspace-farmer/src/cluster/controller/caches.rs b/crates/subspace-farmer/src/cluster/controller/caches.rs index a4a398743f..17a8526ba2 100644 --- a/crates/subspace-farmer/src/cluster/controller/caches.rs +++ b/crates/subspace-farmer/src/cluster/controller/caches.rs @@ -94,7 +94,7 @@ impl KnownCaches { pub async fn maintain_caches( cache_group: &str, nats_client: &NatsClient, - farmer_cache: FarmerCache, + farmer_cache: &FarmerCache, identification_broadcast_interval: Duration, ) -> anyhow::Result<()> { let mut known_caches = KnownCaches::new(identification_broadcast_interval); diff --git a/crates/subspace-farmer/src/cluster/plotter.rs b/crates/subspace-farmer/src/cluster/plotter.rs index 5398e827f0..4d67819ed5 100644 --- a/crates/subspace-farmer/src/cluster/plotter.rs +++ b/crates/subspace-farmer/src/cluster/plotter.rs @@ -31,6 +31,7 @@ use std::time::{Duration, Instant}; use subspace_core_primitives::sectors::SectorIndex; use subspace_core_primitives::PublicKey; use subspace_farmer_components::plotting::PlottedSector; +use subspace_farmer_components::sector::sector_size; use subspace_farmer_components::FarmerProtocolInfo; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tokio::time::MissedTickBehavior; @@ -380,7 +381,12 @@ impl ClusterPlotter { } }; - let (mut sector_sender, sector_receiver) = mpsc::channel(1); + // Allow to buffer up to the whole sector in memory to not block plotter on the + // other side + let (mut sector_sender, sector_receiver) = mpsc::channel( + (sector_size(pieces_in_sector) / nats_client.approximate_max_message_size()) + .max(1), + ); let mut maybe_sector_receiver = Some(sector_receiver); loop { match tokio::time::timeout(PING_TIMEOUT, response_stream.next()).await { diff --git a/crates/subspace-farmer/src/disk_piece_cache.rs b/crates/subspace-farmer/src/disk_piece_cache.rs index 3456de2b40..26bead4f27 100644 --- a/crates/subspace-farmer/src/disk_piece_cache.rs +++ b/crates/subspace-farmer/src/disk_piece_cache.rs @@ -27,7 +27,7 @@ use subspace_farmer_components::file_ext::FileExt; use thiserror::Error; use tokio::runtime::Handle; use tokio::task; -use tracing::{debug, info, warn}; +use tracing::{debug, info, warn, Span}; /// How many pieces should be skipped before stopping to check the rest of contents, this allows to /// not miss most of the pieces after one or two corrupted pieces @@ -129,7 +129,10 @@ impl farm::PieceCache for DiskPieceCache { > { let this = self.clone(); let (mut sender, receiver) = mpsc::channel(100_000); + let span = Span::current(); let read_contents = task::spawn_blocking(move || { + let _guard = span.enter(); + let contents = this.contents(); for (piece_cache_offset, maybe_piece) in contents { if let 
Err(error) = @@ -175,8 +178,13 @@ impl farm::PieceCache for DiskPieceCache { offset: PieceCacheOffset, ) -> Result, FarmError> { let piece_cache = self.clone(); + let span = Span::current(); Ok(AsyncJoinOnDrop::new( - task::spawn_blocking(move || piece_cache.read_piece_index(offset)), + task::spawn_blocking(move || { + let _guard = span.enter(); + + piece_cache.read_piece_index(offset) + }), false, ) .await??) @@ -186,6 +194,8 @@ impl farm::PieceCache for DiskPieceCache { &self, offset: PieceCacheOffset, ) -> Result, FarmError> { + let span = Span::current(); + // TODO: On Windows spawning blocking task that allows concurrent reads causes huge memory // usage. No idea why it happens, but not spawning anything at all helps for some reason. // Someone at some point should figure it out and fix, but it will probably be not me @@ -193,11 +203,19 @@ impl farm::PieceCache for DiskPieceCache { // See https://github.com/autonomys/subspace/issues/2813 and linked forum post for details. // This TODO exists in multiple files if cfg!(windows) { - Ok(task::block_in_place(|| self.read_piece(offset))?) + Ok(task::block_in_place(|| { + let _guard = span.enter(); + + self.read_piece(offset) + })?) } else { let piece_cache = self.clone(); Ok(AsyncJoinOnDrop::new( - task::spawn_blocking(move || piece_cache.read_piece(offset)), + task::spawn_blocking(move || { + let _guard = span.enter(); + + piece_cache.read_piece(offset) + }), false, ) .await??) diff --git a/crates/subspace-farmer/src/farmer_cache.rs b/crates/subspace-farmer/src/farmer_cache.rs index 07d1fd686d..4912fd161d 100644 --- a/crates/subspace-farmer/src/farmer_cache.rs +++ b/crates/subspace-farmer/src/farmer_cache.rs @@ -16,11 +16,12 @@ use crate::utils::run_future_in_dedicated_thread; use async_lock::RwLock as AsyncRwLock; use event_listener_primitives::{Bag, HandlerId}; use futures::channel::mpsc; -use futures::future::FusedFuture; +use futures::future::{Either, FusedFuture}; use futures::stream::{FuturesOrdered, FuturesUnordered}; use futures::{select, stream, FutureExt, SinkExt, Stream, StreamExt}; -use parking_lot::Mutex; +use parking_lot::{Mutex, RwLock}; use prometheus_client::registry::Registry; +use rand::prelude::*; use rayon::prelude::*; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; @@ -32,7 +33,7 @@ use std::time::Duration; use std::{fmt, mem}; use subspace_core_primitives::pieces::{Piece, PieceIndex}; use subspace_core_primitives::segments::{SegmentHeader, SegmentIndex}; -use subspace_farmer_components::PieceGetter; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_networking::libp2p::kad::{ProviderRecord, RecordKey}; use subspace_networking::libp2p::PeerId; use subspace_networking::utils::multihash::ToMultihash; @@ -541,6 +542,7 @@ where let piece_indices_to_store = piece_indices_to_store.into_iter().enumerate(); let downloading_semaphore = &Semaphore::new(SYNC_BATCH_SIZE * SYNC_CONCURRENT_BATCHES); + let ignored_cache_indices = &RwLock::new(HashSet::new()); let downloading_pieces_stream = stream::iter(piece_indices_to_store.map(|(batch, piece_indices)| { @@ -571,15 +573,16 @@ where let piece = match result { Ok(Some(piece)) => { - trace!(%piece_index, "Downloaded piece successfully"); + trace!(%batch, %piece_index, "Downloaded piece successfully"); piece } Ok(None) => { - debug!(%piece_index, "Couldn't find piece"); + debug!(%batch, %piece_index, "Couldn't find piece"); continue; } Err(error) => { debug!( + %batch, %error, %piece_index, "Failed to get piece for piece cache" @@ 
-596,6 +599,7 @@ where // Find plot in which there is a place for new piece to be stored let Some(offset) = caches.pop_free_offset() else { error!( + %batch, %piece_index, "Failed to store piece in cache, there was no space" ); @@ -608,23 +612,37 @@ where let cache_index = offset.cache_index; let piece_offset = offset.piece_offset; - if let Some(backend) = maybe_backend - && let Err(error) = - backend.write_piece(piece_offset, piece_index, &piece).await - { - // TODO: Will likely need to cache problematic backend indices to avoid hitting it over and over again repeatedly - error!( - %error, + let skip_write = ignored_cache_indices.read().contains(&cache_index); + if skip_write { + trace!( + %batch, %cache_index, %piece_index, %piece_offset, - "Failed to write piece into cache" + "Skipping known problematic cache index" ); - continue; - } + } else { + if let Some(backend) = maybe_backend + && let Err(error) = + backend.write_piece(piece_offset, piece_index, &piece).await + { + error!( + %error, + %batch, + %cache_index, + %piece_index, + %piece_offset, + "Failed to write piece into cache, ignoring this cache going \ forward" + ); + ignored_cache_indices.write().insert(cache_index); + continue; + } - let key = KeyWithDistance::new(self.peer_id, piece_index.to_multihash()); - caches.lock().push_stored_piece(key, offset); + let key = + KeyWithDistance::new(self.peer_id, piece_index.to_multihash()); + caches.lock().push_stored_piece(key, offset); + } let prev_downloaded_pieces_count = downloaded_pieces_count.fetch_add(1, Ordering::Relaxed); @@ -654,7 +672,7 @@ where downloading_pieces_stream // This allows to schedule new batch while previous batches partially completed, but // avoids excessive memory usage like when all futures are created upfront - .buffer_unordered(SYNC_CONCURRENT_BATCHES * 2) + .buffer_unordered(SYNC_CONCURRENT_BATCHES * 10) // Simply drain everything .for_each(|()| async {}) .await; @@ -1644,6 +1662,83 @@ impl LocalRecordProvider for FarmerCache { } } +/// Collection of [`FarmerCache`] instances for load balancing +#[derive(Debug, Clone)] +pub struct FarmerCaches { + caches: Arc<[FarmerCache]>, +} + +impl From<Arc<[FarmerCache]>> for FarmerCaches { + fn from(caches: Arc<[FarmerCache]>) -> Self { + Self { caches } + } +} + +impl From<FarmerCache> for FarmerCaches { + fn from(cache: FarmerCache) -> Self { + Self { + caches: Arc::new([cache]), + } + } +} + +impl FarmerCaches { + /// Get piece from cache + pub async fn get_piece<Key>(&self, key: Key) -> Option<Piece> + where + RecordKey: From<Key>, + { + let farmer_cache = self.caches.choose(&mut thread_rng())?; + farmer_cache.get_piece(key).await + }
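The `FarmerCaches` wrapper introduced here applies one rule throughout: reads go to a single cache chosen uniformly at random, while writes fan out to every cache so their contents stay interchangeable. A minimal standalone sketch of that split, with illustrative types rather than the crate's API:

```rust
use rand::prelude::*;

// Stand-in for a set of interchangeable caches
struct Caches<T> {
    caches: Vec<T>,
}

impl<T> Caches<T> {
    // Reads: any single cache can answer, so pick one at random
    fn read_target(&self) -> Option<&T> {
        self.caches.choose(&mut thread_rng())
    }

    // Writes: fan out to every cache so reads stay interchangeable
    fn write_targets(&self) -> impl Iterator<Item = &T> {
        self.caches.iter()
    }
}

fn main() {
    let caches = Caches {
        caches: vec!["cache-0", "cache-1", "cache-2"],
    };
    println!("read from {:?}", caches.read_target());
    println!("write to {} caches", caches.write_targets().count());
}
```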
+ + /// Get pieces from cache. + /// + /// Number of elements in returned stream is the same as number of unique `piece_indices`. + pub async fn get_pieces<'a, PieceIndices>( + &'a self, + piece_indices: PieceIndices, + ) -> impl Stream<Item = (PieceIndex, Option<Piece>)> + Send + Unpin + 'a + where + PieceIndices: IntoIterator<Item = PieceIndex> + Send + 'a, + { + let Some(farmer_cache) = self.caches.choose(&mut thread_rng()) else { + return Either::Left(stream::iter( + piece_indices + .into_iter() + .map(|piece_index| (piece_index, None)), + )); + }; + + Either::Right(farmer_cache.get_pieces(piece_indices).await) + } + + /// Returns a filtered list of pieces that were found in farmer cache, order is not guaranteed + pub async fn has_pieces(&self, piece_indices: Vec<PieceIndex>) -> Vec<PieceIndex> { + let Some(farmer_cache) = self.caches.choose(&mut thread_rng()) else { + return Vec::new(); + }; + + farmer_cache.has_pieces(piece_indices).await + } + + /// Try to store a piece in additional downloaded pieces, if there is space for them + pub async fn maybe_store_additional_piece(&self, piece_index: PieceIndex, piece: &Piece) { + self.caches + .iter() + .map(|farmer_cache| farmer_cache.maybe_store_additional_piece(piece_index, piece)) + .collect::<FuturesUnordered<_>>() + .for_each(|()| async {}) + .await; + } +} + +impl LocalRecordProvider for FarmerCaches { + fn record(&self, key: &RecordKey) -> Option<ProviderRecord> { + self.caches.choose(&mut thread_rng())?.record(key) + } +} + /// Extracts the `PieceIndex` from a `RecordKey`. fn decode_piece_index_from_record_key(key: &RecordKey) -> PieceIndex { let len = key.as_ref().len(); diff --git a/crates/subspace-farmer/src/farmer_cache/tests.rs b/crates/subspace-farmer/src/farmer_cache/tests.rs index 2c55c5c32f..bfaba15959 100644 --- a/crates/subspace-farmer/src/farmer_cache/tests.rs +++ b/crates/subspace-farmer/src/farmer_cache/tests.rs @@ -17,7 +17,8 @@ use subspace_core_primitives::pieces::{Piece, PieceIndex}; use subspace_core_primitives::segments::{ HistorySize, LastArchivedBlock, SegmentHeader, SegmentIndex, }; -use subspace_farmer_components::{FarmerProtocolInfo, PieceGetter}; +use subspace_data_retrieval::piece_getter::PieceGetter; +use subspace_farmer_components::FarmerProtocolInfo; use subspace_networking::libp2p::identity; use subspace_networking::libp2p::kad::RecordKey; use subspace_networking::utils::multihash::ToMultihash; @@ -154,15 +155,12 @@ impl PieceGetter for MockPieceGetter { )) } - async fn get_pieces<'a, PieceIndices>( + async fn get_pieces<'a>( &'a self, - piece_indices: PieceIndices, + piece_indices: Vec<PieceIndex>, ) -> anyhow::Result< Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>, - > - where - PieceIndices: IntoIterator<Item = PieceIndex> + Send + 'a, - { + > { Ok(Box::new( piece_indices .into_iter() diff --git a/crates/subspace-farmer/src/farmer_piece_getter.rs b/crates/subspace-farmer/src/farmer_piece_getter.rs index 45a114ac49..fe850f0223 100644 --- a/crates/subspace-farmer/src/farmer_piece_getter.rs +++ b/crates/subspace-farmer/src/farmer_piece_getter.rs @@ -1,7 +1,7 @@ //!
Farmer-specific piece getter use crate::farm::plotted_pieces::PlottedPieces; -use crate::farmer_cache::FarmerCache; +use crate::farmer_cache::FarmerCaches; use crate::node_client::NodeClient; use async_lock::RwLock as AsyncRwLock; use async_trait::async_trait; @@ -19,7 +19,7 @@ use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Weak}; use std::task::{Context, Poll}; use subspace_core_primitives::pieces::{Piece, PieceIndex}; -use subspace_farmer_components::PieceGetter; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_networking::utils::multihash::ToMultihash; use subspace_networking::utils::piece_provider::{PieceProvider, PieceValidator}; use tracing::{debug, error, trace}; @@ -39,7 +39,7 @@ pub struct DsnCacheRetryPolicy { struct Inner { piece_provider: PieceProvider, - farmer_cache: FarmerCache, + farmer_caches: FarmerCaches, node_client: NC, plotted_pieces: Arc>>, dsn_cache_retry_policy: DsnCacheRetryPolicy, @@ -78,7 +78,7 @@ where /// Create new instance pub fn new( piece_provider: PieceProvider, - farmer_cache: FarmerCache, + farmer_caches: FarmerCaches, node_client: NC, plotted_pieces: Arc>>, dsn_cache_retry_policy: DsnCacheRetryPolicy, @@ -86,7 +86,7 @@ where Self { inner: Arc::new(Inner { piece_provider, - farmer_cache, + farmer_caches, node_client, plotted_pieces, dsn_cache_retry_policy, @@ -104,7 +104,7 @@ where trace!(%piece_index, "Getting piece from farmer cache"); if let Some(piece) = inner - .farmer_cache + .farmer_caches .get_piece(piece_index.to_multihash()) .await { @@ -117,7 +117,7 @@ where if let Some(piece) = inner.piece_provider.get_piece_from_cache(piece_index).await { trace!(%piece_index, "Got piece from DSN L2 cache"); inner - .farmer_cache + .farmer_caches .maybe_store_additional_piece(piece_index, &piece) .await; return Some(piece); @@ -129,7 +129,7 @@ where Ok(Some(piece)) => { trace!(%piece_index, "Got piece from node successfully"); inner - .farmer_cache + .farmer_caches .maybe_store_additional_piece(piece_index, &piece) .await; return Some(piece); @@ -168,7 +168,7 @@ where if let Some(piece) = read_piece_fut.await { trace!(%piece_index, "Got piece from local plot successfully"); inner - .farmer_cache + .farmer_caches .maybe_store_additional_piece(piece_index, &piece) .await; return Some(piece); @@ -186,7 +186,7 @@ where if let Some(piece) = archival_storage_search_result { trace!(%piece_index, "DSN L1 lookup succeeded"); inner - .farmer_cache + .farmer_caches .maybe_store_additional_piece(piece_index, &piece) .await; return Some(piece); @@ -260,15 +260,12 @@ where Ok(None) } - async fn get_pieces<'a, PieceIndices>( + async fn get_pieces<'a>( &'a self, - piece_indices: PieceIndices, + piece_indices: Vec, ) -> anyhow::Result< Box>)> + Send + Unpin + 'a>, - > - where - PieceIndices: IntoIterator + Send + 'a, - { + > { let (tx, mut rx) = mpsc::unbounded(); let fut = async move { @@ -277,7 +274,7 @@ where debug!("Getting pieces from farmer cache"); let mut pieces_not_found_in_farmer_cache = Vec::new(); let mut pieces_in_farmer_cache = - self.inner.farmer_cache.get_pieces(piece_indices).await; + self.inner.farmer_caches.get_pieces(piece_indices).await; while let Some((piece_index, maybe_piece)) = pieces_in_farmer_cache.next().await { let Some(piece) = maybe_piece else { @@ -310,7 +307,7 @@ where }; // TODO: Would be nice to have concurrency here self.inner - .farmer_cache + .farmer_caches .maybe_store_additional_piece(piece_index, &piece) .await; tx.unbounded_send((piece_index, Ok(Some(piece)))) @@ -332,7 +329,7 @@ where 
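The `farmer_piece_getter.rs` hunks in this stretch are mostly the `farmer_cache` to `farmer_caches` rename, but the surrounding code spells out the getter's fallback order: the local farmer cache first, then the DSN L2 cache, then the node, then local plots, and finally DSN L1, with hits from deeper tiers written back into the cache. A condensed synchronous sketch of that tiering, with hypothetical stand-in types:

```rust
use std::collections::HashMap;

type Piece = Vec<u8>;

// Try each source from cheapest to most expensive; tiers stand in for the
// DSN L2 cache, the node RPC, local plots and DSN L1.
fn get_piece_tiered(
    piece_index: u64,
    cache: &mut HashMap<u64, Piece>,
    tiers: &[&dyn Fn(u64) -> Option<Piece>],
) -> Option<Piece> {
    if let Some(piece) = cache.get(&piece_index) {
        return Some(piece.clone());
    }
    for tier in tiers {
        if let Some(piece) = tier(piece_index) {
            // Write-back so the next request for this piece is a cache hit
            cache.insert(piece_index, piece.clone());
            return Some(piece);
        }
    }
    None
}

fn main() {
    let mut cache = HashMap::new();
    let l2 = |_: u64| None; // miss
    let node = |index: u64| Some(vec![index as u8]); // hit
    let tiers: [&dyn Fn(u64) -> Option<Piece>; 2] = [&l2, &node];

    assert_eq!(get_piece_tiered(7, &mut cache, &tiers), Some(vec![7]));
    // Second lookup is served from the cache
    assert!(cache.contains_key(&7));
}
```

The write-back step is what makes repeated requests cheap: once the first miss is resolved, later lookups stop at the first tier.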
Ok(Some(piece)) => { trace!(%piece_index, "Got piece from node successfully"); self.inner - .farmer_cache + .farmer_caches .maybe_store_additional_piece(piece_index, &piece) .await; @@ -467,15 +464,12 @@ where piece_getter.get_piece(piece_index).await } - async fn get_pieces<'a, PieceIndices>( + async fn get_pieces<'a>( &'a self, - piece_indices: PieceIndices, + piece_indices: Vec, ) -> anyhow::Result< Box>)> + Send + Unpin + 'a>, - > - where - PieceIndices: IntoIterator + Send + 'a, - { + > { let Some(piece_getter) = self.upgrade() else { debug!("Farmer piece getter upgrade didn't succeed"); return Ok(Box::new(stream::iter( @@ -487,7 +481,6 @@ where // TODO: This is necessary due to more complex lifetimes not yet supported by ouroboros, see // https://github.com/someguynamedjosh/ouroboros/issues/112 - let piece_indices = piece_indices.into_iter().collect::>(); let stream_with_piece_getter = StreamWithPieceGetter::try_new_async_send(piece_getter, move |piece_getter| { piece_getter.get_pieces(piece_indices) diff --git a/crates/subspace-farmer/src/plotter/cpu.rs b/crates/subspace-farmer/src/plotter/cpu.rs index de534fdf0c..1ee7981fc2 100644 --- a/crates/subspace-farmer/src/plotter/cpu.rs +++ b/crates/subspace-farmer/src/plotter/cpu.rs @@ -6,7 +6,7 @@ use crate::plotter::cpu::metrics::CpuPlotterMetrics; use crate::plotter::{Plotter, SectorPlottingProgress}; use crate::thread_pool_manager::PlottingThreadPoolManager; use crate::utils::AsyncJoinOnDrop; -use async_lock::Mutex as AsyncMutex; +use async_lock::{Mutex as AsyncMutex, Semaphore, SemaphoreGuardArc}; use async_trait::async_trait; use bytes::Bytes; use event_listener_primitives::{Bag, HandlerId}; @@ -27,15 +27,15 @@ use std::task::Poll; use std::time::Instant; use subspace_core_primitives::sectors::SectorIndex; use subspace_core_primitives::PublicKey; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_erasure_coding::ErasureCoding; use subspace_farmer_components::plotting::{ download_sector, encode_sector, write_sector, CpuRecordsEncoder, DownloadSectorOptions, EncodeSectorOptions, PlottingError, }; -use subspace_farmer_components::{FarmerProtocolInfo, PieceGetter}; +use subspace_farmer_components::FarmerProtocolInfo; use subspace_kzg::Kzg; use subspace_proof_of_space::Table; -use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tokio::task::yield_now; use tracing::{warn, Instrument}; @@ -87,7 +87,7 @@ where PosTable: Table, { async fn has_free_capacity(&self) -> Result { - Ok(self.downloading_semaphore.available_permits() > 0) + Ok(self.downloading_semaphore.try_acquire().is_some()) } async fn plot_sector( @@ -97,39 +97,13 @@ where farmer_protocol_info: FarmerProtocolInfo, pieces_in_sector: u16, replotting: bool, - mut progress_sender: mpsc::Sender, + progress_sender: mpsc::Sender, ) { let start = Instant::now(); // Done outside the future below as a backpressure, ensuring that it is not possible to // schedule unbounded number of plotting tasks - let downloading_permit = match Arc::clone(&self.downloading_semaphore) - .acquire_owned() - .await - { - Ok(downloading_permit) => downloading_permit, - Err(error) => { - warn!(%error, "Failed to acquire downloading permit"); - - let progress_updater = ProgressUpdater { - public_key, - sector_index, - handlers: Arc::clone(&self.handlers), - metrics: self.metrics.clone(), - }; - - progress_updater - .update_progress_and_events( - &mut progress_sender, - SectorPlottingProgress::Error { - error: format!("Failed to acquire downloading permit: {error}"), - }, - ) - .await; - - 
return; - } - }; + let downloading_permit = self.downloading_semaphore.acquire_arc().await; self.plot_sector_internal( start, @@ -155,8 +129,7 @@ where ) -> bool { let start = Instant::now(); - let Ok(downloading_permit) = Arc::clone(&self.downloading_semaphore).try_acquire_owned() - else { + let Some(downloading_permit) = self.downloading_semaphore.try_acquire_arc() else { return false; }; @@ -259,7 +232,7 @@ where async fn plot_sector_internal( &self, start: Instant, - downloading_permit: OwnedSemaphorePermit, + downloading_permit: SemaphoreGuardArc, public_key: PublicKey, sector_index: SectorIndex, farmer_protocol_info: FarmerProtocolInfo, diff --git a/crates/subspace-farmer/src/plotter/gpu.rs b/crates/subspace-farmer/src/plotter/gpu.rs index 07c3d8c89a..9a901900c7 100644 --- a/crates/subspace-farmer/src/plotter/gpu.rs +++ b/crates/subspace-farmer/src/plotter/gpu.rs @@ -11,7 +11,7 @@ use crate::plotter::gpu::gpu_encoders_manager::GpuRecordsEncoderManager; use crate::plotter::gpu::metrics::GpuPlotterMetrics; use crate::plotter::{Plotter, SectorPlottingProgress}; use crate::utils::AsyncJoinOnDrop; -use async_lock::Mutex as AsyncMutex; +use async_lock::{Mutex as AsyncMutex, Semaphore, SemaphoreGuardArc}; use async_trait::async_trait; use bytes::Bytes; use event_listener_primitives::{Bag, HandlerId}; @@ -30,14 +30,14 @@ use std::task::Poll; use std::time::Instant; use subspace_core_primitives::sectors::SectorIndex; use subspace_core_primitives::PublicKey; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_erasure_coding::ErasureCoding; use subspace_farmer_components::plotting::{ download_sector, encode_sector, write_sector, DownloadSectorOptions, EncodeSectorOptions, PlottingError, RecordsEncoder, }; -use subspace_farmer_components::{FarmerProtocolInfo, PieceGetter}; +use subspace_farmer_components::FarmerProtocolInfo; use subspace_kzg::Kzg; -use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tokio::task::yield_now; use tracing::{warn, Instrument}; @@ -97,7 +97,7 @@ where GRE: GpuRecordsEncoder + 'static, { async fn has_free_capacity(&self) -> Result { - Ok(self.downloading_semaphore.available_permits() > 0) + Ok(self.downloading_semaphore.try_acquire().is_some()) } async fn plot_sector( @@ -107,39 +107,13 @@ where farmer_protocol_info: FarmerProtocolInfo, pieces_in_sector: u16, _replotting: bool, - mut progress_sender: mpsc::Sender, + progress_sender: mpsc::Sender, ) { let start = Instant::now(); // Done outside the future below as a backpressure, ensuring that it is not possible to // schedule unbounded number of plotting tasks - let downloading_permit = match Arc::clone(&self.downloading_semaphore) - .acquire_owned() - .await - { - Ok(downloading_permit) => downloading_permit, - Err(error) => { - warn!(%error, "Failed to acquire downloading permit"); - - let progress_updater = ProgressUpdater { - public_key, - sector_index, - handlers: Arc::clone(&self.handlers), - metrics: self.metrics.clone(), - }; - - progress_updater - .update_progress_and_events( - &mut progress_sender, - SectorPlottingProgress::Error { - error: format!("Failed to acquire downloading permit: {error}"), - }, - ) - .await; - - return; - } - }; + let downloading_permit = self.downloading_semaphore.acquire_arc().await; self.plot_sector_internal( start, @@ -164,8 +138,7 @@ where ) -> bool { let start = Instant::now(); - let Ok(downloading_permit) = Arc::clone(&self.downloading_semaphore).try_acquire_owned() - else { + let Some(downloading_permit) = self.downloading_semaphore.try_acquire_arc() else 
{ return false; }; @@ -266,7 +239,7 @@ where async fn plot_sector_internal( &self, start: Instant, - downloading_permit: OwnedSemaphorePermit, + downloading_permit: SemaphoreGuardArc, public_key: PublicKey, sector_index: SectorIndex, farmer_protocol_info: FarmerProtocolInfo, diff --git a/crates/subspace-farmer/src/plotter/gpu/cuda.rs b/crates/subspace-farmer/src/plotter/gpu/cuda.rs index bbbedff731..84af098f9d 100644 --- a/crates/subspace-farmer/src/plotter/gpu/cuda.rs +++ b/crates/subspace-farmer/src/plotter/gpu/cuda.rs @@ -2,6 +2,9 @@ use crate::plotter::gpu::GpuRecordsEncoder; use async_lock::Mutex as AsyncMutex; +use parking_lot::Mutex; +use rayon::{current_thread_index, ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder}; +use std::process::exit; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use subspace_core_primitives::pieces::{PieceOffset, Record}; @@ -14,6 +17,7 @@ use subspace_proof_of_space_gpu::cuda::CudaDevice; #[derive(Debug)] pub struct CudaRecordsEncoder { cuda_device: CudaDevice, + thread_pool: ThreadPool, global_mutex: Arc>, } @@ -34,21 +38,46 @@ impl RecordsEncoder for CudaRecordsEncoder { .map_err(|error| anyhow::anyhow!("Failed to convert pieces in sector: {error}"))?; let mut sector_contents_map = SectorContentsMap::new(pieces_in_sector); - for ((piece_offset, record), mut encoded_chunks_used) in (PieceOffset::ZERO..) - .zip(records.iter_mut()) - .zip(sector_contents_map.iter_record_bitfields_mut()) { - // Take mutex briefly to make sure encoding is allowed right now - self.global_mutex.lock_blocking(); + let iter = Mutex::new( + (PieceOffset::ZERO..) + .zip(records.iter_mut()) + .zip(sector_contents_map.iter_record_bitfields_mut()), + ); + let plotting_error = Mutex::new(None::); - let pos_seed = sector_id.derive_evaluation_seed(piece_offset); + self.thread_pool.scope(|scope| { + scope.spawn_broadcast(|_scope, _ctx| loop { + // Take mutex briefly to make sure encoding is allowed right now + self.global_mutex.lock_blocking(); - self.cuda_device - .generate_and_encode_pospace(&pos_seed, record, encoded_chunks_used.iter_mut()) - .map_err(anyhow::Error::msg)?; + // This instead of `while` above because otherwise mutex will be held for the + // duration of the loop and will limit concurrency to 1 record + let Some(((piece_offset, record), mut encoded_chunks_used)) = + iter.lock().next() + else { + return; + }; + let pos_seed = sector_id.derive_evaluation_seed(piece_offset); - if abort_early.load(Ordering::Relaxed) { - break; + if let Err(error) = self.cuda_device.generate_and_encode_pospace( + &pos_seed, + record, + encoded_chunks_used.iter_mut(), + ) { + plotting_error.lock().replace(error); + return; + } + + if abort_early.load(Ordering::Relaxed) { + return; + } + }); + }); + + let plotting_error = plotting_error.lock().take(); + if let Some(error) = plotting_error { + return Err(anyhow::Error::msg(error)); } } @@ -58,10 +87,38 @@ impl RecordsEncoder for CudaRecordsEncoder { impl CudaRecordsEncoder { /// Create new instance - pub fn new(cuda_device: CudaDevice, global_mutex: Arc>) -> Self { - Self { + pub fn new( + cuda_device: CudaDevice, + global_mutex: Arc>, + ) -> Result { + let id = cuda_device.id(); + let thread_name = move |thread_index| format!("cuda-{id}.{thread_index}"); + // TODO: remove this panic handler when rayon logs panic_info + // https://github.com/rayon-rs/rayon/issues/1208 + let panic_handler = move |panic_info| { + if let Some(index) = current_thread_index() { + eprintln!("panic on thread {}: {:?}", thread_name(index), 
panic_info); + } else { + // We want to guarantee exit, rather than panicking in a panic handler. + eprintln!( + "rayon panic handler called on non-rayon thread: {:?}", + panic_info + ); + } + exit(1); + }; + + let thread_pool = ThreadPoolBuilder::new() + .thread_name(thread_name) + .panic_handler(panic_handler) + // Make sure there is overlap between records, so GPU is almost always busy + .num_threads(2) + .build()?; + + Ok(Self { cuda_device, + thread_pool, global_mutex, - } + }) } } diff --git a/crates/subspace-farmer/src/plotter/gpu/rocm.rs b/crates/subspace-farmer/src/plotter/gpu/rocm.rs index 2aef4c482a..db875b83e1 100644 --- a/crates/subspace-farmer/src/plotter/gpu/rocm.rs +++ b/crates/subspace-farmer/src/plotter/gpu/rocm.rs @@ -2,6 +2,9 @@ use crate::plotter::gpu::GpuRecordsEncoder; use async_lock::Mutex as AsyncMutex; +use parking_lot::Mutex; +use rayon::{current_thread_index, ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder}; +use std::process::exit; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use subspace_core_primitives::pieces::{PieceOffset, Record}; @@ -14,6 +17,7 @@ use subspace_proof_of_space_gpu::rocm::RocmDevice; #[derive(Debug)] pub struct RocmRecordsEncoder { rocm_device: RocmDevice, + thread_pool: ThreadPool, global_mutex: Arc>, } @@ -34,21 +38,46 @@ impl RecordsEncoder for RocmRecordsEncoder { .map_err(|error| anyhow::anyhow!("Failed to convert pieces in sector: {error}"))?; let mut sector_contents_map = SectorContentsMap::new(pieces_in_sector); - for ((piece_offset, record), mut encoded_chunks_used) in (PieceOffset::ZERO..) - .zip(records.iter_mut()) - .zip(sector_contents_map.iter_record_bitfields_mut()) { - // Take mutex briefly to make sure encoding is allowed right now - self.global_mutex.lock_blocking(); + let iter = Mutex::new( + (PieceOffset::ZERO..) 
+ .zip(records.iter_mut()) + .zip(sector_contents_map.iter_record_bitfields_mut()), + ); + let plotting_error = Mutex::new(None::); - let pos_seed = sector_id.derive_evaluation_seed(piece_offset); + self.thread_pool.scope(|scope| { + scope.spawn_broadcast(|_scope, _ctx| loop { + // Take mutex briefly to make sure encoding is allowed right now + self.global_mutex.lock_blocking(); - self.rocm_device - .generate_and_encode_pospace(&pos_seed, record, encoded_chunks_used.iter_mut()) - .map_err(anyhow::Error::msg)?; + // This instead of `while` above because otherwise mutex will be held for the + // duration of the loop and will limit concurrency to 1 record + let Some(((piece_offset, record), mut encoded_chunks_used)) = + iter.lock().next() + else { + return; + }; + let pos_seed = sector_id.derive_evaluation_seed(piece_offset); - if abort_early.load(Ordering::Relaxed) { - break; + if let Err(error) = self.rocm_device.generate_and_encode_pospace( + &pos_seed, + record, + encoded_chunks_used.iter_mut(), + ) { + plotting_error.lock().replace(error); + return; + } + + if abort_early.load(Ordering::Relaxed) { + return; + } + }); + }); + + let plotting_error = plotting_error.lock().take(); + if let Some(error) = plotting_error { + return Err(anyhow::Error::msg(error)); } } @@ -58,10 +87,38 @@ impl RecordsEncoder for RocmRecordsEncoder { impl RocmRecordsEncoder { /// Create new instance - pub fn new(rocm_device: RocmDevice, global_mutex: Arc>) -> Self { - Self { + pub fn new( + rocm_device: RocmDevice, + global_mutex: Arc>, + ) -> Result { + let id = rocm_device.id(); + let thread_name = move |thread_index| format!("rocm-{id}.{thread_index}"); + // TODO: remove this panic handler when rayon logs panic_info + // https://github.com/rayon-rs/rayon/issues/1208 + let panic_handler = move |panic_info| { + if let Some(index) = current_thread_index() { + eprintln!("panic on thread {}: {:?}", thread_name(index), panic_info); + } else { + // We want to guarantee exit, rather than panicking in a panic handler. 
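Both the CUDA and ROCm encoders now share the same work-distribution trick: a dedicated two-thread rayon pool where each thread locks a shared iterator only long enough to pull the next record, so encoding itself runs outside the lock and two records can overlap to keep the device busy. A self-contained sketch of the pattern, with integer squaring standing in for GPU encoding:

```rust
use parking_lot::Mutex;

fn main() -> Result<(), rayon::ThreadPoolBuildError> {
    let pool = rayon::ThreadPoolBuilder::new()
        // Two threads overlap work so the (hypothetical) device stays busy
        .num_threads(2)
        .build()?;

    let iter = Mutex::new(0..10u32);
    let results = Mutex::new(Vec::new());

    pool.scope(|scope| {
        scope.spawn_broadcast(|_scope, _ctx| loop {
            // Hold the lock only while fetching the next item, not while
            // processing it, otherwise concurrency collapses to one item
            let Some(item) = iter.lock().next() else {
                return;
            };
            let processed = item * item; // stand-in for device work
            results.lock().push(processed);
        });
    });

    println!("processed {} items", results.lock().len());
    Ok(())
}
```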
+ eprintln!( + "rayon panic handler called on non-rayon thread: {:?}", + panic_info + ); + } + exit(1); + }; + + let thread_pool = ThreadPoolBuilder::new() + .thread_name(thread_name) + .panic_handler(panic_handler) + // Make sure there is overlap between records, so GPU is almost always busy + .num_threads(2) + .build()?; + + Ok(Self { rocm_device, + thread_pool, global_mutex, - } + }) } } diff --git a/crates/subspace-farmer/src/single_disk_farm.rs b/crates/subspace-farmer/src/single_disk_farm.rs index ebfda43729..3fb574b868 100644 --- a/crates/subspace-farmer/src/single_disk_farm.rs +++ b/crates/subspace-farmer/src/single_disk_farm.rs @@ -49,7 +49,6 @@ use futures::{select, FutureExt, StreamExt}; use parity_scale_codec::{Decode, Encode}; use parking_lot::Mutex; use prometheus_client::registry::Registry; -use rand::prelude::*; use rayon::prelude::*; use rayon::{ThreadPoolBuildError, ThreadPoolBuilder}; use serde::{Deserialize, Serialize}; @@ -64,27 +63,27 @@ use std::pin::Pin; use std::str::FromStr; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Duration; use std::{fmt, fs, io, mem}; use subspace_core_primitives::hashes::{blake3_hash, Blake3Hash}; use subspace_core_primitives::pieces::Record; use subspace_core_primitives::sectors::SectorIndex; use subspace_core_primitives::segments::{HistorySize, SegmentIndex}; -use subspace_core_primitives::{PublicKey, ScalarBytes}; +use subspace_core_primitives::PublicKey; use subspace_erasure_coding::ErasureCoding; use subspace_farmer_components::file_ext::FileExt; use subspace_farmer_components::reading::ReadSectorRecordChunksMode; use subspace_farmer_components::sector::{sector_size, SectorMetadata, SectorMetadataChecksummed}; -use subspace_farmer_components::{FarmerProtocolInfo, ReadAtSync}; +use subspace_farmer_components::FarmerProtocolInfo; use subspace_kzg::Kzg; use subspace_networking::KnownPeersManager; use subspace_proof_of_space::Table; use subspace_rpc_primitives::{FarmerAppInfo, SolutionResponse}; use thiserror::Error; use tokio::runtime::Handle; -use tokio::sync::{broadcast, Barrier, Semaphore}; +use tokio::sync::broadcast; use tokio::task; -use tracing::{debug, error, info, trace, warn, Instrument, Span}; +use tracing::{error, info, trace, warn, Instrument, Span}; // Refuse to compile on non-64-bit platforms, offsets may fail on those when converting from u64 to // usize depending on chain parameters @@ -94,11 +93,7 @@ const_assert!(mem::size_of::() >= mem::size_of::()); const RESERVED_PLOT_METADATA: u64 = 1024 * 1024; /// Reserve 1M of space for farm info (for potential future expansion) const RESERVED_FARM_INFO: u64 = 1024 * 1024; -const NEW_SEGMENT_PROCESSING_DELAY: Duration = Duration::from_secs(30); -/// Limit for reads in internal benchmark. -/// -/// 4 seconds is proving time, hence 3 seconds for reads. -const INTERNAL_BENCHMARK_READ_TIMEOUT: Duration = Duration::from_millis(3500); +const NEW_SEGMENT_PROCESSING_DELAY: Duration = Duration::from_mins(10); /// Exclusive lock for single disk farm info file, ensuring no concurrent edits by cooperating processes is done #[derive(Debug)] @@ -168,14 +163,25 @@ impl SingleDiskFarmInfo { } /// Store `SingleDiskFarm` info to path, so it can be loaded again upon restart. - pub fn store_to(&self, directory: &Path) -> io::Result<()> { + /// + /// Can optionally return a lock. 
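The reworked `store_to` that follows opens the info file without truncating, takes the exclusive advisory lock first (when locking is enabled), and only then wipes and rewrites the contents, so a concurrent process can never clobber or observe a half-written file. A compressed sketch of that sequence using the same `fs4` call as the hunk; the helper name is illustrative:

```rust
use std::fs::OpenOptions;
use std::io::{self, Write};
use std::path::Path;

/// Sketch of the lock-then-truncate pattern: truncating on open would happen
/// before the lock is held, so truncation is deferred until after locking.
fn store_locked(path: &Path, contents: &[u8]) -> io::Result<std::fs::File> {
    let mut file = OpenOptions::new()
        .write(true)
        .create(true)
        // Don't truncate on open: the lock isn't held yet at this point
        .truncate(false)
        .open(path)?;

    // Fails if another cooperating process already holds the lock
    fs4::fs_std::FileExt::try_lock_exclusive(&file)?;

    // Now that the file is exclusively ours, replace its contents
    file.set_len(0)?;
    file.write_all(contents)?;

    Ok(file)
}
```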
+ pub fn store_to( + &self, + directory: &Path, + lock: bool, + ) -> io::Result> { let mut file = OpenOptions::new() .write(true) .create(true) - .truncate(true) + .truncate(false) .open(directory.join(Self::FILE_NAME))?; - fs4::fs_std::FileExt::try_lock_exclusive(&file)?; - file.write_all(&serde_json::to_vec(self).expect("Info serialization never fails; qed")) + if lock { + fs4::fs_std::FileExt::try_lock_exclusive(&file)?; + } + file.set_len(0)?; + file.write_all(&serde_json::to_vec(self).expect("Info serialization never fails; qed"))?; + + Ok(lock.then_some(SingleDiskFarmInfoLock { _file: file })) } /// Try to acquire exclusive lock on the single disk farm info file, ensuring no concurrent edits by cooperating @@ -306,13 +312,8 @@ where pub max_plotting_sectors_per_farm: NonZeroUsize, /// Disable farm locking, for example if file system doesn't support it pub disable_farm_locking: bool, - /// Explicit mode to use for reading of sector record chunks instead of doing internal - /// benchmarking - pub read_sector_record_chunks_mode: Option, - /// Barrier before internal benchmarking between different farms - pub faster_read_sector_record_chunks_mode_barrier: Arc, - /// Limit concurrency of internal benchmarking between different farms - pub faster_read_sector_record_chunks_mode_concurrency: Arc, + /// Mode to use for reading of sector record chunks instead + pub read_sector_record_chunks_mode: ReadSectorRecordChunksMode, /// Prometheus registry pub registry: Option<&'a Mutex<&'a mut Registry>>, /// Whether to create a farm if it doesn't yet exist @@ -851,8 +852,6 @@ impl SingleDiskFarm { max_plotting_sectors_per_farm, disable_farm_locking, read_sector_record_chunks_mode, - faster_read_sector_record_chunks_mode_barrier, - faster_read_sector_record_chunks_mode_concurrency, registry, create, } = options; @@ -972,40 +971,6 @@ impl SingleDiskFarm { let (farming_plot, farming_thread_pool) = AsyncJoinOnDrop::new(farming_plot_fut, false).await??; - faster_read_sector_record_chunks_mode_barrier.wait().await; - - let (read_sector_record_chunks_mode, farming_plot, farming_thread_pool) = - if let Some(mode) = read_sector_record_chunks_mode { - (mode, farming_plot, farming_thread_pool) - } else { - // Error doesn't matter here - let _permit = faster_read_sector_record_chunks_mode_concurrency - .acquire() - .await; - let span = span.clone(); - let plot_file = Arc::clone(&plot_file); - - let read_sector_record_chunks_mode_fut = task::spawn_blocking(move || { - farming_thread_pool - .install(move || { - let _span_guard = span.enter(); - - faster_read_sector_record_chunks_mode( - &*plot_file, - &farming_plot, - sector_size, - metadata_header.plotted_sector_count, - ) - .map(|mode| (mode, farming_plot)) - }) - .map(|(mode, farming_plot)| (mode, farming_plot, farming_thread_pool)) - }); - - AsyncJoinOnDrop::new(read_sector_record_chunks_mode_fut, false).await?? - }; - - faster_read_sector_record_chunks_mode_barrier.wait().await; - let plotting_join_handle = task::spawn_blocking({ let sectors_metadata = Arc::clone(§ors_metadata); let handlers = Arc::clone(&handlers); @@ -1284,87 +1249,88 @@ impl SingleDiskFarm { }; let public_key = identity.public_key().to_bytes().into(); - let single_disk_farm_info = match SingleDiskFarmInfo::load_from(directory)? 
{ - Some(mut single_disk_farm_info) => { - if &farmer_app_info.genesis_hash != single_disk_farm_info.genesis_hash() { - return Err(SingleDiskFarmError::WrongChain { - id: *single_disk_farm_info.id(), - correct_chain: hex::encode(single_disk_farm_info.genesis_hash()), - wrong_chain: hex::encode(farmer_app_info.genesis_hash), - }); - } + let (single_disk_farm_info, single_disk_farm_info_lock) = + match SingleDiskFarmInfo::load_from(directory)? { + Some(mut single_disk_farm_info) => { + if &farmer_app_info.genesis_hash != single_disk_farm_info.genesis_hash() { + return Err(SingleDiskFarmError::WrongChain { + id: *single_disk_farm_info.id(), + correct_chain: hex::encode(single_disk_farm_info.genesis_hash()), + wrong_chain: hex::encode(farmer_app_info.genesis_hash), + }); + } - if &public_key != single_disk_farm_info.public_key() { - return Err(SingleDiskFarmError::IdentityMismatch { - id: *single_disk_farm_info.id(), - correct_public_key: *single_disk_farm_info.public_key(), - wrong_public_key: public_key, - }); - } + if &public_key != single_disk_farm_info.public_key() { + return Err(SingleDiskFarmError::IdentityMismatch { + id: *single_disk_farm_info.id(), + correct_public_key: *single_disk_farm_info.public_key(), + wrong_public_key: public_key, + }); + } - let pieces_in_sector = single_disk_farm_info.pieces_in_sector(); + let pieces_in_sector = single_disk_farm_info.pieces_in_sector(); - if max_pieces_in_sector < pieces_in_sector { - return Err(SingleDiskFarmError::InvalidPiecesInSector { - id: *single_disk_farm_info.id(), - max_supported: max_pieces_in_sector, - initialized_with: pieces_in_sector, - }); - } + if max_pieces_in_sector < pieces_in_sector { + return Err(SingleDiskFarmError::InvalidPiecesInSector { + id: *single_disk_farm_info.id(), + max_supported: max_pieces_in_sector, + initialized_with: pieces_in_sector, + }); + } - if max_pieces_in_sector > pieces_in_sector { - info!( - pieces_in_sector, - max_pieces_in_sector, - "Farm initialized with smaller number of pieces in sector, farm needs to \ - be re-created for increase" - ); - } + if max_pieces_in_sector > pieces_in_sector { + info!( + pieces_in_sector, + max_pieces_in_sector, + "Farm initialized with smaller number of pieces in sector, farm needs \ + to be re-created for increase" + ); + } - if allocated_space != single_disk_farm_info.allocated_space() { - info!( - old_space = %bytesize::to_string(single_disk_farm_info.allocated_space(), true), - new_space = %bytesize::to_string(allocated_space, true), - "Farm size has changed" - ); + let mut single_disk_farm_info_lock = None; + + if allocated_space != single_disk_farm_info.allocated_space() { + info!( + old_space = %bytesize::to_string(single_disk_farm_info.allocated_space(), true), + new_space = %bytesize::to_string(allocated_space, true), + "Farm size has changed" + ); - let new_allocated_space = allocated_space; - match &mut single_disk_farm_info { - SingleDiskFarmInfo::V0 { - allocated_space, .. - } => { - *allocated_space = new_allocated_space; + let new_allocated_space = allocated_space; + match &mut single_disk_farm_info { + SingleDiskFarmInfo::V0 { + allocated_space, .. 
+ } => { + *allocated_space = new_allocated_space; + } } + + single_disk_farm_info_lock = + single_disk_farm_info.store_to(directory, !disable_farm_locking)?; + } else if !disable_farm_locking { + single_disk_farm_info_lock = Some( + SingleDiskFarmInfo::try_lock(directory) + .map_err(SingleDiskFarmError::LikelyAlreadyInUse)?, + ); } - single_disk_farm_info.store_to(directory)?; + (single_disk_farm_info, single_disk_farm_info_lock) } + None => { + let single_disk_farm_info = SingleDiskFarmInfo::new( + FarmId::new(), + farmer_app_info.genesis_hash, + public_key, + max_pieces_in_sector, + allocated_space, + ); - single_disk_farm_info - } - None => { - let single_disk_farm_info = SingleDiskFarmInfo::new( - FarmId::new(), - farmer_app_info.genesis_hash, - public_key, - max_pieces_in_sector, - allocated_space, - ); - - single_disk_farm_info.store_to(directory)?; - - single_disk_farm_info - } - }; + let single_disk_farm_info_lock = + single_disk_farm_info.store_to(directory, !disable_farm_locking)?; - let single_disk_farm_info_lock = if disable_farm_locking { - None - } else { - Some( - SingleDiskFarmInfo::try_lock(directory) - .map_err(SingleDiskFarmError::LikelyAlreadyInUse)?, - ) - }; + (single_disk_farm_info, single_disk_farm_info_lock) + } + }; let pieces_in_sector = single_disk_farm_info.pieces_in_sector(); let sector_size = sector_size(pieces_in_sector) as u64; @@ -2411,80 +2377,3 @@ fn write_dummy_sector_metadata( error, }) } - -fn faster_read_sector_record_chunks_mode( - original_plot: &OP, - farming_plot: &FP, - sector_size: usize, - mut plotted_sector_count: SectorIndex, -) -> Result -where - OP: FileExt + Sync, - FP: ReadAtSync, -{ - info!("Benchmarking faster proving method"); - - let mut sector_bytes = vec![0u8; sector_size]; - - if plotted_sector_count == 0 { - thread_rng().fill_bytes(&mut sector_bytes); - original_plot.write_all_at(§or_bytes, 0)?; - - plotted_sector_count = 1; - } - - let mut fastest_mode = ReadSectorRecordChunksMode::ConcurrentChunks; - let mut fastest_time = Duration::MAX; - - for _ in 0..3 { - let sector_offset = - sector_size as u64 * thread_rng().gen_range(0..plotted_sector_count) as u64; - let farming_plot = farming_plot.offset(sector_offset); - - // Reading the whole sector at once - { - let start = Instant::now(); - farming_plot.read_at(&mut sector_bytes, 0)?; - let elapsed = start.elapsed(); - - debug!(?elapsed, "Whole sector"); - - if elapsed >= INTERNAL_BENCHMARK_READ_TIMEOUT { - debug!( - ?elapsed, - "Reading whole sector is too slow, using chunks instead" - ); - - fastest_mode = ReadSectorRecordChunksMode::ConcurrentChunks; - break; - } - - if fastest_time > elapsed { - fastest_mode = ReadSectorRecordChunksMode::WholeSector; - fastest_time = elapsed; - } - } - - // A lot simplified version of concurrent chunks - { - let start = Instant::now(); - (0..Record::NUM_CHUNKS).into_par_iter().try_for_each(|_| { - let offset = thread_rng().gen_range(0_usize..sector_size / ScalarBytes::FULL_BYTES) - * ScalarBytes::FULL_BYTES; - farming_plot.read_at(&mut [0; ScalarBytes::FULL_BYTES], offset as u64) - })?; - let elapsed = start.elapsed(); - - debug!(?elapsed, "Chunks"); - - if fastest_time > elapsed { - fastest_mode = ReadSectorRecordChunksMode::ConcurrentChunks; - fastest_time = elapsed; - } - } - } - - info!(?fastest_mode, "Faster proving method found"); - - Ok(fastest_mode) -} diff --git a/crates/subspace-farmer/src/single_disk_farm/plotting.rs b/crates/subspace-farmer/src/single_disk_farm/plotting.rs index f327c3f607..a6b411d3ca 100644 --- 
a/crates/subspace-farmer/src/single_disk_farm/plotting.rs +++ b/crates/subspace-farmer/src/single_disk_farm/plotting.rs @@ -11,6 +11,7 @@ use futures::channel::{mpsc, oneshot}; use futures::stream::FuturesOrdered; use futures::{select, FutureExt, SinkExt, StreamExt}; use parity_scale_codec::Encode; +use rand::prelude::*; use std::collections::HashSet; use std::future::Future; use std::io; @@ -700,8 +701,8 @@ pub(super) struct PlottingSchedulerOptions { pub(super) handlers: Arc, pub(super) sectors_metadata: Arc>>, pub(super) sectors_to_plot_sender: mpsc::Sender, - // Delay between segment header being acknowledged by farmer and potentially triggering - // replotting + // Max delay between segment header being acknowledged by farmer and potentially + // triggering replotting pub(super) new_segment_processing_delay: Duration, pub(super) metrics: Option>, } @@ -798,7 +799,10 @@ where // There is no urgent need to rush replotting sectors immediately and this delay allows for // newly archived pieces to be both cached locally and on other farmers on the network - tokio::time::sleep(new_segment_processing_delay).await; + let delay = Duration::from_secs(thread_rng().gen_range( + new_segment_processing_delay.as_secs() / 10..=new_segment_processing_delay.as_secs(), + )); + tokio::time::sleep(delay).await; if archived_segments_sender.send(segment_header).is_err() { break; diff --git a/crates/subspace-gateway-rpc/src/lib.rs b/crates/subspace-gateway-rpc/src/lib.rs index af04fed933..4986f0741d 100644 --- a/crates/subspace-gateway-rpc/src/lib.rs +++ b/crates/subspace-gateway-rpc/src/lib.rs @@ -8,6 +8,7 @@ use std::ops::{Deref, DerefMut}; use subspace_core_primitives::hashes::{blake3_hash, Blake3Hash}; use subspace_core_primitives::objects::GlobalObjectMapping; use subspace_data_retrieval::object_fetcher::{self, ObjectFetcher}; +use subspace_data_retrieval::piece_getter::PieceGetter; use tracing::debug; const SUBSPACE_ERROR: i32 = 9000; @@ -99,22 +100,32 @@ pub trait SubspaceGatewayRpcApi { #[method(name = "subspace_fetchObject")] async fn fetch_object(&self, mappings: GlobalObjectMapping) -> Result, Error>; } + /// Subspace Gateway RPC configuration -pub struct SubspaceGatewayRpcConfig { +pub struct SubspaceGatewayRpcConfig +where + PG: PieceGetter + Send + Sync + 'static, +{ /// DSN object fetcher instance. - pub object_fetcher: ObjectFetcher, + pub object_fetcher: ObjectFetcher, } /// Implements the [`SubspaceGatewayRpcApiServer`] trait for interacting with the Subspace Gateway. -pub struct SubspaceGatewayRpc { +pub struct SubspaceGatewayRpc +where + PG: PieceGetter + Send + Sync + 'static, +{ /// DSN object fetcher instance. - object_fetcher: ObjectFetcher, + object_fetcher: ObjectFetcher, } /// [`SubspaceGatewayRpc`] is used to fetch objects from the DSN. -impl SubspaceGatewayRpc { +impl SubspaceGatewayRpc +where + PG: PieceGetter + Send + Sync + 'static, +{ /// Creates a new instance of the `SubspaceGatewayRpc` handler. 
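At this point the gateway RPC handler goes generic: the constructor and trait impls pick up the same `PG: PieceGetter` parameter so the handler can serve any DSN backend. In isolation, the shape looks roughly like this (simplified stand-ins, not the crate's real trait or types):

```rust
use std::collections::HashMap;

// Simplified stand-in for the real trait in subspace-data-retrieval
trait PieceGetter: Send + Sync + 'static {
    fn get(&self, index: u64) -> Option<Vec<u8>>;
}

// Fetcher and RPC handler are generic over the piece source
struct ObjectFetcher<PG: PieceGetter> {
    piece_getter: PG,
}

struct GatewayRpc<PG: PieceGetter> {
    object_fetcher: ObjectFetcher<PG>,
}

impl<PG: PieceGetter> GatewayRpc<PG> {
    fn new(object_fetcher: ObjectFetcher<PG>) -> Self {
        Self { object_fetcher }
    }

    fn fetch(&self, index: u64) -> Option<Vec<u8>> {
        self.object_fetcher.piece_getter.get(index)
    }
}

// Any backend that implements the trait plugs in, e.g. an in-memory map
struct InMemory(HashMap<u64, Vec<u8>>);

impl PieceGetter for InMemory {
    fn get(&self, index: u64) -> Option<Vec<u8>> {
        self.0.get(&index).cloned()
    }
}

fn main() {
    let pieces = HashMap::from([(1, b"piece".to_vec())]);
    let rpc = GatewayRpc::new(ObjectFetcher {
        piece_getter: InMemory(pieces),
    });
    assert_eq!(rpc.fetch(1), Some(b"piece".to_vec()));
    println!("fetched piece 1");
}
```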
- pub fn new(config: SubspaceGatewayRpcConfig) -> Self { + pub fn new(config: SubspaceGatewayRpcConfig) -> Self { Self { object_fetcher: config.object_fetcher, } @@ -122,10 +133,11 @@ impl SubspaceGatewayRpc { } #[async_trait] -impl SubspaceGatewayRpcApiServer for SubspaceGatewayRpc { +impl SubspaceGatewayRpcApiServer for SubspaceGatewayRpc +where + PG: PieceGetter + Send + Sync + 'static, +{ async fn fetch_object(&self, mappings: GlobalObjectMapping) -> Result, Error> { - // TODO: deny unsafe RPC calls - let count = mappings.objects().len(); if count > MAX_OBJECTS_PER_REQUEST { debug!(%count, %MAX_OBJECTS_PER_REQUEST, "Too many mappings in request"); diff --git a/crates/subspace-gateway/Cargo.toml b/crates/subspace-gateway/Cargo.toml index 27e3b74049..351a80ea32 100644 --- a/crates/subspace-gateway/Cargo.toml +++ b/crates/subspace-gateway/Cargo.toml @@ -1,7 +1,10 @@ [package] name = "subspace-gateway" version = "0.1.0" -authors = ["Teor "] +authors = [ + "Teor ", + "Shamil Gadelshin " +] description = "A Subspace Network data gateway." edition = "2021" license = "MIT OR Apache-2.0" @@ -17,25 +20,28 @@ include = [ targets = ["x86_64-unknown-linux-gnu"] [dependencies] +actix-web = { version = "4", features = ["rustls"], default-features = false } +async-lock = "3.4.0" anyhow = "1.0.89" async-trait = "0.1.83" clap = { version = "4.5.18", features = ["derive"] } fdlimit = "0.3.0" futures = "0.3.31" hex = "0.4.3" -jsonrpsee = { version = "0.24.5", features = ["server"] } +jsonrpsee = { version = "0.24.5", features = ["server", "ws-client"] } mimalloc = "0.1.43" -parking_lot = "0.12.2" +reqwest = { version = "0.12.9", features = ["json", "rustls-tls"], default-features = false } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" subspace-core-primitives = { version = "0.1.0", path = "../subspace-core-primitives" } subspace-data-retrieval = { version = "0.1.0", path = "../../shared/subspace-data-retrieval" } subspace-erasure-coding = { version = "0.1.0", path = "../subspace-erasure-coding" } subspace-gateway-rpc = { version = "0.1.0", path = "../subspace-gateway-rpc" } subspace-kzg = { version = "0.1.0", path = "../../shared/subspace-kzg" } +subspace-logging = { version = "0.0.1", path = "../../shared/subspace-logging" } subspace-networking = { version = "0.1.0", path = "../subspace-networking" } subspace-rpc-primitives = { version = "0.1.0", path = "../subspace-rpc-primitives" } subspace-verification = { version = "0.1.0", path = "../subspace-verification", default-features = false } supports-color = "3.0.1" -thiserror = "2.0.0" tokio = { version = "1.40.0", features = ["rt-multi-thread", "signal", "macros"] } tracing = "0.1.40" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } diff --git a/crates/subspace-gateway/README.md b/crates/subspace-gateway/README.md index e939f26bf8..c9d47d4cde 100644 --- a/crates/subspace-gateway/README.md +++ b/crates/subspace-gateway/README.md @@ -61,7 +61,7 @@ target/production/subspace-gateway --version Start a gateway connected to a single node development chain: ```bash -target/production/subspace-gateway run \ +target/production/subspace-gateway rpc \ --dev ``` diff --git a/crates/subspace-gateway/src/commands.rs b/crates/subspace-gateway/src/commands.rs index 159afc558f..439add72bf 100644 --- a/crates/subspace-gateway/src/commands.rs +++ b/crates/subspace-gateway/src/commands.rs @@ -1,27 +1,66 @@ //! Gateway subcommands. 
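The `commands.rs` rewrite below splits the gateway's single `run` command into `rpc` and `http` subcommands that share common `GatewayOptions`. A trimmed-down clap sketch of that CLI shape, with the option set abbreviated:

```rust
use clap::Parser;

/// Trimmed-down sketch of the new gateway CLI: one binary, two subcommands
/// sharing common options via `#[clap(flatten)]`-style reuse. Field names
/// mirror the hunk below but the types are simplified.
#[derive(Debug, Parser)]
enum Command {
    /// Run data gateway with RPC server
    Rpc(CommonOptions),
    /// Run data gateway with HTTP server
    Http(CommonOptions),
}

#[derive(Debug, Parser)]
struct CommonOptions {
    /// Enable development mode (implies --allow-private-ips)
    #[arg(long)]
    dev: bool,

    /// The maximum object size to fetch; larger objects return an error
    #[arg(long, default_value_t = 5 * 1024 * 1024)]
    max_size: usize,
}

fn main() {
    // e.g. `gateway http --max-size 1048576`
    let command = Command::parse();
    println!("{command:?}");
}
```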
-pub(crate) mod run; +pub(crate) mod http; +pub(crate) mod network; +pub(crate) mod rpc; -use crate::commands::run::RunOptions; +use crate::commands::http::HttpCommandOptions; +use crate::commands::network::{configure_network, NetworkArgs}; +use crate::commands::rpc::RpcCommandOptions; +use crate::node_client::RpcNodeClient; +use crate::piece_getter::DsnPieceGetter; +use crate::piece_validator::SegmentCommitmentPieceValidator; +use anyhow::anyhow; +use async_lock::Semaphore; use clap::Parser; +use std::num::NonZeroUsize; use std::panic; use std::process::exit; +use std::sync::Arc; +use subspace_core_primitives::pieces::Record; +use subspace_data_retrieval::object_fetcher::ObjectFetcher; +use subspace_erasure_coding::ErasureCoding; +use subspace_kzg::Kzg; +use subspace_networking::utils::piece_provider::PieceProvider; +use subspace_networking::NodeRunner; use tokio::signal; -use tracing::level_filters::LevelFilter; use tracing::{debug, warn}; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::util::SubscriberInitExt; -use tracing_subscriber::{fmt, EnvFilter, Layer}; + +/// The default size limit, based on the maximum block size in some domains. +pub const DEFAULT_MAX_SIZE: usize = 5 * 1024 * 1024; +/// Multiplier on top of outgoing connections number for piece downloading purposes +const PIECE_PROVIDER_MULTIPLIER: usize = 10; /// Commands for working with a gateway. #[derive(Debug, Parser)] #[clap(about, version)] pub enum Command { - /// Run data gateway - Run(RunOptions), + /// Run data gateway with RPC server + Rpc(RpcCommandOptions), + /// Run data gateway with HTTP server + Http(HttpCommandOptions), // TODO: subcommand to run various benchmarks } +/// Options for running a gateway +#[derive(Debug, Parser)] +pub(crate) struct GatewayOptions { + /// Enable development mode. + /// + /// Implies following flags (unless customized): + /// * `--allow-private-ips` + #[arg(long, verbatim_doc_comment)] + dev: bool, + + /// The maximum object size to fetch. + /// Larger objects will return an error. + #[arg(long, default_value_t = DEFAULT_MAX_SIZE)] + max_size: usize, + + #[clap(flatten)] + dsn_options: NetworkArgs, +} + /// Install a panic handler which exits on panics, rather than unwinding. Unwinding can hang the /// tokio runtime waiting for stuck tasks or threads. pub(crate) fn set_exit_on_panic() { @@ -32,25 +71,6 @@ pub(crate) fn set_exit_on_panic() { })); } -pub(crate) fn init_logger() { - // TODO: Workaround for https://github.com/tokio-rs/tracing/issues/2214, also on - // Windows terminal doesn't support the same colors as bash does - let enable_color = if cfg!(windows) { - false - } else { - supports_color::on(supports_color::Stream::Stderr).is_some() - }; - tracing_subscriber::registry() - .with( - fmt::layer().with_ansi(enable_color).with_filter( - EnvFilter::builder() - .with_default_directive(LevelFilter::INFO.into()) - .from_env_lossy(), - ), - ) - .init(); -} - pub(crate) fn raise_fd_limit() { match fdlimit::raise_fd_limit() { Ok(fdlimit::Outcome::LimitRaised { from, to }) => { @@ -102,3 +122,46 @@ pub(crate) async fn shutdown_signal() { tracing::info!("Received Ctrl+C, shutting down gateway..."); } + +/// Configures and returns object fetcher and DSN node runner. 
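`initialize_object_fetcher`, defined next, sizes the semaphore that bounds concurrent piece downloads as the configured outgoing connection count times `PIECE_PROVIDER_MULTIPLIER`. Just that sizing rule as a standalone sketch, using the same `async_lock::Semaphore`:

```rust
use async_lock::Semaphore;
use std::sync::Arc;

/// Multiplier on top of outgoing connections, as in the hunk below
const PIECE_PROVIDER_MULTIPLIER: usize = 10;

/// Sketch of the connection-derived concurrency limit handed to the piece
/// provider: more outgoing connections allow more in-flight downloads.
fn downloading_semaphore(out_connections: u32) -> Arc<Semaphore> {
    Arc::new(Semaphore::new(
        out_connections as usize * PIECE_PROVIDER_MULTIPLIER,
    ))
}

fn main() {
    let semaphore = downloading_semaphore(30);
    // Up to 300 concurrent piece downloads before callers start waiting
    assert!(semaphore.try_acquire().is_some());
}
```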
+pub async fn initialize_object_fetcher( + options: GatewayOptions, +) -> anyhow::Result<( + ObjectFetcher<DsnPieceGetter<SegmentCommitmentPieceValidator<RpcNodeClient>>>, + NodeRunner<()>, +)> { + let GatewayOptions { + dev, + max_size, + mut dsn_options, + } = options; + // Development mode handling is limited to this section + { + if dev { + dsn_options.allow_private_ips = true; + } + } + + let kzg = Kzg::new(); + let erasure_coding = ErasureCoding::new( + NonZeroUsize::new(Record::NUM_S_BUCKETS.next_power_of_two().ilog2() as usize) + .expect("Not zero; qed"), + ) + .map_err(|error| anyhow!("Failed to instantiate erasure coding: {error}"))?; + + let out_connections = dsn_options.out_connections; + // TODO: move this service code into its own function, in a new library part of this crate + let (dsn_node, dsn_node_runner, node_client) = configure_network(dsn_options).await?; + + let piece_provider = PieceProvider::new( + dsn_node.clone(), + SegmentCommitmentPieceValidator::new(dsn_node, node_client, kzg), + Arc::new(Semaphore::new( + out_connections as usize * PIECE_PROVIDER_MULTIPLIER, + )), + ); + let piece_getter = DsnPieceGetter::new(piece_provider); + let object_fetcher = ObjectFetcher::new(piece_getter.into(), erasure_coding, Some(max_size)); + + Ok((object_fetcher, dsn_node_runner)) +} diff --git a/crates/subspace-gateway/src/commands/http.rs b/crates/subspace-gateway/src/commands/http.rs new file mode 100644 index 0000000000..43ef3d2e74 --- /dev/null +++ b/crates/subspace-gateway/src/commands/http.rs @@ -0,0 +1,65 @@ +//! Gateway HTTP command. +//! This command starts an HTTP server to serve object requests. + +pub(crate) mod server; + +use crate::commands::http::server::{start_server, ServerParameters}; +use crate::commands::{initialize_object_fetcher, shutdown_signal, GatewayOptions}; +use clap::Parser; +use futures::{select, FutureExt}; +use tracing::info; + +/// Options for HTTP server. +#[derive(Debug, Parser)] +pub(crate) struct HttpCommandOptions { + #[clap(flatten)] + gateway_options: GatewayOptions, + + #[arg(long, default_value = "127.0.0.1:3000")] + indexer_endpoint: String, + + #[arg(long, default_value = "127.0.0.1:8080")] + http_listen_on: String, +} + +/// Runs an HTTP server +pub async fn run(run_options: HttpCommandOptions) -> anyhow::Result<()> { + let signal = shutdown_signal(); + + let HttpCommandOptions { + gateway_options, + indexer_endpoint, + http_listen_on, + } = run_options; + + let (object_fetcher, mut dsn_node_runner) = initialize_object_fetcher(gateway_options).await?; + let dsn_fut = dsn_node_runner.run(); + + let server_params = ServerParameters { + object_fetcher, + indexer_endpoint, + http_endpoint: http_listen_on, + }; + let http_server_fut = start_server(server_params); + + // This defines order in which things are dropped + let dsn_fut = dsn_fut; + let http_server_fut = http_server_fut; + + select!
{ + // Signal future + () = signal.fuse() => {}, + + // Networking future + () = dsn_fut.fuse() => { + info!("DSN network runner exited."); + }, + + // HTTP service future + _ = http_server_fut.fuse() => { + info!("HTTP server exited."); + }, + } + + anyhow::Ok(()) +} diff --git a/crates/subspace-gateway/src/commands/http/server.rs b/crates/subspace-gateway/src/commands/http/server.rs new file mode 100644 index 0000000000..e9d23c33cd --- /dev/null +++ b/crates/subspace-gateway/src/commands/http/server.rs @@ -0,0 +1,151 @@ +use actix_web::{web, App, HttpResponse, HttpServer, Responder}; +use serde::{Deserialize, Deserializer, Serialize}; +use std::default::Default; +use std::sync::Arc; +use subspace_core_primitives::hashes::{blake3_hash, Blake3Hash}; +use subspace_core_primitives::pieces::PieceIndex; +use subspace_core_primitives::BlockNumber; +use subspace_data_retrieval::object_fetcher::ObjectFetcher; +use subspace_data_retrieval::piece_getter::PieceGetter; +use tracing::{debug, error, trace}; + +pub(crate) struct ServerParameters<PG> +where + PG: PieceGetter + Send + Sync + 'static, +{ + pub(crate) object_fetcher: ObjectFetcher<PG>, + pub(crate) indexer_endpoint: String, + pub(crate) http_endpoint: String, } + +#[derive(Serialize, Deserialize, Debug, Default)] +#[serde(rename_all = "camelCase")] +struct ObjectMapping { + hash: Blake3Hash, + piece_index: PieceIndex, + piece_offset: u32, + #[serde(deserialize_with = "string_to_u32")] + block_number: BlockNumber, +} + +fn string_to_u32<'de, D>(deserializer: D) -> Result<u32, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = Deserialize::deserialize(deserializer)?; + s.parse::<u32>().map_err(serde::de::Error::custom) +} + +async fn request_object_mappings(endpoint: String, key: String) -> anyhow::Result<ObjectMapping> { + let client = reqwest::Client::new(); + let object_mappings_url = format!("http://{}/objects/{}", endpoint, key,); + + debug!(?key, ?object_mappings_url, "Requesting object mapping..."); + + let response = client + .get(object_mappings_url.clone()) + .send() + .await? + .json::<ObjectMapping>() + .await; + match &response { + Ok(json) => { + trace!(?key, ?json, "Requested object mapping."); + } + Err(err) => { + error!(?key, ?err, ?object_mappings_url, "Request failed"); + } + } + + response.map_err(|err| err.into()) +}
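The `string_to_u32` helper above is a standard serde pattern for indexers that encode numbers as JSON strings. An isolated, self-contained sketch of the same pattern (the struct and sample value here are illustrative, not part of the gateway):

// Standalone illustration of deserializing a numeric JSON string field.
use serde::{Deserialize, Deserializer};

fn string_to_u32<'de, D>(deserializer: D) -> Result<u32, D::Error>
where
    D: Deserializer<'de>,
{
    let s: String = Deserialize::deserialize(deserializer)?;
    s.parse::<u32>().map_err(serde::de::Error::custom)
}

#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Mapping {
    #[serde(deserialize_with = "string_to_u32")]
    block_number: u32,
}

fn main() {
    // The indexer serves block numbers as strings, e.g. {"blockNumber": "42"}.
    let mapping: Mapping = serde_json::from_str(r#"{"blockNumber": "42"}"#).unwrap();
    assert_eq!(mapping.block_number, 42);
}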
+async fn serve_object<PG>( + key: web::Path<String>, + additional_data: web::Data<Arc<ServerParameters<PG>>>, +) -> impl Responder +where + PG: PieceGetter + Send + Sync + 'static, +{ + let server_params = additional_data.into_inner(); + let key = key.into_inner(); + + // Validate object hash + let decode_result = hex::decode(key.clone()); + let object_hash = match decode_result { + Ok(hash) => { + if hash.len() != Blake3Hash::SIZE { + error!(?key, ?hash, "Invalid hash provided."); + return HttpResponse::BadRequest().finish(); + } + + Blake3Hash::try_from(hash.as_slice()).expect("Hash size was confirmed.") + } + Err(err) => { + error!(?key, ?err, "Invalid hash provided."); + return HttpResponse::BadRequest().finish(); + } + }; + + let Ok(object_mapping) = + request_object_mappings(server_params.indexer_endpoint.clone(), key.clone()).await + else { + return HttpResponse::BadRequest().finish(); + }; + + if object_mapping.hash != object_hash { + error!( + ?key, + object_mapping_hash=?object_mapping.hash, + "Requested hash doesn't match object mapping." + ); + return HttpResponse::ServiceUnavailable().finish(); + } + + let object_fetcher_result = server_params + .object_fetcher + .fetch_object(object_mapping.piece_index, object_mapping.piece_offset) + .await; + + let object = match object_fetcher_result { + Ok(object) => { + trace!(?key, size=%object.len(), "Object fetched successfully"); + + let data_hash = blake3_hash(&object); + if data_hash != object_hash { + error!( + ?data_hash, + ?object_hash, + "Retrieved data did not match mapping hash" + ); + return HttpResponse::ServiceUnavailable().finish(); + } + + object + } + Err(err) => { + error!(?key, ?err, "Failed to fetch object."); + return HttpResponse::ServiceUnavailable().finish(); + } + }; + + HttpResponse::Ok() + .content_type("application/octet-stream") + .body(object) +} + +pub async fn start_server<PG>(server_params: ServerParameters<PG>) -> std::io::Result<()> +where + PG: PieceGetter + Send + Sync + 'static, +{ + let server_params = Arc::new(server_params); + let http_endpoint = server_params.http_endpoint.clone(); + HttpServer::new(move || { + App::new() + .app_data(web::Data::new(server_params.clone())) + .route("/data/{hash}", web::get().to(serve_object::<PG>)) + }) + .bind(http_endpoint)? + .run() + .await +} diff --git a/crates/subspace-gateway/src/commands/run/network.rs b/crates/subspace-gateway/src/commands/network.rs similarity index 99% rename from crates/subspace-gateway/src/commands/run/network.rs rename to crates/subspace-gateway/src/commands/network.rs index f8688c0038..f985c59a5a 100644 --- a/crates/subspace-gateway/src/commands/run/network.rs +++ b/crates/subspace-gateway/src/commands/network.rs @@ -37,7 +37,7 @@ pub(crate) struct NetworkArgs { /// Maximum established outgoing swarm connection limit. #[arg(long, default_value_t = 100)] - out_connections: u32, + pub(crate) out_connections: u32, /// Maximum pending outgoing swarm connection limit. #[arg(long, default_value_t = 100)]
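For reference, fetching an object over the new HTTP endpoint is a plain GET against the `/data/{hash}` route registered above. A sketch using the crate's `reqwest` dependency; the hash value is a placeholder and the address is the default `--http-listen-on` from `HttpCommandOptions`:

// Sketch: HTTP client for the gateway's object endpoint.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Placeholder: hex-encoded Blake3 hash (32 bytes) of the requested object.
    let hash = "00".repeat(32);
    let url = format!("http://127.0.0.1:8080/data/{hash}");
    let response = reqwest::get(&url).await?.error_for_status()?;
    // Successful responses carry `application/octet-stream` bodies.
    let object = response.bytes().await?;
    println!("fetched {} bytes", object.len());
    Ok(())
}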
diff --git a/crates/subspace-gateway/src/commands/rpc.rs b/crates/subspace-gateway/src/commands/rpc.rs new file mode 100644 index 0000000000..0edd2c1a3d --- /dev/null +++ b/crates/subspace-gateway/src/commands/rpc.rs @@ -0,0 +1,63 @@ +//! Gateway RPC command. +//! This command starts an RPC server to serve object requests. +pub(crate) mod server; + +use crate::commands::rpc::server::{launch_rpc_server, RpcOptions, RPC_DEFAULT_PORT}; +use crate::commands::{initialize_object_fetcher, shutdown_signal, GatewayOptions}; +use clap::Parser; +use futures::{select, FutureExt}; +use std::pin::pin; +use subspace_gateway_rpc::{SubspaceGatewayRpc, SubspaceGatewayRpcConfig}; +use tracing::info; + +/// Options for RPC server. +#[derive(Debug, Parser)] +pub(crate) struct RpcCommandOptions { + #[clap(flatten)] + gateway_options: GatewayOptions, + + /// Options for RPC + #[clap(flatten)] + rpc_options: RpcOptions<RPC_DEFAULT_PORT>, +} + +/// Runs an RPC server +pub async fn run(run_options: RpcCommandOptions) -> anyhow::Result<()> { + let signal = shutdown_signal(); + + let RpcCommandOptions { + gateway_options, + rpc_options, + } = run_options; + let (object_fetcher, mut dsn_node_runner) = initialize_object_fetcher(gateway_options).await?; + let dsn_fut = dsn_node_runner.run(); + + let rpc_api = SubspaceGatewayRpc::new(SubspaceGatewayRpcConfig { object_fetcher }); + let rpc_handle = launch_rpc_server(rpc_api, rpc_options).await?; + let rpc_fut = rpc_handle.stopped(); + + // This defines order in which things are dropped + let dsn_fut = dsn_fut; + let rpc_fut = rpc_fut; + + let dsn_fut = pin!(dsn_fut); + let rpc_fut = pin!(rpc_fut); + + select! { + // Signal future + () = signal.fuse() => {}, + + // Networking future + () = dsn_fut.fuse() => { + info!("DSN network runner exited."); + }, + + // RPC service future + () = rpc_fut.fuse() => { + info!("RPC server exited."); + }, + + } + + anyhow::Ok(()) +} diff --git a/crates/subspace-gateway/src/commands/run/rpc.rs b/crates/subspace-gateway/src/commands/rpc/server.rs similarity index 81% rename from crates/subspace-gateway/src/commands/run/rpc.rs rename to crates/subspace-gateway/src/commands/rpc/server.rs index a7e58bb234..74a5b3dae4 100644 --- a/crates/subspace-gateway/src/commands/run/rpc.rs +++ b/crates/subspace-gateway/src/commands/rpc/server.rs @@ -3,6 +3,7 @@ use clap::Parser; use jsonrpsee::server::{ServerBuilder, ServerHandle}; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_gateway_rpc::{SubspaceGatewayRpc, SubspaceGatewayRpcApiServer}; use tracing::info; @@ -27,10 +28,13 @@ pub(crate) struct RpcOptions { // - add an argument for a custom tokio runtime // - move this RPC code into a new library part of this crate // - make a RPC config that is independent of clap -pub async fn launch_rpc_server( - rpc_api: SubspaceGatewayRpc, - rpc_options: RpcOptions<RPC_DEFAULT_PORT>, -) -> anyhow::Result<ServerHandle> { +pub async fn launch_rpc_server<PG>( + rpc_api: SubspaceGatewayRpc<PG>, + rpc_options: RpcOptions<RPC_DEFAULT_PORT>, +) -> anyhow::Result<ServerHandle> +where + PG: PieceGetter + Send + Sync + 'static, +{ let server = ServerBuilder::default() .build(rpc_options.rpc_listen_on) .await?; diff --git a/crates/subspace-gateway/src/commands/run.rs b/crates/subspace-gateway/src/commands/run.rs deleted file mode 100644 index ace0f5333b..0000000000 --- a/crates/subspace-gateway/src/commands/run.rs +++ /dev/null @@ -1,128 +0,0 @@ -//! Gateway run command. -//! This is the primary command for the gateway. - -mod network; -mod rpc; - -use crate::commands::run::network::{configure_network, NetworkArgs}; -use crate::commands::run::rpc::{launch_rpc_server, RpcOptions, RPC_DEFAULT_PORT}; -use crate::commands::shutdown_signal; -use crate::piece_getter::DsnPieceGetter; -use crate::piece_validator::SegmentCommitmentPieceValidator; -use anyhow::anyhow; -use clap::Parser; -use futures::{select, FutureExt}; -use std::env; -use std::num::NonZeroUsize; -use std::pin::pin; -use subspace_core_primitives::pieces::Record; -use subspace_data_retrieval::object_fetcher::ObjectFetcher; -use subspace_erasure_coding::ErasureCoding; -use subspace_gateway_rpc::{SubspaceGatewayRpc, SubspaceGatewayRpcConfig}; -use subspace_kzg::Kzg; -use tracing::info; - -/// The default size limit, based on the maximum block size in some domains. -pub const DEFAULT_MAX_SIZE: usize = 5 * 1024 * 1024; - -/// Options for running a node -#[derive(Debug, Parser)] -pub(crate) struct RunOptions { - #[clap(flatten)] - gateway: GatewayOptions, -} - -/// Options for running a gateway -#[derive(Debug, Parser)] -pub(crate) struct GatewayOptions { - /// Enable development mode. - /// - /// Implies following flags (unless customized): - /// * `--allow-private-ips` - #[arg(long, verbatim_doc_comment)] - dev: bool, - - /// The maximum object size to fetch. - /// Larger objects will return an error.
- #[arg(long, default_value_t = DEFAULT_MAX_SIZE)] - max_size: usize, - - #[clap(flatten)] - dsn_options: NetworkArgs, - - /// Options for RPC - #[clap(flatten)] - rpc_options: RpcOptions<RPC_DEFAULT_PORT>, -} - -/// Default run command for gateway -pub async fn run(run_options: RunOptions) -> anyhow::Result<()> { - let signal = shutdown_signal(); - - let RunOptions { - gateway: - GatewayOptions { - dev, - max_size, - mut dsn_options, - rpc_options, - }, - } = run_options; - - // Development mode handling is limited to this section - { - if dev { - dsn_options.allow_private_ips = true; - } - } - - info!("Subspace Gateway"); - info!("✌️ version {}", env!("CARGO_PKG_VERSION")); - info!("❤️ by {}", env!("CARGO_PKG_AUTHORS")); - - let kzg = Kzg::new(); - let erasure_coding = ErasureCoding::new( - NonZeroUsize::new(Record::NUM_S_BUCKETS.next_power_of_two().ilog2() as usize) - .expect("Not zero; qed"), - ) - .map_err(|error| anyhow!("Failed to instantiate erasure coding: {error}"))?; - - // TODO: move this service code into its own function, in a new library part of this crate - let (dsn_node, mut dsn_node_runner, node_client) = configure_network(dsn_options).await?; - let dsn_fut = dsn_node_runner.run(); - - let piece_getter = DsnPieceGetter::new( - dsn_node.clone(), - SegmentCommitmentPieceValidator::new(dsn_node, node_client, kzg), - ); - let object_fetcher = ObjectFetcher::new(piece_getter, erasure_coding, Some(max_size)); - - let rpc_api = SubspaceGatewayRpc::new(SubspaceGatewayRpcConfig { object_fetcher }); - let rpc_handle = launch_rpc_server(rpc_api, rpc_options).await?; - let rpc_fut = rpc_handle.stopped(); - - // This defines order in which things are dropped - let dsn_fut = dsn_fut; - let rpc_fut = rpc_fut; - - let dsn_fut = pin!(dsn_fut); - let rpc_fut = pin!(rpc_fut); - - select!
{ - // Signal future - () = signal.fuse() => {}, - - // Networking future - () = dsn_fut.fuse() => { - info!("DSN network runner exited."); - }, - - // RPC service future - () = rpc_fut.fuse() => { - info!("RPC server exited."); - }, - - } - - anyhow::Ok(()) -} diff --git a/crates/subspace-gateway/src/main.rs b/crates/subspace-gateway/src/main.rs index d215a014cb..e2ee1fef70 100644 --- a/crates/subspace-gateway/src/main.rs +++ b/crates/subspace-gateway/src/main.rs @@ -5,8 +5,9 @@ mod node_client; mod piece_getter; mod piece_validator; -use crate::commands::{init_logger, raise_fd_limit, set_exit_on_panic, Command}; +use crate::commands::{raise_fd_limit, set_exit_on_panic, Command}; use clap::Parser; +use subspace_logging::init_logger; #[global_allocator] static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -17,11 +18,18 @@ async fn main() -> anyhow::Result<()> { init_logger(); raise_fd_limit(); + info!("Subspace Gateway"); + info!("✌️ version {}", env!("CARGO_PKG_VERSION")); + info!("❤️ by {}", env!("CARGO_PKG_AUTHORS")); + let command = Command::parse(); match command { - Command::Run(run_options) => { - commands::run::run(run_options).await?; + Command::Rpc(run_options) => { + commands::rpc::run(run_options).await?; + } + Command::Http(run_options) => { + commands::http::run(run_options).await?; } } Ok(()) diff --git a/crates/subspace-gateway/src/piece_getter.rs b/crates/subspace-gateway/src/piece_getter.rs index a13bedb289..1582467c34 100644 --- a/crates/subspace-gateway/src/piece_getter.rs +++ b/crates/subspace-gateway/src/piece_getter.rs @@ -2,18 +2,17 @@ use async_trait::async_trait; use futures::stream::StreamExt; +use futures::{FutureExt, Stream}; use std::fmt; -use std::ops::{Deref, DerefMut}; use subspace_core_primitives::pieces::{Piece, PieceIndex}; -use subspace_data_retrieval::piece_getter::{BoxError, ObjectPieceGetter}; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_networking::utils::piece_provider::{PieceProvider, PieceValidator}; -use subspace_networking::Node; /// The maximum number of peer-to-peer walking rounds for L1 archival storage. const MAX_RANDOM_WALK_ROUNDS: usize = 15; -/// Wrapper type for PieceProvider, so it can implement ObjectPieceGetter. 
-pub struct DsnPieceGetter<PV>(pub PieceProvider<PV>); +/// Wrapper type for [`PieceProvider`], so it can implement [`PieceGetter`] +pub struct DsnPieceGetter<PV>(PieceProvider<PV>); impl<PV> fmt::Debug for DsnPieceGetter<PV> where PV: PieceValidator, { @@ -26,37 +25,17 @@ where } } -impl<PV> Deref for DsnPieceGetter<PV> -where - PV: PieceValidator, -{ - type Target = PieceProvider<PV>; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl<PV> DerefMut for DsnPieceGetter<PV> -where - PV: PieceValidator, -{ - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - // TODO: -// - change ObjectPieceGetter trait to take a list of piece indexes +// - reconstruct segment if piece is missing // - move this piece getter impl into a new library part of this crate #[async_trait] -impl<PV> ObjectPieceGetter for DsnPieceGetter<PV> +impl<PV> PieceGetter for DsnPieceGetter<PV> where PV: PieceValidator, { - async fn get_piece(&self, piece_index: PieceIndex) -> Result<Option<Piece>, BoxError> { + async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result<Option<Piece>> { if let Some((got_piece_index, maybe_piece)) = - self.get_from_cache([piece_index]).await.next().await + self.0.get_from_cache([piece_index]).await.next().await { assert_eq!(piece_index, got_piece_index); @@ -66,9 +45,37 @@ where } Ok(self + .0 .get_piece_from_archival_storage(piece_index, MAX_RANDOM_WALK_ROUNDS) .await) } + + async fn get_pieces<'a>( + &'a self, + piece_indices: Vec<PieceIndex>, + ) -> anyhow::Result< + Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>, + > { + let stream = + self.0 + .get_from_cache(piece_indices) + .await + .then(move |(piece_index, maybe_piece)| { + let fut = async move { + if let Some(piece) = maybe_piece { + return (piece_index, Ok(Some(piece))); + } + + self.0 + .get_piece_from_archival_storage(piece_index, MAX_RANDOM_WALK_ROUNDS) + .map(|piece| (piece_index, Ok(piece))) + .await + }; + Box::pin(fut) + }); + + Ok(Box::new(stream)) + } } impl<PV> DsnPieceGetter<PV> where PV: PieceValidator, { /// Creates new DSN piece getter. - pub fn new(node: Node, piece_validator: PV) -> Self { - Self(PieceProvider::new(node, piece_validator)) + pub fn new(piece_provider: PieceProvider<PV>) -> Self { + Self(piece_provider) } } diff --git a/crates/subspace-malicious-operator/src/bin/subspace-malicious-operator.rs b/crates/subspace-malicious-operator/src/bin/subspace-malicious-operator.rs index a28c6059a2..7727865a2a 100644 --- a/crates/subspace-malicious-operator/src/bin/subspace-malicious-operator.rs +++ b/crates/subspace-malicious-operator/src/bin/subspace-malicious-operator.rs @@ -200,9 +200,8 @@ fn main() -> Result<(), Error> { base: consensus_chain_config, // Domain node needs slots notifications for bundle production.
force_new_slot_notifications: true, - create_object_mappings: CreateObjectMappings::Block(0), + create_object_mappings: CreateObjectMappings::No, subspace_networking: SubspaceNetworking::Create { config: dsn_config }, - dsn_piece_getter: None, sync: Default::default(), is_timekeeper: false, timekeeper_cpu_cores: Default::default(), @@ -355,7 +354,6 @@ fn main() -> Result<(), Error> { _, _, _, - DomainBlock, _, _, >( diff --git a/crates/subspace-malicious-operator/src/malicious_bundle_producer.rs b/crates/subspace-malicious-operator/src/malicious_bundle_producer.rs index 9a66c0a694..d80594d78b 100644 --- a/crates/subspace-malicious-operator/src/malicious_bundle_producer.rs +++ b/crates/subspace-malicious-operator/src/malicious_bundle_producer.rs @@ -1,5 +1,5 @@ use crate::malicious_bundle_tamper::MaliciousBundleTamper; -use domain_client_operator::domain_bundle_producer::DomainBundleProducer; +use domain_client_operator::domain_bundle_producer::{BundleProducer, TestBundleProducer}; use domain_client_operator::domain_bundle_proposer::DomainBundleProposer; use domain_client_operator::{OpaqueBundleFor, OperatorSlotInfo}; use domain_runtime_primitives::opaque::Block as DomainBlock; @@ -18,10 +18,7 @@ use sp_blockchain::Info; use sp_consensus_slots::Slot; use sp_core::crypto::UncheckedFrom; use sp_domains::core_api::DomainCoreApi; -use sp_domains::{ - BundleProducerElectionApi, DomainId, DomainsApi, OperatorId, OperatorPublicKey, - OperatorSignature, OperatorSigningKeyProofOfOwnershipData, -}; +use sp_domains::{BundleProducerElectionApi, DomainId, DomainsApi, OperatorId, OperatorPublicKey}; use sp_keyring::Sr25519Keyring; use sp_keystore::{Keystore, KeystorePtr}; use sp_messenger::MessengerApi; @@ -89,7 +86,7 @@ pub struct MaliciousBundleProducer { operator_keystore: KeystorePtr, consensus_client: Arc, consensus_offchain_tx_pool_factory: OffchainTransactionPoolFactory, - bundle_producer: DomainBundleProducer, + bundle_producer: TestBundleProducer, malicious_bundle_tamper: MaliciousBundleTamper, malicious_operator_status: MaliciousOperatorStatus, } @@ -135,7 +132,7 @@ where ); let (bundle_sender, _bundle_receiver) = tracing_unbounded("domain_bundle_stream", 100); - let bundle_producer = DomainBundleProducer::new( + let bundle_producer = TestBundleProducer::new( domain_id, consensus_client.clone(), domain_client.clone(), @@ -272,25 +269,21 @@ where } }; - let data = OperatorSigningKeyProofOfOwnershipData { - operator_owner: self.sudo_account.clone(), - }; - let signature = OperatorSignature::from( - self.operator_keystore - .sr25519_sign( - OperatorPublicKey::ID, - signing_key.clone().as_ref(), - &data.encode(), - )? - .expect("key pair must be avaible on keystore for signing"), - ); - - let maybe_operator_id = self - .consensus_client - .runtime_api() - .operator_id_by_signing_key(consensus_best_hash, signing_key.clone())?; + let mut maybe_operator_id = None; + for operator_id in current_operators.keys().chain(next_operators.iter()) { + if let Some((operator_signing_key, _)) = self + .consensus_client + .runtime_api() + .operator(consensus_best_hash, *operator_id)? 
+ { + if operator_signing_key == signing_key { + maybe_operator_id = Some(*operator_id); + break; + } + } + } - // The `signing_key` is linked to a operator means the previous registeration request is succeeded + // If the `signing_key` is linked to an operator, the previous registration request succeeded, // otherwise we need to retry match maybe_operator_id { None => { @@ -299,7 +292,6 @@ where self.submit_register_operator( nonce, signing_key, - signature, // Ideally we should use the `next_total_stake` but it is tricky to get MALICIOUS_OPR_STAKE_MULTIPLIER * current_total_stake, )?; @@ -347,7 +339,6 @@ where &self, nonce: Nonce, signing_key: OperatorPublicKey, - signature: OperatorSignature, staking_amount: Balance, ) -> Result<(), Box> { let call = pallet_domains::Call::register_operator { @@ -358,7 +349,6 @@ where minimum_nominator_stake: Balance::MAX, nomination_tax: Default::default(), }, - signing_key_proof_of_ownership: signature, }; self.submit_consensus_extrinsic(Some(nonce), call.into()) } diff --git a/crates/subspace-networking/Cargo.toml b/crates/subspace-networking/Cargo.toml index f512556e9d..1f6184dbb8 100644 --- a/crates/subspace-networking/Cargo.toml +++ b/crates/subspace-networking/Cargo.toml @@ -16,13 +16,12 @@ include = [ ] [dependencies] -async-mutex = "1.4.0" +async-lock = "3.4.0" async-trait = "0.1.83" backoff = { version = "0.4.0", features = ["futures", "tokio"] } bytes = "1.7.2" clap = { version = "4.5.18", features = ["color", "derive"] } derive_more = { version = "1.0.0", features = ["full"] } -either = "1.13.0" event-listener-primitives = "2.0.1" # TODO: Switch to fs4 once https://github.com/al8n/fs4-rs/issues/15 is resolved fs2 = "0.4.3" @@ -41,15 +40,16 @@ schnellru = "0.2.3" serde = { version = "1.0.110", features = ["derive"] } serde_json = "1.0.128" subspace-core-primitives = { version = "0.1.0", path = "../subspace-core-primitives" } +subspace-logging = { version = "0.0.1", path = "../../shared/subspace-logging" } subspace-metrics = { version = "0.1.0", path = "../../shared/subspace-metrics" } thiserror = "2.0.0" tokio = { version = "1.40.0", features = ["macros", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] } tokio-stream = "0.1.16" tracing = "0.1.40" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } unsigned-varint = { version = "0.8.0", features = ["futures", "asynchronous_codec"] } void = "1.0.2" + [dependencies.libp2p] # TODO: Replace with upstream once https://github.com/libp2p/rust-libp2p/issues/5626 and # https://github.com/libp2p/rust-libp2p/issues/5634 are resolved diff --git a/crates/subspace-networking/examples/benchmark.rs b/crates/subspace-networking/examples/benchmark.rs index 3f29fd1580..250176d16f 100644 --- a/crates/subspace-networking/examples/benchmark.rs +++ b/crates/subspace-networking/examples/benchmark.rs @@ -1,3 +1,4 @@ +use async_lock::Semaphore; use backoff::future::retry; use backoff::ExponentialBackoff; use clap::Parser; @@ -14,14 +15,11 @@ use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; use std::time::{Duration, Instant}; use subspace_core_primitives::pieces::{Piece, PieceIndex}; +use subspace_logging::init_logger; use subspace_networking::protocols::request_response::handlers::piece_by_index::PieceByIndexRequestHandler; use subspace_networking::utils::piece_provider::{NoPieceValidator, PieceProvider, PieceValidator}; use subspace_networking::{Config, Node}; -use tokio::sync::Semaphore; use tracing::{debug, error, info, trace, warn, Level}; -use 
tracing_subscriber::fmt::Subscriber; -use tracing_subscriber::util::SubscriberInitExt; -use tracing_subscriber::EnvFilter; /// Defines initial duration between get_piece calls. const GET_PIECE_INITIAL_INTERVAL: Duration = Duration::from_secs(5); @@ -128,8 +126,7 @@ enum Command { #[tokio::main] async fn main() { - init_logging(); - + init_logger(); let args: Args = Args::parse(); info!(?args, "Benchmark started."); @@ -214,7 +211,7 @@ async fn simple_benchmark(node: Node, max_pieces: usize, start_with: usize, retr return; } - let piece_provider = PieceProvider::new(node, NoPieceValidator); + let piece_provider = PieceProvider::new(node, NoPieceValidator, Arc::new(Semaphore::new(100))); let mut total_duration = Duration::default(); for i in start_with..(start_with + max_pieces) { let piece_index = PieceIndex::from(i as u64); @@ -266,7 +263,11 @@ async fn parallel_benchmark( let semaphore = &Semaphore::new(parallelism_level.into()); - let piece_provider = &PieceProvider::new(node, NoPieceValidator); + let piece_provider = &PieceProvider::new( + node, + NoPieceValidator, + Arc::new(Semaphore::new(parallelism_level.into())), + ); let mut total_duration = Duration::default(); let mut pure_total_duration = Duration::default(); let mut pending_pieces = (start_with..(start_with + max_pieces)) @@ -277,10 +278,7 @@ async fn parallel_benchmark( async move { let start = Instant::now(); - let permit = semaphore - .acquire() - .await - .expect("Semaphore cannot be closed."); + let permit = semaphore.acquire().await; let semaphore_acquired = Instant::now(); let maybe_piece = get_piece_from_dsn_cache_with_retries( piece_provider, @@ -395,14 +393,3 @@ pub async fn configure_dsn( node } - -fn init_logging() { - // set default log to info if the RUST_LOG is not set. - let env_filter = EnvFilter::builder() - .with_default_directive(Level::INFO.into()) - .from_env_lossy(); - - let builder = Subscriber::builder().with_env_filter(env_filter).finish(); - - builder.init() -} diff --git a/crates/subspace-networking/examples/get-peers.rs b/crates/subspace-networking/examples/get-peers.rs index acf345bfe1..521bc00029 100644 --- a/crates/subspace-networking/examples/get-peers.rs +++ b/crates/subspace-networking/examples/get-peers.rs @@ -4,11 +4,12 @@ use libp2p::multiaddr::Protocol; use parking_lot::Mutex; use std::sync::Arc; use std::time::Duration; +use subspace_logging::init_logger; use subspace_networking::Config; #[tokio::main] async fn main() { - tracing_subscriber::fmt::init(); + init_logger(); let config_1 = Config { listen_on: vec!["/ip4/0.0.0.0/tcp/0".parse().unwrap()], diff --git a/crates/subspace-networking/examples/metrics.rs b/crates/subspace-networking/examples/metrics.rs index 51abd409bb..85d6820834 100644 --- a/crates/subspace-networking/examples/metrics.rs +++ b/crates/subspace-networking/examples/metrics.rs @@ -7,6 +7,7 @@ use parking_lot::Mutex; use prometheus_client::registry::Registry; use std::sync::Arc; use std::time::Duration; +use subspace_logging::init_logger; use subspace_metrics::{start_prometheus_metrics_server, RegistryAdapter}; use subspace_networking::{Config, Node}; use tokio::signal; @@ -15,7 +16,7 @@ use tracing::{error, info}; #[tokio::main] async fn main() { - tracing_subscriber::fmt::init(); + init_logger(); let mut metric_registry = Registry::default(); let metrics = Metrics::new(&mut metric_registry); diff --git a/crates/subspace-networking/examples/networking.rs b/crates/subspace-networking/examples/networking.rs index 08fc896b41..8d36096744 100644 --- 
a/crates/subspace-networking/examples/networking.rs +++ b/crates/subspace-networking/examples/networking.rs @@ -7,13 +7,14 @@ use libp2p::multiaddr::Protocol; use parking_lot::Mutex; use std::sync::Arc; use std::time::Duration; +use subspace_logging::init_logger; use subspace_networking::Config; const TOPIC: &str = "Foo"; #[tokio::main] async fn main() { - tracing_subscriber::fmt::init(); + init_logger(); let config_1 = Config { listen_on: vec!["/ip4/0.0.0.0/tcp/0".parse().unwrap()], diff --git a/crates/subspace-networking/examples/random-walker.rs b/crates/subspace-networking/examples/random-walker.rs index 1e8a27f417..05c5c9ea29 100644 --- a/crates/subspace-networking/examples/random-walker.rs +++ b/crates/subspace-networking/examples/random-walker.rs @@ -10,14 +10,12 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; use subspace_core_primitives::pieces::PieceIndex; +use subspace_logging::init_logger; use subspace_networking::protocols::request_response::handlers::piece_by_index::{ PieceByIndexRequest, PieceByIndexRequestHandler, PieceByIndexResponse, }; use subspace_networking::{Config, Multihash, Node, PeerDiscovered, SendRequestError}; use tracing::{debug, error, info, warn, Level}; -use tracing_subscriber::fmt::Subscriber; -use tracing_subscriber::util::SubscriberInitExt; -use tracing_subscriber::EnvFilter; #[derive(Debug, Parser)] struct Args { @@ -47,7 +45,7 @@ struct Args { #[tokio::main] async fn main() { - init_logging(); + init_logger(); let args: Args = Args::parse(); @@ -416,14 +414,3 @@ async fn configure_dsn( node } - -fn init_logging() { - // set default log to info if the RUST_LOG is not set. - let env_filter = EnvFilter::builder() - .with_default_directive(Level::INFO.into()) - .from_env_lossy(); - - let builder = Subscriber::builder().with_env_filter(env_filter).finish(); - - builder.init() -} diff --git a/crates/subspace-networking/examples/requests.rs b/crates/subspace-networking/examples/requests.rs index a9d178fad4..4bf99eb62a 100644 --- a/crates/subspace-networking/examples/requests.rs +++ b/crates/subspace-networking/examples/requests.rs @@ -4,6 +4,7 @@ use parity_scale_codec::{Decode, Encode}; use parking_lot::Mutex; use std::sync::Arc; use std::time::Duration; +use subspace_logging::init_logger; use subspace_networking::protocols::request_response::handlers::generic_request_handler::{ GenericRequest, GenericRequestHandler, }; @@ -24,7 +25,7 @@ struct ExampleResponse; #[tokio::main] async fn main() { - tracing_subscriber::fmt::init(); + init_logger(); let config_1 = Config { listen_on: vec!["/ip4/0.0.0.0/tcp/0".parse().unwrap()], diff --git a/crates/subspace-networking/src/behavior.rs b/crates/subspace-networking/src/behavior.rs index 2666fe924d..cb7ceac09e 100644 --- a/crates/subspace-networking/src/behavior.rs +++ b/crates/subspace-networking/src/behavior.rs @@ -42,6 +42,9 @@ pub(crate) struct BehaviorConfig { pub(crate) record_store: RecordStore, /// The configuration for the [`RequestResponsesBehaviour`] protocol. pub(crate) request_response_protocols: Vec>, + /// The upper bound for the number of concurrent inbound + outbound streams for request/response + /// protocols. + pub(crate) request_response_max_concurrent_streams: usize, /// Connection limits for the swarm. pub(crate) connection_limits: ConnectionLimits, /// The configuration for the [`ReservedPeersBehaviour`]. 
@@ -97,6 +100,7 @@ where ping: Ping::default(), request_response: RequestResponseFactoryBehaviour::new( config.request_response_protocols, + config.request_response_max_concurrent_streams, ) //TODO: Convert to an error. .expect("RequestResponse protocols registration failed."), diff --git a/crates/subspace-networking/src/bin/subspace-bootstrap-node/main.rs b/crates/subspace-networking/src/bin/subspace-bootstrap-node/main.rs index cfbc9cea15..9453c15c28 100644 --- a/crates/subspace-networking/src/bin/subspace-bootstrap-node/main.rs +++ b/crates/subspace-networking/src/bin/subspace-bootstrap-node/main.rs @@ -15,13 +15,11 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::panic; use std::process::exit; use std::sync::Arc; +use subspace_logging::init_logger; use subspace_metrics::{start_prometheus_metrics_server, RegistryAdapter}; use subspace_networking::libp2p::multiaddr::Protocol; use subspace_networking::{peer_id, Config, KademliaMode}; -use tracing::{debug, info, Level}; -use tracing_subscriber::fmt::Subscriber; -use tracing_subscriber::util::SubscriberInitExt; -use tracing_subscriber::EnvFilter; +use tracing::{debug, info}; /// Size of the LRU cache for peers. pub const KNOWN_PEERS_CACHE_SIZE: u32 = 10000; @@ -117,22 +115,10 @@ fn set_exit_on_panic() { })); } -fn init_logging() { - // set default log to info if the RUST_LOG is not set. - let env_filter = EnvFilter::builder() - .with_default_directive(Level::INFO.into()) - .from_env_lossy(); - - let builder = Subscriber::builder().with_env_filter(env_filter).finish(); - - builder.init() -} - #[tokio::main] async fn main() -> Result<(), Box> { set_exit_on_panic(); - init_logging(); - + init_logger(); + let command: Command = Command::parse(); match command { diff --git a/crates/subspace-networking/src/constructor.rs b/crates/subspace-networking/src/constructor.rs index 5defa20517..70b29f433f 100644 --- a/crates/subspace-networking/src/constructor.rs +++ b/crates/subspace-networking/src/constructor.rs @@ -61,7 +61,8 @@ const SWARM_MAX_PENDING_INCOMING_CONNECTIONS: u32 = 80; /// The default maximum pending incoming connection number for the swarm. const SWARM_MAX_PENDING_OUTGOING_CONNECTIONS: u32 = 80; const KADEMLIA_QUERY_TIMEOUT: Duration = Duration::from_secs(40); -const SWARM_MAX_ESTABLISHED_CONNECTIONS_PER_PEER: Option<u32> = Some(3); +const SWARM_MAX_ESTABLISHED_CONNECTIONS_PER_PEER: u32 = 3; +const MAX_CONCURRENT_STREAMS_PER_CONNECTION: usize = 10; // TODO: Consider moving this constant to configuration or removing `Toggle` wrapper when we find a // use-case for gossipsub protocol.
const ENABLE_GOSSIP_PROTOCOL: bool = false; @@ -441,7 +442,7 @@ where ); let connection_limits = ConnectionLimits::default() - .with_max_established_per_peer(SWARM_MAX_ESTABLISHED_CONNECTIONS_PER_PEER) + .with_max_established_per_peer(Some(SWARM_MAX_ESTABLISHED_CONNECTIONS_PER_PEER)) .with_max_pending_incoming(Some(max_pending_incoming_connections)) .with_max_pending_outgoing(Some(max_pending_outgoing_connections)) .with_max_established_incoming(Some(max_established_incoming_connections)) @@ -469,6 +470,11 @@ where gossipsub, record_store: LocalOnlyRecordStore::new(local_records_provider), request_response_protocols, + request_response_max_concurrent_streams: { + let max_num_connections = max_established_incoming_connections as usize + + max_established_outgoing_connections as usize; + max_num_connections * MAX_CONCURRENT_STREAMS_PER_CONNECTION + }, connection_limits, reserved_peers: ReservedPeersConfig { reserved_peers: reserved_peers.clone(), diff --git a/crates/subspace-networking/src/lib.rs b/crates/subspace-networking/src/lib.rs index 132d142ea7..b13554da37 100644 --- a/crates/subspace-networking/src/lib.rs +++ b/crates/subspace-networking/src/lib.rs @@ -16,7 +16,7 @@ //! Networking functionality of Subspace Network, primarily used for DSN (Distributed Storage //! Network). -#![feature(impl_trait_in_assoc_type, ip, try_blocks)] +#![feature(exact_size_is_empty, impl_trait_in_assoc_type, ip, try_blocks)] #![warn(missing_docs)] mod behavior; diff --git a/crates/subspace-networking/src/node.rs b/crates/subspace-networking/src/node.rs index be429d5a73..8c622e60bd 100644 --- a/crates/subspace-networking/src/node.rs +++ b/crates/subspace-networking/src/node.rs @@ -571,7 +571,7 @@ impl Node { pub async fn connected_peers(&self) -> Result<Vec<PeerId>, ConnectedPeersError> { let (result_sender, result_receiver) = oneshot::channel(); - trace!("Starting 'connected_peers' request."); + trace!("Starting `connected_peers` request"); self.shared .command_sender @@ -584,11 +584,28 @@ impl Node { .map_err(|_| ConnectedPeersError::ConnectedPeers) } + /// Returns a collection of currently connected servers (typically farmers).
+ pub async fn connected_servers(&self) -> Result<Vec<PeerId>, ConnectedPeersError> { + let (result_sender, result_receiver) = oneshot::channel(); + + trace!("Starting `connected_servers` request"); + + self.shared + .command_sender + .clone() + .send(Command::ConnectedServers { result_sender }) + .await?; + + result_receiver + .await + .map_err(|_| ConnectedPeersError::ConnectedPeers) + } + /// Bootstraps Kademlia network pub async fn bootstrap(&self) -> Result<(), BootstrapError> { let (result_sender, mut result_receiver) = mpsc::unbounded(); - debug!("Starting 'bootstrap' request."); + debug!("Starting `bootstrap` request"); self.shared .command_sender diff --git a/crates/subspace-networking/src/node/tests.rs b/crates/subspace-networking/src/node/tests.rs index a76edf1471..ac124b05c7 100644 --- a/crates/subspace-networking/src/node/tests.rs +++ b/crates/subspace-networking/src/node/tests.rs @@ -7,6 +7,7 @@ use libp2p::multiaddr::Protocol; use parity_scale_codec::{Decode, Encode}; use parking_lot::Mutex; use std::sync::Arc; +use subspace_logging::init_logger; #[derive(Encode, Decode)] struct ExampleRequest; @@ -22,7 +23,7 @@ struct ExampleResponse; #[tokio::test] async fn request_with_addresses() { - tracing_subscriber::fmt::init(); + init_logger(); let config_1 = Config { listen_on: vec!["/ip4/0.0.0.0/tcp/0".parse().unwrap()],
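A short sketch of how a caller might use the new API, preferring identified servers over the raw connection list when picking request targets (usage assumed from the signatures above; "typically farmers" follows the doc comment):

use subspace_networking::libp2p::PeerId;
use subspace_networking::Node;

// Sketch: choose request targets from peers known to act as servers,
// falling back to an empty list if the command channel has closed.
async fn request_targets(node: &Node) -> Vec<PeerId> {
    node.connected_servers().await.unwrap_or_default()
}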
diff --git a/crates/subspace-networking/src/node_runner.rs b/crates/subspace-networking/src/node_runner.rs index 77bf983b74..8c85e1e2f8 100644 --- a/crates/subspace-networking/src/node_runner.rs +++ b/crates/subspace-networking/src/node_runner.rs @@ -10,7 +10,7 @@ use crate::protocols::request_response::request_response_factory::{ }; use crate::shared::{Command, CreatedSubscription, PeerDiscovered, Shared}; use crate::utils::{is_global_address_or_dns, strip_peer_id, SubspaceMetrics}; -use async_mutex::Mutex as AsyncMutex; +use async_lock::Mutex as AsyncMutex; use bytes::Bytes; use event_listener_primitives::HandlerId; use futures::channel::mpsc; @@ -108,6 +108,7 @@ where periodical_tasks_interval: Pin>>, /// Manages the networking parameters like known peers and addresses known_peers_registry: Box<dyn KnownPeersRegistry>, + connected_servers: HashSet<PeerId>, /// Defines set of peers with a permanent connection (and reconnection if necessary). reserved_peers: HashMap, /// Temporarily banned peers. @@ -211,6 +212,7 @@ where // We'll make the first dial right away and continue at the interval. periodical_tasks_interval: Box::pin(tokio::time::sleep(Duration::from_secs(0)).fuse()), known_peers_registry, + connected_servers: HashSet::new(), reserved_peers, temporary_bans, libp2p_metrics, @@ -569,6 +571,7 @@ where if num_established == 0 { self.peer_ip_addresses.remove(&peer_id); + self.connected_servers.remove(&peer_id); } let num_established_peer_connections = shared .num_established_peer_connections @@ -831,6 +834,8 @@ where kademlia.remove_address(&peer_id, &old_address); } + + self.connected_servers.insert(peer_id); } else { debug!( %local_peer_id, @@ -841,6 +846,7 @@ where ); kademlia.remove_peer(&peer_id); + self.connected_servers.remove(&peer_id); } } } @@ -1475,6 +1481,11 @@ where let _ = result_sender.send(connected_peers); } + Command::ConnectedServers { result_sender } => { + let connected_servers = self.connected_servers.iter().cloned().collect(); + + let _ = result_sender.send(connected_servers); + } Command::Bootstrap { result_sender } => { let kademlia = &mut self.swarm.behaviour_mut().kademlia; diff --git a/crates/subspace-networking/src/protocols/request_response/request_response_factory.rs b/crates/subspace-networking/src/protocols/request_response/request_response_factory.rs index 9c576158fa..cbac6eb043 100644 --- a/crates/subspace-networking/src/protocols/request_response/request_response_factory.rs +++ b/crates/subspace-networking/src/protocols/request_response/request_response_factory.rs @@ -323,6 +323,7 @@ impl RequestResponseFactoryBehaviour { /// the same protocol is passed twice. pub fn new( list: impl IntoIterator>, + max_concurrent_streams: usize, ) -> Result { let mut protocols = HashMap::new(); let mut request_handlers = Vec::new(); @@ -341,7 +342,9 @@ impl RequestResponseFactoryBehaviour { max_response_size: config.max_response_size, }, iter::once(StreamProtocol::new(config.name)).zip(iter::repeat(protocol_support)), - RequestResponseConfig::default().with_request_timeout(config.request_timeout), + RequestResponseConfig::default() + .with_request_timeout(config.request_timeout) + .with_max_concurrent_streams(max_concurrent_streams), ); match protocols.entry(Cow::Borrowed(config.name)) { diff --git a/crates/subspace-networking/src/protocols/request_response/request_response_factory/tests.rs b/crates/subspace-networking/src/protocols/request_response/request_response_factory/tests.rs index 1c6a99de64..20916d93fe 100644 --- a/crates/subspace-networking/src/protocols/request_response/request_response_factory/tests.rs +++ b/crates/subspace-networking/src/protocols/request_response/request_response_factory/tests.rs @@ -41,7 +41,7 @@ async fn build_swarm( .into_iter() .map(|config| Box::new(MockRunner(config)) as Box) .collect::>(); - let behaviour = RequestResponseFactoryBehaviour::new(configs).unwrap(); + let behaviour = RequestResponseFactoryBehaviour::new(configs, 100).unwrap(); let mut swarm = SwarmBuilder::with_new_identity() .with_tokio() diff --git a/crates/subspace-networking/src/shared.rs b/crates/subspace-networking/src/shared.rs index 8aacbda525..edde4d8586 100644 --- a/crates/subspace-networking/src/shared.rs +++ b/crates/subspace-networking/src/shared.rs @@ -108,6 +108,9 @@ pub(crate) enum Command { ConnectedPeers { result_sender: oneshot::Sender<Vec<PeerId>>, }, + ConnectedServers { + result_sender: oneshot::Sender<Vec<PeerId>>, + }, Bootstrap { // No result sender means background async bootstrapping result_sender: Option>, diff --git a/crates/subspace-networking/src/utils/piece_provider.rs index 
5c5c877935..0b90093a8d 100644 --- a/crates/subspace-networking/src/utils/piece_provider.rs +++ b/crates/subspace-networking/src/utils/piece_provider.rs @@ -8,6 +8,7 @@ use crate::protocols::request_response::handlers::piece_by_index::{ }; use crate::utils::multihash::ToMultihash; use crate::{Multihash, Node}; +use async_lock::{Semaphore, SemaphoreGuard}; use async_trait::async_trait; use futures::channel::mpsc; use futures::future::FusedFuture; @@ -18,15 +19,15 @@ use libp2p::kad::store::RecordStore; use libp2p::kad::{store, Behaviour as Kademlia, KBucketKey, ProviderRecord, Record, RecordKey}; use libp2p::swarm::NetworkBehaviour; use libp2p::{Multiaddr, PeerId}; -use parking_lot::Mutex; use rand::prelude::*; use std::any::type_name; use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::iter::Empty; +use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use std::{fmt, iter, mem}; +use std::{fmt, iter}; use subspace_core_primitives::pieces::{Piece, PieceIndex}; use tokio_stream::StreamMap; use tracing::{debug, trace, warn, Instrument}; @@ -56,9 +57,11 @@ impl PieceValidator for NoPieceValidator { /// Piece provider with cancellation and piece validator. /// Use `NoPieceValidator` to disable validation. +#[derive(Clone)] pub struct PieceProvider<PV> { node: Node, piece_validator: PV, + piece_downloading_semaphore: Arc<Semaphore>, } impl<PV> fmt::Debug for PieceProvider<PV> { @@ -74,10 +77,15 @@ where PV: PieceValidator, { /// Creates new piece provider. - pub fn new(node: Node, piece_validator: PV) -> Self { + pub fn new( + node: Node, + piece_validator: PV, + piece_downloading_semaphore: Arc<Semaphore>, + ) -> Self { Self { node, piece_validator, + piece_downloading_semaphore, } } @@ -91,14 +99,32 @@ where PieceIndices: IntoIterator<Item = PieceIndex> + 'a, { + let download_id = random::<u64>(); let (tx, mut rx) = mpsc::unbounded(); - let fut = get_from_cache_inner( - piece_indices.into_iter(), - &self.node, - &self.piece_validator, - tx, - ); - let mut fut = Box::pin(fut.fuse()); + let fut = async move { + let not_downloaded_pieces = download_cached_pieces( + piece_indices.into_iter(), + &self.node, + &self.piece_validator, + &tx, + &self.piece_downloading_semaphore, + ) + .await; + + if not_downloaded_pieces.is_empty() { + debug!("Done"); + return; + } + + for piece_index in not_downloaded_pieces { + tx.unbounded_send((piece_index, None)) + .expect("This future isn't polled after receiver is dropped; qed"); + } + + debug!("Done #2"); + }; + + let mut fut = Box::pin(fut.instrument(tracing::info_span!("", %download_id)).fuse()); // Drive above future and stream back any pieces that were downloaded so far stream::poll_fn(move |cx| {
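A sketch of draining the stream `get_from_cache` returns; results arrive as pieces complete rather than in request order (consumer code assumed, not part of this diff):

use futures::StreamExt;
use subspace_core_primitives::pieces::{Piece, PieceIndex};
use subspace_networking::utils::piece_provider::{PieceProvider, PieceValidator};

// Sketch: collect cache hits while noting misses for a later L1 fallback.
async fn cache_lookup<PV: PieceValidator>(
    provider: &PieceProvider<PV>,
    indices: Vec<PieceIndex>,
) -> (Vec<(PieceIndex, Piece)>, Vec<PieceIndex>) {
    let mut hits = Vec::new();
    let mut misses = Vec::new();
    // Pinning on the heap so `next()` can be called regardless of whether the
    // returned stream is Unpin.
    let mut stream = Box::pin(provider.get_from_cache(indices).await);
    while let Some((piece_index, maybe_piece)) = stream.next().await {
        match maybe_piece {
            Some(piece) => hits.push((piece_index, piece)),
            None => misses.push(piece_index),
        }
    }
    (hits, misses)
}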
@@ -247,9 +273,9 @@ where // TODO: consider using retry policy for L1 lookups as well. trace!(%piece_index, "Getting piece from archival storage.."); - let connected_peers = { - let connected_peers = match self.node.connected_peers().await { - Ok(connected_peers) => connected_peers, + let connected_servers = { + let connected_servers = match self.node.connected_servers().await { + Ok(connected_servers) => connected_servers, Err(err) => { debug!(%piece_index, ?err, "Cannot get connected peers (DSN L1 lookup)"); @@ -257,13 +283,13 @@ where } }; - HashSet::<PeerId>::from_iter(connected_peers) + HashSet::<PeerId>::from_iter(connected_servers) }; - if connected_peers.is_empty() { + if connected_servers.is_empty() { debug!(%piece_index, "Cannot acquire piece from no connected peers (DSN L1 lookup)"); } else { - for peer_id in connected_peers.iter() { + for peer_id in connected_servers.iter() { let maybe_piece = self.get_piece_from_peer(*peer_id, piece_index).await; if maybe_piece.is_some() { @@ -484,58 +510,15 @@ impl KademliaWrapper { } } -async fn get_from_cache_inner<PV, PieceIndices>( - piece_indices: PieceIndices, - node: &Node, - piece_validator: &PV, - results: mpsc::UnboundedSender<(PieceIndex, Option<Piece>)>, -) where - PV: PieceValidator, - PieceIndices: Iterator<Item = PieceIndex>, -{ - let download_id = random::<u64>(); - - // TODO: It'd be nice to combine downloading from connected peers with downloading from closest - // peers concurrently - let fut = async move { - // Download from connected peers first - let pieces_to_download = download_cached_pieces_from_connected_peers( - piece_indices, - node, - piece_validator, - &results, - ) - .await; - - if pieces_to_download.is_empty() { - debug!("Done"); - return; - } - - // Download from iteratively closer peers according to Kademlia rules - download_cached_pieces_from_closest_peers( - pieces_to_download, - node, - piece_validator, - &results, - ) - .await; - - debug!("Done #2"); - }; - - fut.instrument(tracing::info_span!("", %download_id)).await; -} - /// Takes pieces to download as an input, sends results with pieces that were downloaded -/// successfully and returns those that were not downloaded from connected peer with addresses of -/// potential candidates -async fn download_cached_pieces_from_connected_peers<PV, PieceIndices>( +/// successfully and returns those that were not downloaded +async fn download_cached_pieces<PV, PieceIndices>( piece_indices: PieceIndices, node: &Node, piece_validator: &PV, results: &mpsc::UnboundedSender<(PieceIndex, Option<Piece>)>, -) -> HashMap<PieceIndex, HashMap<PeerId, Vec<Multiaddr>>> + semaphore: &Semaphore, +) -> impl ExactSizeIterator<Item = PieceIndex> where PV: PieceValidator, PieceIndices: Iterator<Item = PieceIndex>, { // At the end pieces that were not downloaded will remain with a collection of known closest // peers for them. let mut pieces_to_download = piece_indices - .map(|piece_index| (piece_index, HashMap::new())) - .collect::<HashMap<PieceIndex, HashMap<PeerId, Vec<Multiaddr>>>>(); + .map(|piece_index| async move { + let mut kademlia = KademliaWrapper::new(node.id()); + let key = piece_index.to_multihash(); - debug!(num_pieces = %pieces_to_download.len(), "Starting"); + let local_closest_peers = node + .get_closest_local_peers(key, None) + .await + .unwrap_or_default(); - let mut checked_connected_peers = HashSet::new(); + // Seed with local closest peers + for (peer_id, addresses) in local_closest_peers { + kademlia.add_peer(&peer_id, addresses); + } - // The loop is in order to check peers that might be connected after the initial loop has - started. - loop { - let Ok(connected_peers) = node.connected_peers().await else { - trace!("Connected peers error"); - break; - }; + (piece_index, kademlia) + }) + .collect::<FuturesUnordered<_>>() + .collect::<HashMap<_, _>>() + .await; - debug!( - connected_peers = %connected_peers.len(), - pieces_to_download = %pieces_to_download.len(), - "Loop" - ); - if connected_peers.is_empty() || pieces_to_download.is_empty() { - break; - } + let num_pieces = pieces_to_download.len(); + debug!(%num_pieces, "Starting"); - let num_pieces = pieces_to_download.len(); - let step = num_pieces / connected_peers.len().min(num_pieces); + let mut checked_peers = HashSet::new(); - // Dispatch initial set of requests to peers - let mut downloading_stream = connected_peers - .into_iter() - .take(num_pieces) - .enumerate() - .filter_map(|(peer_index, peer_id)| { - if !checked_connected_peers.insert(peer_id) { - return None; - } + let Ok(connected_servers) = node.connected_servers().await else { + trace!("Connected servers error"); + return pieces_to_download.into_keys(); + }; - // Take unique first piece index for each connected peer and the rest just to check - // cached pieces up to recommended limit - let mut peer_piece_indices = pieces_to_download - .keys() - .cycle() - .skip(step * peer_index) - .take(num_pieces.min(CachedPieceByIndexRequest::RECOMMENDED_LIMIT)) - .copied() - .collect::<Vec<_>>(); - // Pick first piece index as the piece we want to download - let piece_index = peer_piece_indices.swap_remove(0); - - let fut = download_cached_piece_from_peer( + let num_connected_servers = connected_servers.len(); + debug!( + %num_connected_servers, + %num_pieces, + "Starting downloading" + ); + + // Dispatch initial set of requests to peers with checked pieces distributed uniformly + let mut downloading_stream = connected_servers + .into_iter() + .take(num_pieces) + .enumerate() + .map(|(peer_index, peer_id)| { + checked_peers.insert(peer_id); + + // Inside to avoid division by zero in case there are no connected servers or pieces + let step = num_pieces / num_connected_servers.min(num_pieces); + + // Take unique first piece index for each connected peer and the rest just to check + // cached pieces up to recommended limit + let mut check_cached_pieces = pieces_to_download + .keys() + .cycle() + .skip(step * peer_index) + // + 1 because one index is removed just below + .take(num_pieces.min(CachedPieceByIndexRequest::RECOMMENDED_LIMIT + 1)) + .copied() + .collect::<Vec<_>>(); + // Pick first piece index as the piece we want to download + let piece_index = check_cached_pieces.swap_remove(0); + + trace!(%peer_id, %piece_index, "Downloading piece from initially connected peer"); + + let permit = semaphore.try_acquire(); + + let fut = async move { + let permit = match permit { + Some(permit) => permit, + None => semaphore.acquire().await, + }; + + download_cached_piece_from_peer( node, piece_validator, peer_id, Vec::new(), - Arc::new(peer_piece_indices), + Arc::new(check_cached_pieces), piece_index, HashSet::new(), HashSet::new(), - ); + permit, + ) + .await + }; - Some((piece_index, Box::pin(fut.into_stream()))) - }) - .collect::<StreamMap<_, _>>(); + (piece_index, Box::pin(fut.into_stream()) as _) + }) + .collect::<StreamMap<_, _>>();
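The windowing in that initial dispatch deserves a note: each connected server gets a unique first piece plus a probe window over the remaining indices, produced by cycling the key set at a per-peer offset. A standalone sketch of the distribution with plain integers (illustrative only, not the gateway's types):

// Sketch: round-robin assignment of piece indices to servers.
fn assign_windows(pieces: &[u64], num_servers: usize, window: usize) -> Vec<Vec<u64>> {
    let num_pieces = pieces.len();
    // Guard against division by zero when there are no servers or pieces.
    let step = num_pieces / num_servers.min(num_pieces).max(1);
    (0..num_servers.min(num_pieces))
        .map(|peer_index| {
            pieces
                .iter()
                .cycle()
                .skip(step * peer_index)
                .take(num_pieces.min(window))
                .copied()
                .collect()
        })
        .collect()
}

// assign_windows(&[0, 1, 2, 3], 2, 3) == [[0, 1, 2], [2, 3, 0]]:
// every server starts at a distinct offset, so first pieces don't collide.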
- // Process every response and potentially schedule follow-up request to the same peer - while let Some((piece_index, result)) = downloading_stream.next().await { - let DownloadedPieceFromPeer { - peer_id, - result, - mut cached_pieces, - not_cached_pieces, - } = result; - trace!(%piece_index, %peer_id, result = %result.is_some(), "Piece response"); + loop {
+ // Process up to 50% of the pieces concurrently + let mut additional_pieces_to_download = + (num_pieces / 2).saturating_sub(downloading_stream.len()); + if additional_pieces_to_download > 0 { + trace!( + %additional_pieces_to_download, + num_pieces, + currently_downloading = %downloading_stream.len(), + "Downloading additional pieces from closest peers" + ); + // Pick up any newly connected peers (if any) + 'outer: for peer_id in node + .connected_servers() + .await + .unwrap_or_default() + .into_iter() + .filter(|peer_id| checked_peers.insert(*peer_id)) + .take(additional_pieces_to_download) + { + let permit = if downloading_stream.is_empty() { + semaphore.acquire().await + } else if let Some(permit) = semaphore.try_acquire() { + permit + } else { + break; + }; - let Some(result) = result else { - // Downloading failed, ignore peer - continue; - }; + for &piece_index in pieces_to_download.keys() { + if downloading_stream.contains_key(&piece_index) { + continue; + } - match result { - PieceResult::Piece(piece) => { - trace!(%piece_index, %peer_id, "Got piece"); + trace!(%peer_id, %piece_index, "Downloading piece from newly connected peer"); - // Downloaded successfully - pieces_to_download.remove(&piece_index); + let check_cached_pieces = sample_cached_piece_indices( + pieces_to_download.keys(), + &HashSet::new(), + &HashSet::new(), + piece_index, + ); + let fut = download_cached_piece_from_peer( + node, + piece_validator, + peer_id, + Vec::new(), + Arc::new(check_cached_pieces), + piece_index, + HashSet::new(), + HashSet::new(), + permit, + ); - results - .unbounded_send((piece_index, Some(piece))) - .expect("This future isn't polled after receiver is dropped; qed"); + downloading_stream.insert(piece_index, Box::pin(fut.into_stream()) as _); + additional_pieces_to_download -= 1; - if pieces_to_download.is_empty() { - return HashMap::new(); - } + continue 'outer; } - PieceResult::ClosestPeers(closest_peers) => { - trace!(%piece_index, %peer_id, "Got closest peers"); + break; + } - // Store closer peers in case piece index was not downloaded yet - if let Some(peers) = pieces_to_download.get_mut(&piece_index) { - peers.extend(Vec::from(closest_peers)); - } + // Pick up more pieces to download from the closest peers + // Ideally we'd not allocate here, but it is hard to explain to the compiler that + // entries are not removed otherwise + let pieces_indices_to_download = pieces_to_download.keys().copied().collect::<Vec<_>>(); + for piece_index in pieces_indices_to_download { + if additional_pieces_to_download == 0 { + break; + } + if downloading_stream.contains_key(&piece_index) { continue; } - } + let permit = if downloading_stream.is_empty() { + semaphore.acquire().await + } else if let Some(permit) = semaphore.try_acquire() { + permit + } else { + break; + }; - let mut maybe_piece_index_to_download_next = None; - // Clear useless entries in cached pieces and find something to download next - cached_pieces.retain(|piece_index| { - // Clear downloaded pieces - if !pieces_to_download.contains_key(piece_index) { - return false; - } + let kbucket_key = KBucketKey::from(piece_index.to_multihash()); + let closest_peers_to_check = pieces_to_download + .get_mut(&piece_index) + .expect("Entries are not removed here; qed") + .closest_peers(&kbucket_key); + for (peer_id, addresses) in closest_peers_to_check { + if !checked_peers.insert(peer_id) { + continue; + } - // Try to pick a piece to
download that is not being downloaded already - if maybe_piece_index_to_download_next.is_none() - && !downloading_stream.contains_key(piece_index) - { - maybe_piece_index_to_download_next.replace(*piece_index); - // We'll not need to download it after this attempt - return false; - } + trace!(%peer_id, %piece_index, "Downloading piece from closest peer"); - // Retain everything else - true - }); + let check_cached_pieces = sample_cached_piece_indices( + pieces_to_download.keys(), + &HashSet::new(), + &HashSet::new(), + piece_index, + ); + let fut = download_cached_piece_from_peer( + node, + piece_validator, + peer_id, + addresses, + Arc::new(check_cached_pieces), + piece_index, + HashSet::new(), + HashSet::new(), + permit, + ); - let piece_index_to_download_next = - if let Some(piece_index) = maybe_piece_index_to_download_next { - trace!(%piece_index, %peer_id, "Next piece to download from peer"); - piece_index - } else { - trace!(%peer_id, "Peer doesn't have anything else"); - // Nothing left to do with this peer - continue; - }; + downloading_stream.insert(piece_index, Box::pin(fut.into_stream()) as _); + additional_pieces_to_download -= 1; + break; + } + } - let fut = download_cached_piece_from_peer( - node, - piece_validator, - peer_id, - Vec::new(), - // Sample more random cached piece indices for connected peer, algorithm can be - // improved, but has to be something simple and this should do it for now - Arc::new( - pieces_to_download - .keys() - // Do a bit of work to filter-out piece indices we already know remote peer - // has or doesn't to decrease burden on them - .filter_map(|piece_index| { - if piece_index == &piece_index_to_download_next - || cached_pieces.contains(piece_index) - || not_cached_pieces.contains(piece_index) - { - None - } else { - Some(*piece_index) - } - }) - .choose_multiple( - &mut thread_rng(), - CachedPieceByIndexRequest::RECOMMENDED_LIMIT, - ), - ), - piece_index_to_download_next, - cached_pieces, - not_cached_pieces, + trace!( + pieces_left = %additional_pieces_to_download, + "Initiated downloading additional pieces from closest peers" ); - downloading_stream.insert(piece_index_to_download_next, Box::pin(fut.into_stream())); } - if pieces_to_download.len() == num_pieces { - debug!(%num_pieces, "Finished downloading from connected peers"); - // Nothing was downloaded, we're done here + let Some((piece_index, result)) = downloading_stream.next().await else { + if !pieces_to_download.is_empty() { + debug!( + %num_pieces, + to_download = %pieces_to_download.len(), + "Finished downloading early" + ); + } + // The stream is exhausted, nothing more can be downloaded + break; + }; + process_downloading_result( + piece_index, + result, + &mut pieces_to_download, + &mut downloading_stream, + node, + piece_validator, + results, + ); + + if pieces_to_download.is_empty() { + break; + } } - pieces_to_download + pieces_to_download.into_keys() } -/// Takes pieces to download with potential peer candidates as an input, sends results with pieces -/// that were downloaded successfully and returns those that were not downloaded -async fn download_cached_pieces_from_closest_peers<PV>( - maybe_pieces_to_download: HashMap<PieceIndex, HashSet<(PeerId, Vec<Multiaddr>)>>, - node: &Node, - piece_validator: &PV, - results: &mpsc::UnboundedSender<(PieceIndex, Option<Piece>)>, +fn process_downloading_result<'a, 'b, PV>( + piece_index: PieceIndex, + result: DownloadedPieceFromPeer<'a>, + pieces_to_download: &'b mut HashMap<PieceIndex, KademliaWrapper>, + downloading_stream: &'b mut StreamMap< + PieceIndex, + Pin<Box<dyn Stream<Item = DownloadedPieceFromPeer<'a>> + Send + 'a>>, + >, + node: &'a Node, + piece_validator: &'a PV,
+ results: &'a mpsc::UnboundedSender<(PieceIndex, Option)>, ) where PV: PieceValidator, { - let kademlia = &Mutex::new(KademliaWrapper::new(node.id())); - // Collection of pieces to download and already connected peers that claim to have them - let connected_peers_with_piece = &Mutex::new( - maybe_pieces_to_download - .keys() - .map(|&piece_index| (piece_index, HashSet::::new())) - .collect::>(), - ); - - let mut downloaded_pieces = maybe_pieces_to_download - .into_iter() - .map(|(piece_index, collected_peers)| async move { - let key = piece_index.to_multihash(); - let kbucket_key = KBucketKey::from(key); - let mut checked_closest_peers = HashSet::::new(); + let DownloadedPieceFromPeer { + peer_id, + result, + mut cached_pieces, + not_cached_pieces, + permit, + } = result; + trace!(%piece_index, %peer_id, result = %result.is_some(), "Piece response"); + + let Some(result) = result else { + // Downloading failed, ignore peer + return; + }; - { - let local_closest_peers = node - .get_closest_local_peers(key, None) - .await - .unwrap_or_default(); - let mut kademlia = kademlia.lock(); + match result { + PieceResult::Piece(piece) => { + trace!(%piece_index, %peer_id, "Got piece"); - for (peer_id, addresses) in collected_peers { - kademlia.add_peer(&peer_id, addresses); - } - for (peer_id, addresses) in local_closest_peers { - kademlia.add_peer(&peer_id, addresses); - } - } + // Downloaded successfully + pieces_to_download.remove(&piece_index); - loop { - // Collect pieces that still need to be downloaded and connected peers that claim to - // have them - let (pieces_to_download, connected_peers) = { - let mut connected_peers_with_piece = connected_peers_with_piece.lock(); - - ( - Arc::new( - connected_peers_with_piece - .keys() - .filter(|&candidate| candidate != &piece_index) - .take(CachedPieceByIndexRequest::RECOMMENDED_LIMIT) - .copied() - .collect::>(), - ), - connected_peers_with_piece - .get_mut(&piece_index) - .map(mem::take) - .unwrap_or_default(), - ) - }; + results + .unbounded_send((piece_index, Some(piece))) + .expect("This future isn't polled after receiver is dropped; qed"); - // Check connected peers that claim to have the piece index first - for peer_id in connected_peers { - let fut = download_cached_piece_from_peer( - node, - piece_validator, - peer_id, - Vec::new(), - Arc::default(), - piece_index, - HashSet::new(), - HashSet::new(), - ); + if pieces_to_download.is_empty() { + return; + } - match fut.await.result { - Some(PieceResult::Piece(piece)) => { - return (piece_index, Some(piece)); - } - Some(PieceResult::ClosestPeers(closest_peers)) => { - let mut kademlia = kademlia.lock(); + cached_pieces.remove(&piece_index); + } + PieceResult::ClosestPeers(closest_peers) => { + trace!(%piece_index, %peer_id, "Got closest peers"); - // Store additional closest peers reported by the peer - for (peer_id, addresses) in Vec::from(closest_peers) { - kademlia.add_peer(&peer_id, addresses); - } - } - None => { - checked_closest_peers.insert(peer_id); - } - } + // Store closer peers in case piece index was not downloaded yet + if let Some(kademlia) = pieces_to_download.get_mut(&piece_index) { + for (peer_id, addresses) in Vec::from(closest_peers) { + kademlia.add_peer(&peer_id, addresses); } + } - // Find the closest peers that were not queried yet - let closest_peers_to_check = kademlia.lock().closest_peers(&kbucket_key); - let closest_peers_to_check = closest_peers_to_check - .filter(|(peer_id, _addresses)| checked_closest_peers.insert(*peer_id)) - .collect::>(); + // No need to ask 
this peer again if they claimed to have this piece index earlier + if cached_pieces.remove(&piece_index) { + return; + } + } + } - if closest_peers_to_check.is_empty() { - // No new closest peers found, nothing left to do here - break; - } + let mut maybe_piece_index_to_download_next = None; + // Clear useless entries in cached pieces and find something to download next + cached_pieces.retain(|piece_index| { + // Clear downloaded pieces + if !pieces_to_download.contains_key(piece_index) { + return false; + } - for (peer_id, addresses) in closest_peers_to_check { - let fut = download_cached_piece_from_peer( - node, - piece_validator, - peer_id, - addresses, - Arc::clone(&pieces_to_download), - piece_index, - HashSet::new(), - HashSet::new(), - ); + // Try to pick a piece to download that is not being downloaded already + if maybe_piece_index_to_download_next.is_none() + && !downloading_stream.contains_key(piece_index) + { + maybe_piece_index_to_download_next.replace(*piece_index); + // We'll check it later when receiving response + return true; + } - let DownloadedPieceFromPeer { - peer_id: _, - result, - cached_pieces, - not_cached_pieces: _, - } = fut.await; - - if !cached_pieces.is_empty() { - let mut connected_peers_with_piece = connected_peers_with_piece.lock(); - - // Remember that this peer has some pieces that need to be downloaded here - for cached_piece_index in cached_pieces { - if let Some(peers) = - connected_peers_with_piece.get_mut(&cached_piece_index) - { - peers.insert(peer_id); - } - } - } + // Retain everything else + true + }); - match result { - Some(PieceResult::Piece(piece)) => { - return (piece_index, Some(piece)); - } - Some(PieceResult::ClosestPeers(closest_peers)) => { - let mut kademlia = kademlia.lock(); + let piece_index_to_download_next = if let Some(piece_index) = maybe_piece_index_to_download_next + { + trace!(%piece_index, %peer_id, "Next piece to download from peer"); + piece_index + } else { + trace!(%peer_id, "Peer doesn't have anything else"); + // Nothing left to do with this peer + return; + }; - // Store additional closest peers - for (peer_id, addresses) in Vec::from(closest_peers) { - kademlia.add_peer(&peer_id, addresses); - } - } - None => { - checked_closest_peers.insert(peer_id); - } - } - } - } + let fut = download_cached_piece_from_peer( + node, + piece_validator, + peer_id, + Vec::new(), + // Sample more random cached piece indices for connected peer, algorithm can be + // improved, but has to be something simple and this should do it for now + Arc::new(sample_cached_piece_indices( + pieces_to_download.keys(), + &cached_pieces, + ¬_cached_pieces, + piece_index_to_download_next, + )), + piece_index_to_download_next, + cached_pieces, + not_cached_pieces, + permit, + ); + downloading_stream.insert(piece_index_to_download_next, Box::pin(fut.into_stream())); +} - (piece_index, None) +fn sample_cached_piece_indices<'a, I>( + pieces_to_download: I, + cached_pieces: &HashSet, + not_cached_pieces: &HashSet, + piece_index_to_download_next: PieceIndex, +) -> Vec +where + I: Iterator, +{ + pieces_to_download + // Do a bit of work to filter-out piece indices we already know remote peer + // has or doesn't to decrease burden on them + .filter_map(|piece_index| { + if piece_index == &piece_index_to_download_next + || cached_pieces.contains(piece_index) + || not_cached_pieces.contains(piece_index) + { + None + } else { + Some(*piece_index) + } }) - .collect::>(); - - while let Some((piece_index, maybe_piece)) = downloaded_pieces.next().await { - 
connected_peers_with_piece.lock().remove(&piece_index); - - results - .unbounded_send((piece_index, maybe_piece)) - .expect("This future isn't polled after receiver is dropped; qed"); - } + .choose_multiple( + &mut thread_rng(), + CachedPieceByIndexRequest::RECOMMENDED_LIMIT, + ) } -struct DownloadedPieceFromPeer { +struct DownloadedPieceFromPeer<'a> { peer_id: PeerId, result: Option<PieceResult>, cached_pieces: HashSet<PieceIndex>, not_cached_pieces: HashSet<PieceIndex>, + permit: SemaphoreGuard<'a>, } +/// `check_cached_pieces` contains a list of piece indices for the remote peer to check against its +/// locally cached pieces, while `cached_pieces` and `not_cached_pieces` contain piece indices the +/// peer is already known to have or not have #[allow(clippy::too_many_arguments)] -async fn download_cached_piece_from_peer<PV>( - node: &Node, - piece_validator: &PV, +async fn download_cached_piece_from_peer<'a, PV>( + node: &'a Node, + piece_validator: &'a PV, peer_id: PeerId, addresses: Vec<Multiaddr>, - peer_piece_indices: Arc<Vec<PieceIndex>>, + check_cached_pieces: Arc<Vec<PieceIndex>>, piece_index: PieceIndex, mut cached_pieces: HashSet<PieceIndex>, mut not_cached_pieces: HashSet<PieceIndex>, -) -> DownloadedPieceFromPeer + permit: SemaphoreGuard<'a>, +) -> DownloadedPieceFromPeer<'a> where PV: PieceValidator, { @@ -925,7 +945,7 @@ where addresses, CachedPieceByIndexRequest { piece_index, - cached_pieces: peer_piece_indices, + cached_pieces: Arc::clone(&check_cached_pieces), }, ) .await @@ -958,24 +978,29 @@ where }; match result { - Some(result) => DownloadedPieceFromPeer { - peer_id, - result: Some(result.result), - cached_pieces: { - cached_pieces.extend(result.cached_pieces); - cached_pieces - }, - not_cached_pieces, - }, - None => { - not_cached_pieces.insert(piece_index); + Some(result) => { + cached_pieces.extend(result.cached_pieces); + not_cached_pieces.extend( + check_cached_pieces + .iter() + .filter(|piece_index| !cached_pieces.contains(piece_index)) + .copied(), + ); DownloadedPieceFromPeer { peer_id, - result: None, - cached_pieces, + result: Some(result.result), + cached_pieces, not_cached_pieces, + permit, } } + None => DownloadedPieceFromPeer { + peer_id, + result: None, + cached_pieces, + not_cached_pieces, + permit, + }, } } diff --git a/crates/subspace-node/Cargo.toml b/crates/subspace-node/Cargo.toml index b5dd8b0afd..0d0ad438fe 100644 --- a/crates/subspace-node/Cargo.toml +++ b/crates/subspace-node/Cargo.toml @@ -15,11 +15,6 @@ include = [ "/README.md" ] -# TODO: remove when cargo-audit supports v4 lock files -# https://github.com/rustsec/rustsec/issues/1249 -# This setting applies to the workspace lockfile, even though it's in a crate.
-rust-version = "1.81" # ensure we stay at lockfile v3 - [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] @@ -28,7 +23,6 @@ auto-id-domain-runtime = { version = "0.1.0", path = "../../domains/runtime/auto bip39 = { version = "2.0.0", features = ["rand"] } clap = { version = "4.5.18", features = ["derive"] } cross-domain-message-gossip = { version = "0.1.0", path = "../../domains/client/cross-domain-message-gossip" } -dirs = "5.0.1" domain-client-message-relayer = { version = "0.1.0", path = "../../domains/client/relayer" } domain-client-operator = { version = "0.1.0", path = "../../domains/client/domain-operator" } domain-eth-service = { version = "0.1.0", path = "../../domains/client/eth-service" } @@ -70,7 +64,6 @@ sp-consensus-subspace = { version = "0.1.0", path = "../sp-consensus-subspace" } sp-core = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sp-domains = { version = "0.1.0", path = "../sp-domains" } sp-domain-digests = { version = "0.1.0", path = "../../domains/primitives/digests" } -sp-domains-fraud-proof = { version = "0.1.0", path = "../sp-domains-fraud-proof" } sp-keystore = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sp-messenger = { version = "0.1.0", path = "../../domains/primitives/messenger" } sp-runtime = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } @@ -82,13 +75,12 @@ subspace-runtime = { version = "0.1.0", path = "../subspace-runtime" } subspace-runtime-primitives = { version = "0.1.0", path = "../subspace-runtime-primitives" } subspace-service = { version = "0.1.0", path = "../subspace-service" } substrate-prometheus-endpoint = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } -supports-color = "3.0.1" tempfile = "3.13.0" thiserror = "2.0.0" tokio = { version = "1.40.0", features = ["macros"] } tokio-stream = "0.1.16" tracing = "0.1.40" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +subspace-logging = { version = "0.0.1", path = "../../shared/subspace-logging" } [build-dependencies] substrate-build-script-utils = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } diff --git a/crates/subspace-node/src/commands/domain_key.rs b/crates/subspace-node/src/commands/domain_key.rs index 7fe024b474..77405fb3ec 100644 --- a/crates/subspace-node/src/commands/domain_key.rs +++ b/crates/subspace-node/src/commands/domain_key.rs @@ -1,6 +1,4 @@ -use crate::commands::shared::{ - derive_keypair, init_logger, store_key_in_keystore, KeystoreOptions, -}; +use crate::commands::shared::{derive_keypair, store_key_in_keystore, KeystoreOptions}; use bip39::Mnemonic; use clap::Parser; use sc_cli::{Error, KeystoreParams}; @@ -9,6 +7,7 @@ use sp_core::crypto::{ExposeSecret, SecretString}; use sp_core::Pair; use sp_domains::DomainId; use std::path::PathBuf; +use subspace_logging::init_logger; use tracing::{info, warn}; /// Options for creating domain key @@ -27,7 +26,6 @@ pub struct CreateDomainKeyOptions { pub fn create_domain_key(options: CreateDomainKeyOptions) -> Result<(), Error> { init_logger(); - let CreateDomainKeyOptions { base_path, domain_id, @@ -95,7 +93,6 @@ pub struct InsertDomainKeyOptions { pub fn insert_domain_key(options: InsertDomainKeyOptions) -> Result<(), Error> { init_logger(); - let InsertDomainKeyOptions { base_path, domain_id, diff --git 
a/crates/subspace-node/src/commands/run.rs b/crates/subspace-node/src/commands/run.rs index 2ad0205072..cce8f4d3cf 100644 --- a/crates/subspace-node/src/commands/run.rs +++ b/crates/subspace-node/src/commands/run.rs @@ -8,7 +8,6 @@ use crate::commands::run::consensus::{ use crate::commands::run::domain::{ create_domain_configuration, run_domain, DomainOptions, DomainStartOptions, }; -use crate::commands::shared::init_logger; use crate::{set_default_ss58_version, Error, PosTable}; use clap::Parser; use cross_domain_message_gossip::GossipWorkerBuilder; @@ -28,6 +27,7 @@ use sp_core::traits::SpawnEssentialNamed; use sp_messenger::messages::ChainId; use std::env; use std::sync::Arc; +use subspace_logging::init_logger; use subspace_metrics::{start_prometheus_metrics_server, RegistryAdapter}; use subspace_runtime::{Block, RuntimeApi}; use subspace_service::config::ChainSyncMode; @@ -76,7 +76,6 @@ fn raise_fd_limit() { pub async fn run(run_options: RunOptions) -> Result<(), Error> { init_logger(); raise_fd_limit(); - let signals = Signals::capture()?; let RunOptions { @@ -212,7 +211,6 @@ pub async fn run(run_options: RunOptions) -> Result<(), Error> { _, _, _, - DomainBlock, _, _, >( diff --git a/crates/subspace-node/src/commands/run/consensus.rs b/crates/subspace-node/src/commands/run/consensus.rs index 9ca9fc25ad..519a5fd386 100644 --- a/crates/subspace-node/src/commands/run/consensus.rs +++ b/crates/subspace-node/src/commands/run/consensus.rs @@ -15,6 +15,7 @@ use sc_telemetry::TelemetryEndpoints; use std::collections::HashSet; use std::fmt; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; +use std::num::NonZeroU32; use std::path::PathBuf; use std::str::FromStr; use subspace_core_primitives::BlockNumber; @@ -308,10 +309,17 @@ struct TimekeeperOptions { pub enum CreateObjectMappingConfig { /// Start creating object mappings from this block number. /// - /// This can be lower than the latest archived block. - Block(BlockNumber), + /// This can be lower than the latest archived block, but must be greater than genesis. + /// + /// The genesis block doesn't have mappings, so starting mappings at genesis is pointless. + /// The archiver will fail if it can't get the data for this block, and snap sync doesn't store + /// the genesis data on disk. So avoiding genesis also avoids this error. + /// + Block(NonZeroU32), /// Create object mappings as archiving is happening. + /// This continues from the last archived segment, but mappings that were in the channel or RPC + /// segment when the node shut down can be lost. Yes, /// Don't create object mappings. @@ -337,7 +345,7 @@ impl FromStr for CreateObjectMappingConfig { "no" => Ok(Self::No), "yes" => Ok(Self::Yes), block => block.parse().map(Self::Block).map_err(|_| { - "Unsupported create object mappings setting: use `yes`, `no` or a block number" + "Unsupported create object mappings setting: use `yes`, `no` or a non-zero block number" .to_string() }), } @@ -443,10 +451,10 @@ pub(super) struct ConsensusChainOptions { /// Create object mappings during archiving. /// - /// Can be set to `no` (default), `yes` (creates object mappings as archiving is happening) or - /// block number from which to continue creating object mappings. + /// Can be set to `no` (default), `yes` (creates object mappings as archiving is happening), or + /// a non-zero block number where the node starts creating object mappings. 
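+ /// For example (illustrative invocations): `--create-object-mappings yes` creates mappings alongside archiving, `--create-object-mappings 100` starts creating them at block 100, and `--create-object-mappings 0` is rejected, because the genesis block has no mappings.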
/// - /// --dev mode enables mappings from genesis automatically, unless the value is supplied + /// --dev mode enables mappings from the first block automatically, unless a value is supplied /// explicitly. /// Use `no` to disable mappings in --dev mode. #[arg(long)] @@ -519,7 +527,6 @@ pub(super) fn create_consensus_chain_configuration( let transaction_pool; let rpc_cors; - // Development mode handling is limited to this section { if dev { @@ -534,7 +541,7 @@ pub(super) fn create_consensus_chain_configuration( timekeeper_options.timekeeper = true; if create_object_mappings.is_none() { - create_object_mappings = Some(CreateObjectMappingConfig::Block(0)); + create_object_mappings = Some(CreateObjectMappingConfig::Block(NonZeroU32::MIN)); } if sync.is_none() { @@ -750,7 +757,6 @@ pub(super) fn create_consensus_chain_configuration( force_new_slot_notifications: domains_enabled, create_object_mappings: create_object_mappings.unwrap_or_default().into(), subspace_networking: SubspaceNetworking::Create { config: dsn_config }, - dsn_piece_getter: None, sync, is_timekeeper: timekeeper_options.timekeeper, timekeeper_cpu_cores: timekeeper_options.timekeeper_cpu_cores, diff --git a/crates/subspace-node/src/commands/shared.rs b/crates/subspace-node/src/commands/shared.rs index 52f48d1eec..f45b7e965b 100644 --- a/crates/subspace-node/src/commands/shared.rs +++ b/crates/subspace-node/src/commands/shared.rs @@ -9,9 +9,6 @@ use sp_keystore::Keystore; use std::panic; use std::path::PathBuf; use std::process::exit; -use tracing_subscriber::filter::LevelFilter; -use tracing_subscriber::prelude::*; -use tracing_subscriber::{fmt, EnvFilter}; /// Options used for keystore #[derive(Debug, Parser)] @@ -63,22 +60,3 @@ pub(crate) fn set_exit_on_panic() { exit(1); })); } - -pub(super) fn init_logger() { - // TODO: Workaround for https://github.com/tokio-rs/tracing/issues/2214, also on - // Windows terminal doesn't support the same colors as bash does - let enable_color = if cfg!(windows) { - false - } else { - supports_color::on(supports_color::Stream::Stderr).is_some() - }; - tracing_subscriber::registry() - .with( - fmt::layer().with_ansi(enable_color).with_filter( - EnvFilter::builder() - .with_default_directive(LevelFilter::INFO.into()) - .from_env_lossy(), - ), - ) - .init(); -} diff --git a/crates/subspace-node/src/commands/wipe.rs b/crates/subspace-node/src/commands/wipe.rs index 5302cce98a..7f2bc2ce53 100644 --- a/crates/subspace-node/src/commands/wipe.rs +++ b/crates/subspace-node/src/commands/wipe.rs @@ -1,7 +1,7 @@ -use crate::commands::shared::init_logger; use clap::Parser; use std::path::PathBuf; use std::{fs, io}; +use subspace_logging::init_logger; use tracing::info; /// Options for running a node @@ -13,7 +13,6 @@ pub struct WipeOptions { pub fn wipe(WipeOptions { base_path }: WipeOptions) -> Result<(), io::Error> { init_logger(); - let paths = [ base_path.join("db"), base_path.join("domains"), diff --git a/crates/subspace-runtime/src/lib.rs b/crates/subspace-runtime/src/lib.rs index d71743c48e..7ba9312dff 100644 --- a/crates/subspace-runtime/src/lib.rs +++ b/crates/subspace-runtime/src/lib.rs @@ -83,6 +83,7 @@ use sp_messenger::endpoint::{Endpoint, EndpointHandler as EndpointHandlerT, Endp use sp_messenger::messages::{ BlockMessagesWithStorageKey, ChainId, CrossDomainMessage, FeeModel, MessageId, MessageKey, }; +use sp_messenger::{ChannelNonce, XdmId}; use sp_messenger_host_functions::{get_storage_key, StorageKeyRequest}; use sp_mmr_primitives::EncodableOpaqueLeaf; use sp_runtime::traits::{ @@ 
-130,7 +131,7 @@ pub const VERSION: RuntimeVersion = RuntimeVersion { spec_name: Cow::Borrowed("subspace"), impl_name: Cow::Borrowed("subspace"), authoring_version: 0, - spec_version: 0, + spec_version: 1, impl_version: 0, apis: RUNTIME_API_VERSIONS, transaction_version: 0, @@ -1325,10 +1326,6 @@ impl_runtime_apis! { }) } - fn operator_id_by_signing_key(signing_key: OperatorPublicKey) -> Option { - Domains::operator_signing_key(signing_key) - } - fn receipt_hash(domain_id: DomainId, domain_number: DomainNumber) -> Option { Domains::receipt_hash(domain_id, domain_number) } @@ -1446,6 +1443,22 @@ impl_runtime_apis! { fn domain_chains_allowlist_update(domain_id: DomainId) -> Option{ Messenger::domain_chains_allowlist_update(domain_id) } + + fn xdm_id(ext: &::Extrinsic) -> Option { + match &ext.function { + RuntimeCall::Messenger(pallet_messenger::Call::relay_message { msg })=> { + Some(XdmId::RelayMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + RuntimeCall::Messenger(pallet_messenger::Call::relay_message_response { msg }) => { + Some(XdmId::RelayResponseMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + _ => None, + } + } + + fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option { + Messenger::channel_nonce(chain_id, channel_id) + } } impl sp_messenger::RelayerApi::Hash> for Runtime { diff --git a/crates/subspace-service/Cargo.toml b/crates/subspace-service/Cargo.toml index 1cb4b260eb..b3397a7f72 100644 --- a/crates/subspace-service/Cargo.toml +++ b/crates/subspace-service/Cargo.toml @@ -16,8 +16,10 @@ include = [ targets = ["x86_64-unknown-linux-gnu"] [dependencies] +anyhow = "1.0.89" array-bytes = "6.2.3" async-channel = "1.8.0" +async-lock = "3.4.0" async-trait = "0.1.83" cross-domain-message-gossip = { version = "0.1.0", path = "../../domains/client/cross-domain-message-gossip" } domain-runtime-primitives = { version = "0.1.0", path = "../../domains/primitives/runtime" } @@ -43,9 +45,7 @@ sc-domains = { version = "0.1.0", path = "../sc-domains" } sc-executor = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sc-informant = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sc-network = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } -sc-network-light = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sc-network-sync = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } -sc-network-transactions = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sc-offchain = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sc-proof-of-time = { version = "0.1.0", path = "../sc-proof-of-time" } sc-rpc = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } @@ -57,7 +57,6 @@ sc-telemetry = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8 sc-tracing = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sc-transaction-pool = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } sc-transaction-pool-api = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } -sc-utils = { git = 
"https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } schnellru = "0.2.1" schnorrkel = "0.11.4" sp-api = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } @@ -83,6 +82,7 @@ sp-transaction-pool = { git = "https://github.com/subspace/polkadot-sdk", rev = static_assertions = "1.1.0" subspace-archiving = { version = "0.1.0", path = "../subspace-archiving" } subspace-core-primitives = { version = "0.1.0", path = "../subspace-core-primitives" } +subspace-data-retrieval = { version = "0.1.0", path = "../../shared/subspace-data-retrieval" } subspace-erasure-coding = { version = "0.1.0", path = "../subspace-erasure-coding" } subspace-kzg = { version = "0.1.0", path = "../../shared/subspace-kzg" } subspace-networking = { version = "0.1.0", path = "../subspace-networking" } diff --git a/crates/subspace-service/src/config.rs b/crates/subspace-service/src/config.rs index 6f6e895f3f..113bce6dcc 100644 --- a/crates/subspace-service/src/config.rs +++ b/crates/subspace-service/src/config.rs @@ -1,5 +1,4 @@ use crate::dsn::DsnConfig; -use crate::sync_from_dsn::DsnSyncPieceGetter; use sc_chain_spec::ChainSpec; use sc_consensus_subspace::archiver::CreateObjectMappings; use sc_network::config::{ @@ -24,6 +23,7 @@ use std::path::PathBuf; use std::str::FromStr; use std::sync::atomic::AtomicBool; use std::sync::Arc; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_networking::libp2p::Multiaddr; use subspace_networking::Node; use tokio::runtime::Handle; @@ -287,6 +287,8 @@ pub enum SubspaceNetworking { node: Node, /// Bootstrap nodes used (that can be also sent to the farmer over RPC) bootstrap_nodes: Vec, + /// Piece getter + piece_getter: Arc, }, /// Networking must be instantiated internally Create { @@ -307,8 +309,6 @@ pub struct SubspaceConfiguration { pub create_object_mappings: CreateObjectMappings, /// Subspace networking (DSN). 
pub subspace_networking: SubspaceNetworking, - /// DSN piece getter - pub dsn_piece_getter: Option>, /// Is this node a Timekeeper pub is_timekeeper: bool, /// CPU cores that timekeeper can use diff --git a/crates/subspace-service/src/lib.rs b/crates/subspace-service/src/lib.rs index f31b8f4be4..e94734a852 100644 --- a/crates/subspace-service/src/lib.rs +++ b/crates/subspace-service/src/lib.rs @@ -42,7 +42,9 @@ use crate::mmr::request_handler::MmrRequestHandler; pub use crate::mmr::sync::mmr_sync; use crate::sync_from_dsn::piece_validator::SegmentCommitmentPieceValidator; use crate::sync_from_dsn::snap_sync::snap_sync; +use crate::sync_from_dsn::DsnPieceGetter; use crate::transaction_pool::FullPool; +use async_lock::Semaphore; use core::sync::atomic::{AtomicU32, Ordering}; use cross_domain_message_gossip::xdm_gossip_peers_set_config; use domain_runtime_primitives::opaque::{Block as DomainBlock, Header as DomainHeader}; @@ -136,7 +138,6 @@ use subspace_proof_of_space::Table; use subspace_runtime_primitives::opaque::Block; use subspace_runtime_primitives::{AccountId, Balance, Hash, Nonce}; use tokio::sync::broadcast; -use tokio::sync::broadcast::Receiver; use tracing::{debug, error, info, Instrument}; pub use utils::wait_for_block_import; @@ -148,6 +149,8 @@ const_assert!(std::mem::size_of::() >= std::mem::size_of::()); /// too large to handle const POT_VERIFIER_CACHE_SIZE: u32 = 30_000; const SYNC_TARGET_UPDATE_INTERVAL: Duration = Duration::from_secs(1); +/// Multiplier on top of outgoing connections number for piece downloading purposes +const PIECE_PROVIDER_MULTIPLIER: usize = 10; /// Error type for Subspace service. #[derive(thiserror::Error, Debug)] @@ -735,7 +738,7 @@ pub async fn new_full( prometheus_registry: Option<&mut Registry>, enable_rpc_extensions: bool, block_proposal_slot_portion: SlotProportion, - consensus_snap_sync_target_block_receiver: Option>, + consensus_snap_sync_target_block_receiver: Option>, ) -> Result, Error> where PosTable: Table, @@ -775,11 +778,12 @@ where } = other; let offchain_indexing_enabled = config.base.offchain_worker.indexing_enabled; - let (node, bootstrap_nodes) = match config.subspace_networking { + let (node, bootstrap_nodes, piece_getter) = match config.subspace_networking { SubspaceNetworking::Reuse { node, bootstrap_nodes, - } => (node, bootstrap_nodes), + piece_getter, + } => (node, bootstrap_nodes, piece_getter), SubspaceNetworking::Create { config: dsn_config } => { let dsn_protocol_version = hex::encode(client.chain_info().genesis_hash); @@ -789,6 +793,7 @@ where "Setting DSN protocol version..." 
); + let out_connections = dsn_config.max_out_connections; let (node, mut node_runner) = create_dsn_instance( dsn_protocol_version, dsn_config.clone(), @@ -822,7 +827,23 @@ where ), ); - (node, dsn_config.bootstrap_nodes) + let piece_provider = PieceProvider::new( + node.clone(), + SegmentCommitmentPieceValidator::new( + node.clone(), + subspace_link.kzg().clone(), + segment_headers_store.clone(), + ), + Arc::new(Semaphore::new( + out_connections as usize * PIECE_PROVIDER_MULTIPLIER, + )), + ); + + ( + node, + dsn_config.bootstrap_nodes, + Arc::new(DsnPieceGetter::new(piece_provider)) as _, + ) } }; @@ -1049,17 +1070,6 @@ where network_wrapper.set(network_service.clone()); - let dsn_sync_piece_getter = config.dsn_piece_getter.unwrap_or_else(|| { - Arc::new(PieceProvider::new( - node.clone(), - SegmentCommitmentPieceValidator::new( - node.clone(), - subspace_link.kzg().clone(), - segment_headers_store.clone(), - ), - )) - }); - if !config.base.network.force_synced { // Start with DSN sync in this case pause_sync.store(true, Ordering::Release); @@ -1072,7 +1082,7 @@ where Arc::clone(&client), import_queue_service1, pause_sync.clone(), - dsn_sync_piece_getter.clone(), + piece_getter.clone(), sync_service.clone(), network_service_handle, subspace_link.erasure_coding().clone(), @@ -1090,7 +1100,7 @@ where sync_service.clone(), sync_target_block_number, pause_sync, - dsn_sync_piece_getter, + piece_getter, subspace_link.erasure_coding().clone(), ); task_manager diff --git a/crates/subspace-service/src/sync_from_dsn.rs b/crates/subspace-service/src/sync_from_dsn.rs index 892498b531..41207f776e 100644 --- a/crates/subspace-service/src/sync_from_dsn.rs +++ b/crates/subspace-service/src/sync_from_dsn.rs @@ -7,7 +7,7 @@ use crate::sync_from_dsn::import_blocks::import_blocks_from_dsn; use crate::sync_from_dsn::segment_header_downloader::SegmentHeaderDownloader; use async_trait::async_trait; use futures::channel::mpsc; -use futures::{select, FutureExt, StreamExt}; +use futures::{select, FutureExt, Stream, StreamExt}; use sc_client_api::{AuxStore, BlockBackend, BlockchainEvents}; use sc_consensus::import_queue::ImportQueueService; use sc_consensus_subspace::archiver::SegmentHeadersStore; @@ -17,7 +17,6 @@ use sp_api::ProvideRuntimeApi; use sp_blockchain::HeaderBackend; use sp_consensus_subspace::SubspaceApi; use sp_runtime::traits::{Block as BlockT, CheckedSub, NumberFor}; -use std::error::Error; use std::fmt; use std::future::Future; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; @@ -26,6 +25,7 @@ use std::time::{Duration, Instant}; use subspace_core_primitives::pieces::{Piece, PieceIndex}; use subspace_core_primitives::segments::SegmentIndex; use subspace_core_primitives::PublicKey; +use subspace_data_retrieval::piece_getter::PieceGetter; use subspace_erasure_coding::ErasureCoding; use subspace_networking::utils::piece_provider::{PieceProvider, PieceValidator}; use subspace_networking::Node; @@ -40,38 +40,53 @@ const CHECK_ALMOST_SYNCED_INTERVAL: Duration = Duration::from_secs(1); /// Period of time during which node should be offline for DSN sync to kick-in const MIN_OFFLINE_PERIOD: Duration = Duration::from_secs(60); -/// Trait representing a way to get pieces for DSN sync purposes -#[async_trait] -pub trait DsnSyncPieceGetter: fmt::Debug { - async fn get_piece( - &self, - piece_index: PieceIndex, - ) -> Result, Box>; +/// Wrapper type for [`PieceProvider`], so it can implement [`PieceGetter`] +pub struct DsnPieceGetter(PieceProvider); + +impl fmt::Debug for DsnPieceGetter +where + PV: 
PieceValidator, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("DsnPieceGetter") + .field(&format!("{:?}", self.0)) + .finish() + } } #[async_trait] -impl DsnSyncPieceGetter for Arc +impl PieceGetter for DsnPieceGetter where - T: DsnSyncPieceGetter + Send + Sync + ?Sized, + PV: PieceValidator, { - async fn get_piece( - &self, - piece_index: PieceIndex, - ) -> Result, Box> { - self.as_ref().get_piece(piece_index).await + #[inline] + async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result> { + Ok(self.0.get_piece_from_cache(piece_index).await) + } + + #[inline] + async fn get_pieces<'a>( + &'a self, + piece_indices: Vec, + ) -> anyhow::Result< + Box>)> + Send + Unpin + 'a>, + > { + let stream = self + .0 + .get_from_cache(piece_indices) + .await + .map(|(piece_index, maybe_piece)| (piece_index, Ok(maybe_piece))); + Ok(Box::new(stream)) } } -#[async_trait] -impl DsnSyncPieceGetter for PieceProvider +impl DsnPieceGetter where PV: PieceValidator, { - async fn get_piece( - &self, - piece_index: PieceIndex, - ) -> Result, Box> { - Ok(self.get_piece_from_cache(piece_index).await) + /// Creates new DSN piece getter + pub fn new(piece_provider: PieceProvider) -> Self { + Self(piece_provider) } } @@ -114,7 +129,7 @@ where + Sync + 'static, Client::Api: SubspaceApi, - PG: DsnSyncPieceGetter + Send + Sync + 'static, + PG: PieceGetter + Send + Sync + 'static, { let (tx, rx) = mpsc::channel(0); let observer_fut = { @@ -278,7 +293,7 @@ where + Sync + 'static, Client::Api: SubspaceApi, - PG: DsnSyncPieceGetter, + PG: PieceGetter, { let info = client.info(); let chain_constants = client diff --git a/crates/subspace-service/src/sync_from_dsn/import_blocks.rs b/crates/subspace-service/src/sync_from_dsn/import_blocks.rs index fea5dfc551..d6b648561d 100644 --- a/crates/subspace-service/src/sync_from_dsn/import_blocks.rs +++ b/crates/subspace-service/src/sync_from_dsn/import_blocks.rs @@ -15,9 +15,7 @@ // along with this program. If not, see . use crate::sync_from_dsn::segment_header_downloader::SegmentHeaderDownloader; -use crate::sync_from_dsn::DsnSyncPieceGetter; -use futures::stream::FuturesUnordered; -use futures::StreamExt; +use crate::sync_from_dsn::PieceGetter; use sc_client_api::{AuxStore, BlockBackend, HeaderBackend}; use sc_consensus::import_queue::ImportQueueService; use sc_consensus::IncomingBlock; @@ -31,16 +29,11 @@ use sp_runtime::Saturating; use std::sync::{Arc, Mutex}; use std::time::Duration; use subspace_archiving::reconstructor::Reconstructor; -use subspace_core_primitives::pieces::Piece; -use subspace_core_primitives::segments::{ - ArchivedHistorySegment, RecordedHistorySegment, SegmentIndex, -}; +use subspace_core_primitives::segments::SegmentIndex; use subspace_core_primitives::BlockNumber; +use subspace_data_retrieval::segment_downloading::download_segment_pieces; use subspace_erasure_coding::ErasureCoding; -use subspace_networking::utils::multihash::ToMultihash; -use tokio::sync::Semaphore; -use tokio::task::spawn_blocking; -use tracing::warn; +use tokio::task; /// How many blocks to queue before pausing and waiting for blocks to be imported, this is /// essentially used to ensure we use a bounded amount of RAM during sync process. 
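A minimal, self-contained sketch of the bounded queueing strategy this constant implements; the limit value and function names here are illustrative, not the actual import loop:

const QUEUED_BLOCKS_LIMIT: usize = 2048; // stand-in for the real constant above

// Buffer decoded blocks and flush a full batch to the (slower) importer before
// decoding more, bounding peak RAM to one batch regardless of history size.
fn import_with_bounded_memory<B>(
    blocks: impl IntoIterator<Item = B>,
    mut import_batch: impl FnMut(Vec<B>),
) {
    let mut queued = Vec::with_capacity(QUEUED_BLOCKS_LIMIT);
    for block in blocks {
        queued.push(block);
        if queued.len() >= QUEUED_BLOCKS_LIMIT {
            import_batch(std::mem::take(&mut queued));
        }
    }
    if !queued.is_empty() {
        import_batch(queued);
    }
}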
@@ -66,7 +59,7 @@ where Block: BlockT, AS: AuxStore + Send + Sync + 'static, Client: HeaderBackend + BlockBackend + Send + Sync + 'static, - PG: DsnSyncPieceGetter, + PG: PieceGetter, IQS: ImportQueueService + ?Sized, { { @@ -141,8 +134,26 @@ where continue; } - let blocks = - download_and_reconstruct_blocks(segment_index, piece_getter, &reconstructor).await?; + let segment_pieces = download_segment_pieces(segment_index, piece_getter) + .await + .map_err(|error| format!("Failed to download segment pieces: {error}"))?; + // CPU-intensive piece and segment reconstruction code can block the async executor. + let segment_contents_fut = task::spawn_blocking({ + let reconstructor = reconstructor.clone(); + + move || { + reconstructor + .lock() + .expect("Panic if previous thread panicked when holding the mutex") + .add_segment(segment_pieces.as_ref()) + } + }); + let blocks = segment_contents_fut + .await + .expect("Panic if blocking task panicked") + .map_err(|error| error.to_string())? + .blocks; + trace!(%segment_index, "Segment reconstructed successfully"); let mut blocks_to_import = Vec::with_capacity(QUEUED_BLOCKS_LIMIT as usize); @@ -235,109 +246,3 @@ where Ok(imported_blocks) } - -/// Downloads and reconstructs blocks from a DSN segment, by concurrently downloading its pieces. -pub(super) async fn download_and_reconstruct_blocks( - segment_index: SegmentIndex, - piece_getter: &PG, - reconstructor: &Arc>, -) -> Result)>, Error> -where - PG: DsnSyncPieceGetter, -{ - debug!(%segment_index, "Retrieving pieces of the segment"); - - let semaphore = &Semaphore::new(RecordedHistorySegment::NUM_RAW_RECORDS); - - let mut received_segment_pieces = segment_index - .segment_piece_indexes_source_first() - .into_iter() - .map(|piece_index| { - // Source pieces will acquire permit here right away - let maybe_permit = semaphore.try_acquire().ok(); - - async move { - let permit = match maybe_permit { - Some(permit) => permit, - None => { - // Other pieces will acquire permit here instead - match semaphore.acquire().await { - Ok(permit) => permit, - Err(error) => { - warn!( - %piece_index, - %error, - "Semaphore was closed, interrupting piece retrieval" - ); - return None; - } - } - } - }; - let maybe_piece = match piece_getter.get_piece(piece_index).await { - Ok(maybe_piece) => maybe_piece, - Err(error) => { - trace!( - %error, - ?piece_index, - "Piece request failed", - ); - return None; - } - }; - - let key = - subspace_networking::libp2p::kad::RecordKey::from(piece_index.to_multihash()); - trace!( - ?piece_index, - key = hex::encode(&key), - piece_found = maybe_piece.is_some(), - "Piece request succeeded", - ); - - maybe_piece.map(|received_piece| { - // Piece was received successfully, "remove" this slot from semaphore - permit.forget(); - (piece_index, received_piece) - }) - } - }) - .collect::>(); - - let mut segment_pieces = vec![None::; ArchivedHistorySegment::NUM_PIECES]; - let mut pieces_received = 0; - - while let Some(maybe_result) = received_segment_pieces.next().await { - let Some((piece_index, piece)) = maybe_result else { - continue; - }; - - segment_pieces - .get_mut(piece_index.position() as usize) - .expect("Piece position is by definition within segment; qed") - .replace(piece); - - pieces_received += 1; - - if pieces_received >= RecordedHistorySegment::NUM_RAW_RECORDS { - trace!(%segment_index, "Received half of the segment."); - break; - } - } - - // CPU-intensive piece and segment reconstruction code can block the async executor. 
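The off-loading pattern the replacement code above uses, distilled into a standalone helper; a sketch assuming `Arc<Mutex<T>>`-shaped shared state like the reconstructor, not the project's exact API:

use std::sync::{Arc, Mutex};

async fn run_cpu_heavy<T, R>(
    state: Arc<Mutex<T>>,
    work: impl FnOnce(&mut T) -> R + Send + 'static,
) -> R
where
    T: Send + 'static,
    R: Send + 'static,
{
    // Move the Arc into Tokio's blocking pool; locking a std mutex is fine there
    // because blocking-pool threads are allowed to block, and async executor
    // threads stay responsive while the CPU-heavy work runs.
    tokio::task::spawn_blocking(move || {
        let mut guard = state.lock().expect("previous holder panicked");
        work(&mut guard)
    })
    .await
    .expect("blocking task panicked")
}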
- let reconstructor = reconstructor.clone(); - let reconstructed_contents = spawn_blocking(move || { - reconstructor - .lock() - .expect("Panic if previous thread panicked when holding the mutex") - .add_segment(segment_pieces.as_ref()) - }) - .await - .expect("Panic if blocking task panicked") - .map_err(|error| error.to_string())?; - - trace!(%segment_index, "Segment reconstructed successfully"); - - Ok(reconstructed_contents.blocks) -} diff --git a/crates/subspace-service/src/sync_from_dsn/snap_sync.rs b/crates/subspace-service/src/sync_from_dsn/snap_sync.rs index 2e8102a17c..27bb39e08f 100644 --- a/crates/subspace-service/src/sync_from_dsn/snap_sync.rs +++ b/crates/subspace-service/src/sync_from_dsn/snap_sync.rs @@ -1,7 +1,6 @@ use crate::mmr::sync::mmr_sync; -use crate::sync_from_dsn::import_blocks::download_and_reconstruct_blocks; use crate::sync_from_dsn::segment_header_downloader::SegmentHeaderDownloader; -use crate::sync_from_dsn::DsnSyncPieceGetter; +use crate::sync_from_dsn::PieceGetter; use crate::utils::wait_for_block_import; use sc_client_api::{AuxStore, BlockchainEvents, ProofProvider}; use sc_consensus::import_queue::ImportQueueService; @@ -31,11 +30,13 @@ use std::time::Duration; use subspace_archiving::reconstructor::Reconstructor; use subspace_core_primitives::segments::SegmentIndex; use subspace_core_primitives::{BlockNumber, PublicKey}; +use subspace_data_retrieval::segment_downloading::download_segment_pieces; use subspace_erasure_coding::ErasureCoding; use subspace_networking::Node; use tokio::sync::broadcast::Receiver; +use tokio::task; use tokio::time::sleep; -use tracing::{debug, error, warn}; +use tracing::{debug, error, trace, warn}; /// Error type for snap sync. #[derive(thiserror::Error, Debug)] @@ -95,7 +96,7 @@ where + 'static, Client::Api: SubspaceApi + ObjectsApi + MmrApi>, - PG: DsnSyncPieceGetter, + PG: PieceGetter, OS: OffchainStorage, { let info = client.info(); @@ -162,7 +163,7 @@ pub(crate) async fn get_blocks_from_target_segment( ) -> Result)>)>, Error> where AS: AuxStore, - PG: DsnSyncPieceGetter, + PG: PieceGetter, { sync_segment_headers(segment_headers_store, node) .await @@ -290,10 +291,30 @@ where let reconstructor = Arc::new(Mutex::new(Reconstructor::new(erasure_coding.clone()))); for segment_index in segments_to_reconstruct { - let blocks_fut = - download_and_reconstruct_blocks(segment_index, piece_getter, &reconstructor); + let segment_pieces = download_segment_pieces(segment_index, piece_getter) + .await + .map_err(|error| format!("Failed to download segment pieces: {error}"))?; + // CPU-intensive piece and segment reconstruction code can block the async executor. + let segment_contents_fut = task::spawn_blocking({ + let reconstructor = reconstructor.clone(); + + move || { + reconstructor + .lock() + .expect("Panic if previous thread panicked when holding the mutex") + .add_segment(segment_pieces.as_ref()) + } + }); + + blocks = VecDeque::from( + segment_contents_fut + .await + .expect("Panic if blocking task panicked") + .map_err(|error| error.to_string())? 
+ .blocks, + ); - blocks = VecDeque::from(blocks_fut.await?); + trace!(%segment_index, "Segment reconstructed successfully"); } } @@ -318,7 +339,7 @@ async fn sync( network_request: NR, ) -> Result<(), Error> where - PG: DsnSyncPieceGetter, + PG: PieceGetter, AS: AuxStore, Block: BlockT, Client: HeaderBackend diff --git a/docker/gateway.Dockerfile b/docker/gateway.Dockerfile new file mode 100644 index 0000000000..85f4c62f1c --- /dev/null +++ b/docker/gateway.Dockerfile @@ -0,0 +1,109 @@ +# This Dockerfile supports both native building and cross-compilation to x86-64, aarch64 and riscv64 +FROM --platform=$BUILDPLATFORM ubuntu:22.04 + +ARG RUSTC_VERSION=nightly-2024-10-22 +ARG PROFILE=production +ARG RUSTFLAGS +# Incremental compilation here isn't helpful +ENV CARGO_INCREMENTAL=0 +ENV PKG_CONFIG_ALLOW_CROSS=true + +ARG BUILDARCH +ARG TARGETARCH + +WORKDIR /code + +RUN \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + ca-certificates \ + protobuf-compiler \ + curl \ + git \ + llvm \ + clang \ + automake \ + libtool \ + pkg-config \ + make + +RUN \ + if [ $BUILDARCH != "arm64" ] && [ $TARGETARCH = "arm64" ]; then \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + g++-aarch64-linux-gnu \ + gcc-aarch64-linux-gnu \ + libc6-dev-arm64-cross \ + ; fi + +RUN \ + if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + g++-riscv64-linux-gnu \ + gcc-riscv64-linux-gnu \ + libc6-dev-riscv64-cross \ + ; fi + +RUN \ + if [ $BUILDARCH != "amd64" ] && [ $TARGETARCH = "amd64" ]; then \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + g++-x86-64-linux-gnu \ + gcc-x86-64-linux-gnu \ + libc6-dev-amd64-cross \ + ; fi + +RUN \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain $RUSTC_VERSION && \ + /root/.cargo/bin/rustup target add wasm32-unknown-unknown + +COPY Cargo.lock /code/Cargo.lock +COPY Cargo.toml /code/Cargo.toml +COPY rust-toolchain.toml /code/rust-toolchain.toml + +COPY crates /code/crates +COPY domains /code/domains +COPY shared /code/shared +COPY test /code/test + +# Up until this line all Rust images in this repo should be the same to share the same layers + +ARG TARGETVARIANT + +RUN \ + if [ $BUILDARCH != "arm64" ] && [ $TARGETARCH = "arm64" ]; then \ + export RUSTFLAGS="$RUSTFLAGS -C linker=aarch64-linux-gnu-gcc" \ + ; fi && \ + if [ $BUILDARCH != "riscv64" ] && [ $TARGETARCH = "riscv64" ]; then \ + export RUSTFLAGS="$RUSTFLAGS -C linker=riscv64-linux-gnu-gcc" \ + ; fi && \ + if [ $TARGETARCH = "amd64" ] && [ "$RUSTFLAGS" = "" ]; then \ + case "$TARGETVARIANT" in \ + # x86-64-v2 with AES-NI + "v2") export RUSTFLAGS="-C target-cpu=x86-64-v2" ;; \ + # x86-64-v3 with AES-NI + "v3") export RUSTFLAGS="-C target-cpu=x86-64-v3 -C target-feature=+aes" ;; \ + # v4 is compiled for Zen 4+ + "v4") export RUSTFLAGS="-C target-cpu=znver4" ;; \ + # Default build is for Skylake + *) export RUSTFLAGS="-C target-cpu=skylake" ;; \ + esac \ + ; fi && \ + if [ $BUILDARCH != "amd64" ] && [ $TARGETARCH = "amd64" ]; then \ + export RUSTFLAGS="$RUSTFLAGS -C linker=x86_64-linux-gnu-gcc" \ + ; fi && \ + RUSTC_TARGET_ARCH=$(echo $TARGETARCH | sed "s/amd64/x86_64/g" | sed "s/arm64/aarch64/g" | sed "s/riscv64/riscv64gc/g") && \ + /root/.cargo/bin/cargo -Zgitoxide -Zgit build \ + --locked \ + -Z build-std \ + --profile $PROFILE \ + --bin subspace-gateway \ + --target 
$RUSTC_TARGET_ARCH-unknown-linux-gnu && \ + mv target/*/*/subspace-gateway subspace-gateway && \ + rm -rf target + +FROM ubuntu:22.04 + +COPY --from=0 /code/subspace-gateway /subspace-gateway + +USER nobody:nogroup + +ENTRYPOINT ["/subspace-gateway"] diff --git a/docker/gateway.Dockerfile.dockerignore b/docker/gateway.Dockerfile.dockerignore new file mode 120000 index 0000000000..5a3c9b7d37 --- /dev/null +++ b/docker/gateway.Dockerfile.dockerignore @@ -0,0 +1 @@ +.dockerignore \ No newline at end of file diff --git a/docs/development.md b/docs/development.md index 64d598883a..91e3a21d12 100644 --- a/docs/development.md +++ b/docs/development.md @@ -31,6 +31,11 @@ Check [crates/subspace-node](../crates/subspace-node/README.md) and [crates/subs This is a monorepo with multiple binaries and the workflow is typical for Rust projects: +You may need to follow both node & farmer requirements + +**node**: https://github.com/autonomys/subspace/tree/main/crates/subspace-node +**farmer**: https://github.com/autonomys/subspace/tree/main/crates/subspace-farmer + - `cargo run --release --bin subspace-node -- run --dev` to run [a node](/crates/subspace-node) - To [start farming](/crates/subspace-farmer): - Single farm: `cargo run --release --bin subspace-farmer -- farm --reward-address REWARD-ADDRESS path=FARM0-DIR,size=FARM0-SIZE` diff --git a/domains/client/cross-domain-message-gossip/src/aux_schema.rs b/domains/client/cross-domain-message-gossip/src/aux_schema.rs index 0e83e917b3..b2fa4ae0de 100644 --- a/domains/client/cross-domain-message-gossip/src/aux_schema.rs +++ b/domains/client/cross-domain-message-gossip/src/aux_schema.rs @@ -1,10 +1,13 @@ //! Schema for channel update storage. +use crate::message_listener::LOG_TARGET; use parity_scale_codec::{Decode, Encode}; use sc_client_api::backend::AuxStore; -use sp_blockchain::{Error as ClientError, Result as ClientResult}; +use sp_blockchain::{Error as ClientError, Info, Result as ClientResult}; use sp_core::H256; use sp_messenger::messages::{ChainId, ChannelId, ChannelState, Nonce}; +use sp_messenger::{ChannelNonce, XdmId}; +use sp_runtime::traits::{Block as BlockT, NumberFor}; use subspace_runtime_primitives::BlockNumber; const CHANNEL_DETAIL: &[u8] = b"channel_detail"; @@ -86,3 +89,176 @@ where vec![], ) } + +mod xdm_keys { + use parity_scale_codec::Encode; + use sp_domains::{ChainId, ChannelId}; + use sp_messenger::messages::MessageKey; + use sp_messenger::XdmId; + + const XDM: &[u8] = b"xdm"; + const XDM_RELAY: &[u8] = b"relay_msg"; + const XDM_RELAY_RESPONSE: &[u8] = b"relay_msg_response"; + const XDM_LAST_CLEANUP_NONCE: &[u8] = b"xdm_last_cleanup_nonce"; + + pub(super) fn get_key_for_xdm_id(xdm_id: XdmId) -> Vec { + match xdm_id { + XdmId::RelayMessage(id) => get_key_for_xdm_relay(id), + XdmId::RelayResponseMessage(id) => get_key_for_xdm_relay_response(id), + } + } + + pub(super) fn get_key_for_last_cleanup_relay_nonce( + chain_id: ChainId, + channel_id: ChannelId, + ) -> Vec { + (XDM, XDM_RELAY, XDM_LAST_CLEANUP_NONCE, chain_id, channel_id).encode() + } + + pub(super) fn get_key_for_last_cleanup_relay_response_nonce( + chain_id: ChainId, + channel_id: ChannelId, + ) -> Vec { + ( + XDM, + XDM_RELAY_RESPONSE, + XDM_LAST_CLEANUP_NONCE, + chain_id, + channel_id, + ) + .encode() + } + + pub(super) fn get_key_for_xdm_relay(id: MessageKey) -> Vec { + (XDM, XDM_RELAY, id).encode() + } + + pub(super) fn get_key_for_xdm_relay_response(id: MessageKey) -> Vec { + (XDM, XDM_RELAY_RESPONSE, id).encode() + } +} + +#[derive(Debug, Encode, Decode, Clone)] 
+pub(super) struct BlockId { + pub(super) number: NumberFor, + pub(super) hash: Block::Hash, +} + +impl From> for BlockId { + fn from(value: Info) -> Self { + BlockId { + number: value.best_number, + hash: value.best_hash, + } + } +} + +/// Store the given XDM ID as processed at given block. +pub fn set_xdm_message_processed_at( + backend: &Backend, + xdm_id: XdmId, + block_id: BlockId, +) -> ClientResult<()> +where + Backend: AuxStore, + Block: BlockT, +{ + let key = xdm_keys::get_key_for_xdm_id(xdm_id); + backend.insert_aux(&[(key.as_slice(), block_id.encode().as_slice())], vec![]) +} + +/// Returns the maybe last processed block number for given xdm. +pub fn get_xdm_processed_block_number( + backend: &Backend, + xdm_id: XdmId, +) -> ClientResult>> +where + Backend: AuxStore, + Block: BlockT, +{ + load_decode(backend, xdm_keys::get_key_for_xdm_id(xdm_id).as_slice()) +} + +/// Cleans up all the xdm storages until the latest nonces. +pub fn cleanup_chain_channel_storages( + backend: &Backend, + chain_id: ChainId, + channel_id: ChannelId, + channel_nonce: ChannelNonce, +) -> ClientResult<()> +where + Backend: AuxStore, +{ + let mut to_insert = vec![]; + let mut to_delete = vec![]; + if let Some(latest_relay_nonce) = channel_nonce.relay_msg_nonce { + let last_cleanup_relay_nonce_key = + xdm_keys::get_key_for_last_cleanup_relay_nonce(chain_id, channel_id); + let last_cleaned_up_nonce = + load_decode::<_, Nonce>(backend, last_cleanup_relay_nonce_key.as_slice())?; + + let mut from_nonce = match last_cleaned_up_nonce { + None => Nonce::zero(), + Some(last_nonce) => last_nonce.saturating_add(Nonce::one()), + }; + + tracing::debug!( + target: LOG_TARGET, + "Cleaning Relay xdm keys for {:?} channel: {:?} from: {:?} to: {:?}", + chain_id, + channel_id, + from_nonce, + latest_relay_nonce + ); + + while from_nonce <= latest_relay_nonce { + to_delete.push(xdm_keys::get_key_for_xdm_relay(( + chain_id, channel_id, from_nonce, + ))); + from_nonce = from_nonce.saturating_add(Nonce::one()); + } + + to_insert.push((last_cleanup_relay_nonce_key, latest_relay_nonce.encode())); + } + + if let Some(latest_relay_response_nonce) = channel_nonce.relay_response_msg_nonce { + let last_cleanup_relay_response_nonce_key = + xdm_keys::get_key_for_last_cleanup_relay_response_nonce(chain_id, channel_id); + let last_cleaned_up_nonce = + load_decode::<_, Nonce>(backend, last_cleanup_relay_response_nonce_key.as_slice())?; + + let mut from_nonce = match last_cleaned_up_nonce { + None => Nonce::zero(), + Some(last_nonce) => last_nonce.saturating_add(Nonce::one()), + }; + + tracing::debug!( + target: LOG_TARGET, + "Cleaning Relay response xdm keys for {:?} channel: {:?} from: {:?} to: {:?}", + chain_id, + channel_id, + from_nonce, + latest_relay_response_nonce + ); + + while from_nonce <= latest_relay_response_nonce { + to_delete.push(xdm_keys::get_key_for_xdm_relay_response(( + chain_id, channel_id, from_nonce, + ))); + from_nonce = from_nonce.saturating_add(Nonce::one()); + } + + to_insert.push(( + last_cleanup_relay_response_nonce_key, + latest_relay_response_nonce.encode(), + )); + } + + backend.insert_aux( + &to_insert + .iter() + .map(|(k, v)| (k.as_slice(), v.as_slice())) + .collect::>(), + &to_delete.iter().map(|k| k.as_slice()).collect::>(), + ) +} diff --git a/domains/client/cross-domain-message-gossip/src/gossip_worker.rs b/domains/client/cross-domain-message-gossip/src/gossip_worker.rs index cc9d957ab7..8554f1d0eb 100644 --- a/domains/client/cross-domain-message-gossip/src/gossip_worker.rs +++ 
b/domains/client/cross-domain-message-gossip/src/gossip_worker.rs @@ -314,4 +314,8 @@ pub(crate) mod rep { /// Reputation change when a peer sends us a gossip message that can't be decoded. pub(crate) const GOSSIP_NOT_DECODABLE: ReputationChange = ReputationChange::new_fatal("Cross chain message: not decodable"); + + /// Reputation change when a peer sends us a non XDM message + pub(crate) const NOT_XDM: ReputationChange = + ReputationChange::new_fatal("Cross chain message: not XDM"); } diff --git a/domains/client/cross-domain-message-gossip/src/lib.rs b/domains/client/cross-domain-message-gossip/src/lib.rs index c88eb250f8..2f2feb7e07 100644 --- a/domains/client/cross-domain-message-gossip/src/lib.rs +++ b/domains/client/cross-domain-message-gossip/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(let_chains)] #![warn(rust_2018_idioms)] mod aux_schema; diff --git a/domains/client/cross-domain-message-gossip/src/message_listener.rs b/domains/client/cross-domain-message-gossip/src/message_listener.rs index e0648fc65f..2243990b25 100644 --- a/domains/client/cross-domain-message-gossip/src/message_listener.rs +++ b/domains/client/cross-domain-message-gossip/src/message_listener.rs @@ -1,4 +1,7 @@ -use crate::aux_schema::{get_channel_state, set_channel_state}; +use crate::aux_schema::{ + cleanup_chain_channel_storages, get_channel_state, get_xdm_processed_block_number, + set_channel_state, set_xdm_message_processed_at, BlockId, +}; use crate::gossip_worker::{ChannelUpdate, MessageData}; use crate::{ChainMsg, ChannelDetail}; use domain_block_preprocessor::stateless_runtime::StatelessRuntime; @@ -8,7 +11,7 @@ use sc_client_api::AuxStore; use sc_executor::RuntimeVersionOf; use sc_network::NetworkPeers; use sc_transaction_pool_api::{TransactionPool, TransactionSource}; -use sp_api::{ApiError, ProvideRuntimeApi, StorageProof}; +use sp_api::{ApiError, ApiExt, ProvideRuntimeApi, StorageProof}; use sp_blockchain::HeaderBackend; use sp_consensus::SyncOracle; use sp_core::crypto::AccountId32; @@ -18,17 +21,21 @@ use sp_core::{Hasher, H256}; use sp_domains::proof_provider_and_verifier::{StorageProofVerifier, VerificationError}; use sp_domains::{DomainId, DomainsApi, RuntimeType}; use sp_messenger::messages::{ChainId, Channel, ChannelId}; -use sp_messenger::RelayerApi; +use sp_messenger::{ChannelNonce, MessengerApi, RelayerApi, XdmId}; use sp_runtime::codec::Decode; use sp_runtime::traits::{BlakeTwo256, Block as BlockT, Header, NumberFor}; +use sp_runtime::{SaturatedConversion, Saturating}; use std::collections::BTreeMap; use std::sync::Arc; use subspace_runtime_primitives::{Balance, BlockNumber}; use thiserror::Error; -const LOG_TARGET: &str = "domain_message_listener"; +pub(crate) const LOG_TARGET: &str = "domain_message_listener"; +/// Number of blocks an already submitted XDM is not accepted since last submission. 
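+/// For example, with the limit of 5 below, an XDM recorded as submitted at block 100 only becomes eligible for resubmission once the best block number exceeds 105, unless the chain re-orged away from block 100 or the channel nonce already marks the message as stale.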
+const XDM_ACCEPT_BLOCK_LIMIT: u32 = 5; type BlockOf = ::Block; +type HeaderOf = <::Block as BlockT>::Header; type ExtrinsicOf = <::Block as BlockT>::Extrinsic; type HashingFor = <::Header as Header>::Hashing; @@ -88,7 +95,6 @@ pub async fn start_cross_chain_message_listener< TxnListener, CClient, CBlock, - Block, Executor, SO, >( @@ -102,11 +108,11 @@ pub async fn start_cross_chain_message_listener< sync_oracle: SO, ) where TxPool: TransactionPool + 'static, - Client: ProvideRuntimeApi> + HeaderBackend>, + Client: ProvideRuntimeApi> + HeaderBackend> + AuxStore, CBlock: BlockT, - Block: BlockT, + Client::Api: MessengerApi, NumberFor, CBlock::Hash>, CClient: ProvideRuntimeApi + HeaderBackend + AuxStore, - CClient::Api: DomainsApi + CClient::Api: DomainsApi> + RelayerApi, NumberFor, CBlock::Hash>, TxnListener: Stream + Unpin, Executor: CodeExecutor + RuntimeVersionOf, @@ -153,15 +159,25 @@ pub async fn start_cross_chain_message_listener< } }; - handle_xdm_message(&client, &tx_pool, chain_id, ext).await; + if let Ok(valid) = + handle_xdm_message::<_, _, CBlock>(&client, &tx_pool, chain_id, ext).await + && !valid + { + if let Some(peer_id) = msg.maybe_peer { + network.report_peer(peer_id, crate::gossip_worker::rep::NOT_XDM); + } + continue; + } + } + MessageData::ChannelUpdate(channel_update) => { + handle_channel_update::<_, _, _, BlockOf>( + chain_id, + channel_update, + &consensus_client, + domain_executor.clone(), + &mut domain_storage_key_cache, + ) } - MessageData::ChannelUpdate(channel_update) => handle_channel_update::<_, _, _, Block>( - chain_id, - channel_update, - &consensus_client, - domain_executor.clone(), - &mut domain_storage_key_cache, - ), } } } @@ -460,32 +476,175 @@ where Ok(()) } -async fn handle_xdm_message( +fn can_allow_xdm_submission( + client: &Arc, + xdm_id: XdmId, + submitted_block_id: BlockId, + current_block_id: BlockId, + maybe_channel_nonce: Option, +) -> bool +where + Client: HeaderBackend, + Block: BlockT, +{ + if let Some(channel_nonce) = maybe_channel_nonce { + let maybe_nonces = match ( + xdm_id, + channel_nonce.relay_msg_nonce, + channel_nonce.relay_response_msg_nonce, + ) { + (XdmId::RelayMessage((_, _, nonce)), Some(channel_nonce), _) => { + Some((nonce, channel_nonce)) + } + (XdmId::RelayResponseMessage((_, _, nonce)), _, Some(channel_nonce)) => { + Some((nonce, channel_nonce)) + } + _ => None, + }; + + if let Some((xdm_nonce, channel_nonce)) = maybe_nonces + && (xdm_nonce <= channel_nonce) + { + tracing::debug!( + target: LOG_TARGET, + "Stale XDM submitted: XDM Nonce: {:?}, Channel Nonce: {:?}", + xdm_nonce, + channel_nonce + ); + return false; + } + } + + match client.hash(submitted_block_id.number).ok().flatten() { + // there is no block at this number, allow xdm submission + None => return true, + Some(hash) => { + if hash != submitted_block_id.hash { + // client re-org'ed, allow xdm submission + return true; + } + } + } + + let latest_block_number = current_block_id.number; + let block_limit: NumberFor = XDM_ACCEPT_BLOCK_LIMIT.saturated_into(); + submitted_block_id.number < latest_block_number.saturating_sub(block_limit) +} + +async fn handle_xdm_message( client: &Arc, tx_pool: &Arc, chain_id: ChainId, ext: ExtrinsicOf, -) where +) -> Result +where TxPool: TransactionPool + 'static, - Client: HeaderBackend>, + CBlock: BlockT, + Client: ProvideRuntimeApi> + HeaderBackend> + AuxStore, + Client::Api: MessengerApi, NumberFor, CBlock::Hash>, { - let at = client.info().best_hash; - tracing::debug!( - target: LOG_TARGET, - "Submitting extrinsic to tx pool 
at block: {:?}", - at - ); + let block_id: BlockId> = client.info().into(); + let runtime_api = client.runtime_api(); + let api_version = runtime_api + .api_version::, NumberFor, CBlock::Hash>>( + block_id.hash, + )? + .unwrap_or(1); + + let api_available = api_version >= 2; + if api_available { + let xdm_id = match runtime_api.xdm_id(block_id.hash, &ext)? { + // not a valid xdm, so return as invalid + None => return Ok(false), + Some(xdm_id) => xdm_id, + }; - let tx_pool_res = tx_pool - .submit_one(at, TransactionSource::External, ext) - .await; + let (src_chain_id, channel_id) = xdm_id.get_chain_id_and_channel_id(); + let maybe_channel_nonce = + runtime_api.channel_nonce(block_id.hash, src_chain_id, channel_id)?; + + if let Some(submitted_block_id) = + get_xdm_processed_block_number::<_, BlockOf>(&**client, xdm_id)? + && !can_allow_xdm_submission( + client, + xdm_id, + submitted_block_id.clone(), + block_id.clone(), + maybe_channel_nonce, + ) + { + tracing::debug!( + target: LOG_TARGET, + "Skipping XDM[{:?}] submission. At: {:?} and Now: {:?}", + xdm_id, + submitted_block_id, + block_id + ); + return Ok(true); + } - if let Err(err) = tx_pool_res { - tracing::error!( + tracing::debug!( target: LOG_TARGET, - "Failed to submit extrinsic to tx pool for Chain {:?} with error: {:?}", + "Submitting XDM[{:?}] to tx pool for chain {:?} at block: {:?}", + xdm_id, chain_id, - err + block_id ); + + let tx_pool_res = tx_pool + .submit_one(block_id.hash, TransactionSource::External, ext) + .await; + + let block_id: BlockId> = client.info().into(); + if let Err(err) = tx_pool_res { + tracing::error!( + target: LOG_TARGET, + "Failed to submit XDM[{:?}] to tx pool for Chain {:?} with error: {:?} at block: {:?}", + xdm_id, + chain_id, + err, + block_id + ); + } else { + tracing::debug!( + target: LOG_TARGET, + "Submitted XDM[{:?}] to tx pool for chain {:?} at {:?}", + xdm_id, + chain_id, + block_id + ); + + set_xdm_message_processed_at(&**client, xdm_id, block_id)?; + } + + if let Some(channel_nonce) = maybe_channel_nonce { + cleanup_chain_channel_storages(&**client, src_chain_id, channel_id, channel_nonce)?; + } + + Ok(true) + } else { + let tx_pool_res = tx_pool + .submit_one(block_id.hash, TransactionSource::External, ext) + .await; + + let block_id: BlockId> = client.info().into(); + if let Err(err) = tx_pool_res { + tracing::error!( + target: LOG_TARGET, + "Failed to submit XDM to tx pool for Chain {:?} with error: {:?} at block: {:?}", + chain_id, + err, + block_id + ); + } else { + tracing::debug!( + target: LOG_TARGET, + "Submitted XDM to tx pool for chain {:?} at {:?}", + chain_id, + block_id + ); + } + + Ok(true) } } diff --git a/domains/client/domain-operator/Cargo.toml b/domains/client/domain-operator/Cargo.toml index 99a83f9b21..d171d1494b 100644 --- a/domains/client/domain-operator/Cargo.toml +++ b/domains/client/domain-operator/Cargo.toml @@ -63,6 +63,7 @@ pallet-messenger = { version = "0.1.0", path = "../../../domains/pallets/messeng pallet-sudo = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } pallet-timestamp = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } pallet-transporter = { version = "0.1.0", path = "../../../domains/pallets/transporter" } +rand = "0.8.5" sc-cli = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305", default-features = false } sc-service = { git = "https://github.com/subspace/polkadot-sdk", rev = 
"94a1a8143a89bbe9f938c1939ff68abc1519a305", default-features = false } sc-transaction-pool = { git = "https://github.com/subspace/polkadot-sdk", rev = "94a1a8143a89bbe9f938c1939ff68abc1519a305" } diff --git a/domains/client/domain-operator/src/domain_bundle_producer.rs b/domains/client/domain-operator/src/domain_bundle_producer.rs index f610b26e48..a527b9436a 100644 --- a/domains/client/domain-operator/src/domain_bundle_producer.rs +++ b/domains/client/domain-operator/src/domain_bundle_producer.rs @@ -2,6 +2,7 @@ use crate::bundle_producer_election_solver::BundleProducerElectionSolver; use crate::domain_bundle_proposer::DomainBundleProposer; use crate::utils::OperatorSlotInfo; use crate::BundleSender; +use async_trait::async_trait; use codec::Decode; use sc_client_api::{AuxStore, BlockBackend}; use sp_api::ProvideRuntimeApi; @@ -10,8 +11,9 @@ use sp_blockchain::HeaderBackend; use sp_consensus_slots::Slot; use sp_domains::core_api::DomainCoreApi; use sp_domains::{ - Bundle, BundleProducerElectionApi, DomainId, DomainsApi, OperatorId, OperatorPublicKey, - OperatorSignature, SealedBundleHeader, SealedSingletonReceipt, SingletonReceipt, + Bundle, BundleHeader, BundleProducerElectionApi, DomainId, DomainsApi, OperatorId, + OperatorPublicKey, OperatorSignature, ProofOfElection, SealedBundleHeader, + SealedSingletonReceipt, SingletonReceipt, }; use sp_keystore::KeystorePtr; use sp_messenger::MessengerApi; @@ -22,6 +24,19 @@ use std::sync::Arc; use subspace_runtime_primitives::Balance; use tracing::info; +/// Type alias for block hash. +pub type BlockHashFor = ::Hash; + +/// Type alias for block header. +pub type HeaderFor = ::Header; + +/// Type alias for bundle header. +pub type BundleHeaderFor = + BundleHeader, BlockHashFor, HeaderFor, Balance>; + +/// Type alias for extrinsics. +pub type ExtrinsicFor = ::Extrinsic; + type OpaqueBundle = sp_domains::OpaqueBundle< NumberFor, ::Hash, @@ -50,6 +65,20 @@ impl DomainProposal { } } +#[async_trait] +pub trait BundleProducer +where + Block: BlockT, + CBlock: BlockT, +{ + /// Produce a bundle for the given operator and slot. + async fn produce_bundle( + &mut self, + operator_id: OperatorId, + slot_info: OperatorSlotInfo, + ) -> sp_blockchain::Result>>; +} + pub struct DomainBundleProducer where Block: BlockT, @@ -62,13 +91,6 @@ where keystore: KeystorePtr, bundle_producer_election_solver: BundleProducerElectionSolver, domain_bundle_proposer: DomainBundleProposer, - // TODO: both `skip_empty_bundle_production` and `skip_out_of_order_slot` are only used in the - // tests, we should introduce a trait for `DomainBundleProducer` and use a wrapper of `DomainBundleProducer` - // in the test, both `skip_empty_bundle_production` and `skip_out_of_order_slot` should move into the wrapper - // to keep the production code clean. 
- skip_empty_bundle_production: bool, - skip_out_of_order_slot: bool, - last_processed_slot: Option, } impl Clone @@ -86,9 +108,6 @@ where keystore: self.keystore.clone(), bundle_producer_election_solver: self.bundle_producer_election_solver.clone(), domain_bundle_proposer: self.domain_bundle_proposer.clone(), - skip_empty_bundle_production: self.skip_empty_bundle_production, - skip_out_of_order_slot: self.skip_out_of_order_slot, - last_processed_slot: None, } } } @@ -109,7 +128,6 @@ where CClient::Api: DomainsApi + BundleProducerElectionApi, TransactionPool: sc_transaction_pool_api::TransactionPool, { - #[allow(clippy::too_many_arguments)] pub fn new( domain_id: DomainId, consensus_client: Arc, @@ -123,8 +141,6 @@ where >, bundle_sender: Arc>, keystore: KeystorePtr, - skip_empty_bundle_production: bool, - skip_out_of_order_slot: bool, ) -> Self { let bundle_producer_election_solver = BundleProducerElectionSolver::::new( keystore.clone(), @@ -138,9 +154,6 @@ where keystore, bundle_producer_election_solver, domain_bundle_proposer, - skip_empty_bundle_production, - skip_out_of_order_slot, - last_processed_slot: None, } } @@ -170,18 +183,26 @@ where }) } - pub async fn produce_bundle( - &mut self, + #[expect(clippy::type_complexity)] + fn claim_bundle_slot( + &self, operator_id: OperatorId, - slot_info: OperatorSlotInfo, - ) -> sp_blockchain::Result>> { + slot_info: &OperatorSlotInfo, + domain_best_number: NumberFor, + consensus_chain_best_hash: BlockHashFor, + ) -> sp_blockchain::Result< + Option<( + NumberFor, + NumberFor, + ProofOfElection, + OperatorPublicKey, + )>, + > { let OperatorSlotInfo { slot, proof_of_time, } = slot_info; - let domain_best_number = self.client.info().best_number; - let consensus_chain_best_hash = self.consensus_client.info().best_hash; let domain_best_number_onchain = self .consensus_client .runtime_api() @@ -200,24 +221,14 @@ where .runtime_api() .head_receipt_number(consensus_chain_best_hash, self.domain_id)?; - let should_skip_slot = { - // Operator is lagging behind the receipt chain on its parent chain as another operator - // already processed a block higher than the local best and submitted the receipt to - // the parent chain, we ought to catch up with the consensus block processing before - // producing new bundle. - let is_operator_lagging = - !domain_best_number.is_zero() && domain_best_number <= head_receipt_number; - - let skip_out_of_order_slot = self.skip_out_of_order_slot - && self - .last_processed_slot - .map(|last_slot| last_slot >= slot) - .unwrap_or(false); - - is_operator_lagging || skip_out_of_order_slot - }; + // Operator is lagging behind the receipt chain on its parent chain as another operator + // already processed a block higher than the local best and submitted the receipt to + // the parent chain, we ought to catch up with the consensus block processing before + // producing new bundle. + let is_operator_lagging = + !domain_best_number.is_zero() && domain_best_number <= head_receipt_number; - if should_skip_slot { + if is_operator_lagging { tracing::warn!( ?domain_best_number, "Skipping bundle production on slot {slot}" @@ -227,98 +238,407 @@ where if let Some((proof_of_election, operator_signing_key)) = self.bundle_producer_election_solver.solve_challenge( - slot, + *slot, consensus_chain_best_hash, self.domain_id, operator_id, - proof_of_time, + *proof_of_time, )? 
{ tracing::info!("📦 Claimed slot {slot}"); + Ok(Some(( + domain_best_number_onchain, + head_receipt_number, + proof_of_election, + operator_signing_key, + ))) + } else { + Ok(None) + } + } + + fn prepare_receipt( + &self, + slot_info: &OperatorSlotInfo, + domain_best_number_onchain: NumberFor, + head_receipt_number: NumberFor, + proof_of_election: &ProofOfElection, + operator_signing_key: &OperatorPublicKey, + ) -> sp_blockchain::Result>> { + // When the receipt gap is greater than one, the operator needs to produce a receipt + // instead of a bundle + if domain_best_number_onchain.saturating_sub(head_receipt_number) > 1u32.into() { + info!( + ?domain_best_number_onchain, + ?head_receipt_number, + "🔖 Producing singleton receipt at slot {:?}", + slot_info.slot + ); + let receipt = self .domain_bundle_proposer .load_next_receipt(domain_best_number_onchain, head_receipt_number)?; - // When the receipt gap is greater than one the operator need to produce receipt - // instead of bundle - if domain_best_number_onchain.saturating_sub(head_receipt_number) > 1u32.into() { - info!( - ?domain_best_number_onchain, - ?head_receipt_number, - "🔖 Producing singleton receipt at slot {:?}", - slot_info.slot - ); - - let singleton_receipt = SingletonReceipt { - proof_of_election, - receipt, - }; + let singleton_receipt = SingletonReceipt { + proof_of_election: proof_of_election.clone(), + receipt, + }; - let signature = { - let to_sign: ::Hash = singleton_receipt.hash(); - self.sign(&operator_signing_key, to_sign.as_ref())? + let signature = { + let to_sign: BlockHashFor = singleton_receipt.hash(); + self.sign(operator_signing_key, to_sign.as_ref())? + }; + + let sealed_singleton_receipt: SealedSingletonReceiptFor = + SealedSingletonReceipt { + singleton_receipt, + signature, }; - let sealed_singleton_receipt: SealedSingletonReceiptFor = - SealedSingletonReceipt { - singleton_receipt, - signature, - }; - return Ok(Some(DomainProposal::Receipt(sealed_singleton_receipt))); - } + Ok(Some(DomainProposal::Receipt(sealed_singleton_receipt))) + } else { + Ok(None) + } + } - let tx_range = self + async fn prepare_bundle( + &mut self, + operator_id: OperatorId, + consensus_chain_best_hash: BlockHashFor, + domain_best_number_onchain: NumberFor, + head_receipt_number: NumberFor, + proof_of_election: ProofOfElection, + ) -> sp_blockchain::Result<(BundleHeaderFor, Vec>)> { + let tx_range = self + .consensus_client + .runtime_api() + .domain_tx_range(consensus_chain_best_hash, self.domain_id) + .map_err(|error| { + sp_blockchain::Error::Application(Box::from(format!( + "Error getting tx range: {error}" + ))) + })?; + + let receipt = self + .domain_bundle_proposer + .load_next_receipt(domain_best_number_onchain, head_receipt_number)?; + + let (bundle_header, extrinsics) = self + .domain_bundle_proposer + .propose_bundle_at(proof_of_election.clone(), tx_range, operator_id, receipt) + .await?; + + Ok((bundle_header, extrinsics)) + } + + fn is_bundle_empty( + &self, + consensus_chain_best_hash: BlockHashFor, + extrinsics: &[ExtrinsicFor], + ) -> sp_blockchain::Result { + let is_empty = extrinsics.is_empty() + && !self .consensus_client .runtime_api() - .domain_tx_range(consensus_chain_best_hash, self.domain_id) - .map_err(|error| { - sp_blockchain::Error::Application(Box::from(format!( - "Error getting tx range: {error}" - ))) - })?; - let (bundle_header, extrinsics) = self - .domain_bundle_proposer - .propose_bundle_at(proof_of_election, tx_range, operator_id, receipt) - .await?; - - // if there are no extrinsics and no 
receipts to confirm, skip the bundle - if self.skip_empty_bundle_production - && extrinsics.is_empty() - && !self - .consensus_client - .runtime_api() - .non_empty_er_exists(consensus_chain_best_hash, self.domain_id)? - { - tracing::warn!( - ?domain_best_number, - "Skipping empty bundle production on slot {slot}" - ); - return Ok(None); - } - - self.last_processed_slot.replace(slot); - - info!("🔖 Producing bundle at slot {:?}", slot_info.slot); + .non_empty_er_exists(consensus_chain_best_hash, self.domain_id)?; - let signature = { - let to_sign = bundle_header.hash(); - self.sign(&operator_signing_key, to_sign.as_ref())? - }; + Ok(is_empty) + } - let bundle = Bundle { - sealed_header: SealedBundleHeader::new(bundle_header, signature), - extrinsics, - }; + fn seal_bundle( + &self, + bundle_header: BundleHeaderFor, + operator_signing_key: &OperatorPublicKey, + extrinsics: Vec>, + ) -> sp_blockchain::Result> { + let signature = { + let to_sign = bundle_header.hash(); + self.sign(operator_signing_key, to_sign.as_ref())? + }; - // TODO: Re-enable the bundle gossip over X-Net when the compact bundle is supported. - // if let Err(e) = self.bundle_sender.unbounded_send(signed_bundle.clone()) { - // tracing::error!(error = ?e, "Failed to send transaction bundle"); - // } + let bundle = Bundle { + sealed_header: SealedBundleHeader::new(bundle_header, signature), + extrinsics, + }; - Ok(Some(DomainProposal::Bundle(bundle.into_opaque_bundle()))) - } else { - Ok(None) + // TODO: Re-enable the bundle gossip over X-Net when the compact bundle is supported. + // if let Err(e) = self.bundle_sender.unbounded_send(signed_bundle.clone()) { + // tracing::error!(error = ?e, "Failed to send transaction bundle"); + // } + + Ok(DomainProposal::Bundle(bundle.into_opaque_bundle())) + } +} + +#[async_trait] +impl BundleProducer + for DomainBundleProducer +where + Block: BlockT, + CBlock: BlockT, + NumberFor: Into>, + NumberFor: Into>, + Client: HeaderBackend + BlockBackend + AuxStore + ProvideRuntimeApi, + Client::Api: BlockBuilder + + DomainCoreApi + + TaggedTransactionQueue + + MessengerApi, CBlock::Hash>, + CClient: HeaderBackend + ProvideRuntimeApi, + CClient::Api: DomainsApi + BundleProducerElectionApi, + TransactionPool: sc_transaction_pool_api::TransactionPool, +{ + async fn produce_bundle( + &mut self, + operator_id: OperatorId, + slot_info: OperatorSlotInfo, + ) -> sp_blockchain::Result>> { + let domain_best_number = self.client.info().best_number; + let consensus_chain_best_hash = self.consensus_client.info().best_hash; + + let Some(( + domain_best_number_onchain, + head_receipt_number, + proof_of_election, + operator_signing_key, + )) = self.claim_bundle_slot( + operator_id, + &slot_info, + domain_best_number, + consensus_chain_best_hash, + )? + else { + return Ok(None); + }; + + if let Some(receipt) = self.prepare_receipt( + &slot_info, + domain_best_number_onchain, + head_receipt_number, + &proof_of_election, + &operator_signing_key, + )? { + return Ok(Some(receipt)); + } + + let (bundle_header, extrinsics) = self + .prepare_bundle( + operator_id, + consensus_chain_best_hash, + domain_best_number_onchain, + head_receipt_number, + proof_of_election, + ) + .await?; + + // if there are no extrinsics and no receipts to confirm, skip the bundle + // this is the default production behaviour + if self.is_bundle_empty(consensus_chain_best_hash, &extrinsics)? 
{
+ tracing::warn!(
+ ?domain_best_number,
+ "Skipping empty bundle production on slot {}",
+ slot_info.slot,
+ );
+
+ return Ok(None);
+ }
+
+ info!("🔖 Producing bundle at slot {:?}", slot_info.slot);
+
+ let bundle = self.seal_bundle(bundle_header, &operator_signing_key, extrinsics)?;
+
+ Ok(Some(bundle))
+ }
+}
+
+// TODO: only compile the test bundle producer in tests (ticket #3162)
+
+/// Returns true when passed the default bundle producer parameters.
+pub fn uses_default_bundle_producer_params(
+ skip_empty_bundle_production: bool,
+ skip_out_of_order_slot: bool,
+) -> bool {
+ skip_empty_bundle_production && !skip_out_of_order_slot
+}
+
+pub struct TestBundleProducer
+where
+ Block: BlockT,
+ CBlock: BlockT,
+{
+ inner: DomainBundleProducer,
+ // Test-only parameters
+ skip_empty_bundle_production: bool,
+ skip_out_of_order_slot: bool,
+ last_processed_slot: Option,
+}
+
+impl Clone
+ for TestBundleProducer
+where
+ Block: BlockT,
+ CBlock: BlockT,
+{
+ fn clone(&self) -> Self {
+ Self {
+ inner: self.inner.clone(),
+ skip_empty_bundle_production: self.skip_empty_bundle_production,
+ skip_out_of_order_slot: self.skip_out_of_order_slot,
+ last_processed_slot: None,
+ }
+ }
+}
+
+impl
+ TestBundleProducer
+where
+ Block: BlockT,
+ CBlock: BlockT,
+ NumberFor: Into>,
+ NumberFor: Into>,
+ Client: HeaderBackend + BlockBackend + AuxStore + ProvideRuntimeApi,
+ Client::Api: BlockBuilder
+ + DomainCoreApi
+ + TaggedTransactionQueue
+ + MessengerApi, CBlock::Hash>,
+ CClient: HeaderBackend + ProvideRuntimeApi,
+ CClient::Api: DomainsApi + BundleProducerElectionApi,
+ TransactionPool: sc_transaction_pool_api::TransactionPool,
+{
+ #[expect(clippy::too_many_arguments)]
+ pub fn new(
+ domain_id: DomainId,
+ consensus_client: Arc,
+ client: Arc,
+ domain_bundle_proposer: DomainBundleProposer<
+ Block,
+ Client,
+ CBlock,
+ CClient,
+ TransactionPool,
+ >,
+ bundle_sender: Arc>,
+ keystore: KeystorePtr,
+ skip_empty_bundle_production: bool,
+ skip_out_of_order_slot: bool,
+ ) -> Self {
+ Self {
+ inner: DomainBundleProducer::new(
+ domain_id,
+ consensus_client,
+ client,
+ domain_bundle_proposer,
+ bundle_sender,
+ keystore,
+ ),
+ skip_empty_bundle_production,
+ skip_out_of_order_slot,
+ last_processed_slot: None,
+ }
+ }
+}
+
+#[async_trait]
+impl BundleProducer
+ for TestBundleProducer
+where
+ Block: BlockT,
+ CBlock: BlockT,
+ NumberFor: Into>,
+ NumberFor: Into>,
+ Client: HeaderBackend + BlockBackend + AuxStore + ProvideRuntimeApi,
+ Client::Api: BlockBuilder
+ + DomainCoreApi
+ + TaggedTransactionQueue
+ + MessengerApi, CBlock::Hash>,
+ CClient: HeaderBackend + ProvideRuntimeApi,
+ CClient::Api: DomainsApi + BundleProducerElectionApi,
+ TransactionPool: sc_transaction_pool_api::TransactionPool,
+{
+ async fn produce_bundle(
+ &mut self,
+ operator_id: OperatorId,
+ slot_info: OperatorSlotInfo,
+ ) -> sp_blockchain::Result>> {
+ let domain_best_number = self.inner.client.info().best_number;
+ let consensus_chain_best_hash = self.inner.consensus_client.info().best_hash;
+
+ // Test-only behaviour: skip slot if configured to do so
+ let skip_out_of_order_slot = self.skip_out_of_order_slot
+ && self
+ .last_processed_slot
+ .map(|last_slot| last_slot >= slot_info.slot)
+ .unwrap_or(false);
+
+ if skip_out_of_order_slot {
+ tracing::warn!(
+ ?domain_best_number,
+ "Skipping out of order bundle production on slot {}",
+ slot_info.slot,
+ );
+ return Ok(None);
+ }
+
+ let Some((
+ domain_best_number_onchain,
+ head_receipt_number,
+ proof_of_election,
+ operator_signing_key,
+ )) =
self.inner.claim_bundle_slot( + operator_id, + &slot_info, + domain_best_number, + consensus_chain_best_hash, + )? + else { + return Ok(None); + }; + + if let Some(receipt) = self.inner.prepare_receipt( + &slot_info, + domain_best_number_onchain, + head_receipt_number, + &proof_of_election, + &operator_signing_key, + )? { + return Ok(Some(receipt)); + } + + let (bundle_header, extrinsics) = self + .inner + .prepare_bundle( + operator_id, + consensus_chain_best_hash, + domain_best_number_onchain, + head_receipt_number, + proof_of_election, + ) + .await?; + + // if there are no extrinsics and no receipts to confirm, skip the bundle + // Test-only behaviour: if configured, *don't* skip empty bundles + if self.skip_empty_bundle_production + && self + .inner + .is_bundle_empty(consensus_chain_best_hash, &extrinsics)? + { + tracing::warn!( + ?domain_best_number, + "Skipping empty bundle production on slot {}", + slot_info.slot, + ); + + return Ok(None); + } + + self.last_processed_slot.replace(slot_info.slot); + + info!("🔖 Producing bundle at slot {:?}", slot_info.slot); + + let bundle = self + .inner + .seal_bundle(bundle_header, &operator_signing_key, extrinsics)?; + + Ok(Some(bundle)) } } diff --git a/domains/client/domain-operator/src/domain_worker.rs b/domains/client/domain-operator/src/domain_worker.rs index 6fd7e30650..70181684da 100644 --- a/domains/client/domain-operator/src/domain_worker.rs +++ b/domains/client/domain-operator/src/domain_worker.rs @@ -15,7 +15,7 @@ // along with Polkadot. If not, see . use crate::bundle_processor::BundleProcessor; -use crate::domain_bundle_producer::{DomainBundleProducer, DomainProposal}; +use crate::domain_bundle_producer::{BundleProducer, DomainProposal}; use crate::utils::{BlockInfo, OperatorSlotInfo}; use crate::{NewSlotNotification, OperatorStreams}; use futures::channel::mpsc; @@ -36,8 +36,9 @@ use sp_messenger::MessengerApi; use sp_mmr_primitives::MmrApi; use sp_runtime::traits::{Block as BlockT, Header as HeaderT, NumberFor}; use sp_transaction_pool::runtime_api::TaggedTransactionQueue; -use std::pin::pin; +use std::pin::{pin, Pin}; use std::sync::Arc; +use std::task::{Context, Poll}; use subspace_runtime_primitives::Balance; use tracing::{info, Instrument}; @@ -50,7 +51,6 @@ pub(super) async fn start_worker< CBlock, Client, CClient, - TransactionPool, Backend, IBNS, CIBNS, @@ -62,7 +62,7 @@ pub(super) async fn start_worker< consensus_client: Arc, consensus_offchain_tx_pool_factory: OffchainTransactionPoolFactory, maybe_operator_id: Option, - mut bundle_producer: DomainBundleProducer, + mut bundle_producer: Box + Send>, bundle_processor: BundleProcessor, operator_streams: OperatorStreams, ) where @@ -95,8 +95,6 @@ pub(super) async fn start_worker< + BundleProducerElectionApi + FraudProofApi + MmrApi>, - TransactionPool: - sc_transaction_pool_api::TransactionPool + 'static, Backend: sc_client_api::Backend + 'static, IBNS: Stream, mpsc::Sender<()>)> + Send + 'static, CIBNS: Stream> + Send + 'static, @@ -126,7 +124,8 @@ pub(super) async fn start_worker< if let Some(operator_id) = maybe_operator_id { info!("👷 Running as Operator[{operator_id}]..."); - let mut new_slot_notification_stream = pin!(new_slot_notification_stream); + let mut latest_slot_notification_stream = + LatestItemStream::new(new_slot_notification_stream); let mut acknowledgement_sender_stream = pin!(acknowledgement_sender_stream); loop { tokio::select! { @@ -134,7 +133,7 @@ pub(super) async fn start_worker< // NOTE: this is only necessary for the test. 
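 // `biased` makes this select poll its branches in the order they are written, so a
 // pending slot notification is always handled before an acknowledgement.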
biased; - Some((slot, proof_of_time)) = new_slot_notification_stream.next() => { + Some((slot, proof_of_time)) = latest_slot_notification_stream.next() => { let res = bundle_producer .produce_bundle( operator_id, @@ -316,3 +315,42 @@ where block_info_receiver } + +struct LatestItemStream { + inner: Pin>, +} + +impl LatestItemStream { + fn new(stream: S) -> Self { + Self { + inner: Box::pin(stream), + } + } +} + +impl Stream for LatestItemStream +where + S: Stream, +{ + type Item = S::Item; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let mut last_item = None; + while let Poll::Ready(poll) = self.inner.as_mut().poll_next(cx) { + match poll { + Some(item) => { + last_item = Some(item); + } + None => { + return Poll::Ready(last_item); + } + } + } + + if last_item.is_some() { + Poll::Ready(last_item) + } else { + Poll::Pending + } + } +} diff --git a/domains/client/domain-operator/src/operator.rs b/domains/client/domain-operator/src/operator.rs index 9e82c502e8..b1057b524c 100644 --- a/domains/client/domain-operator/src/operator.rs +++ b/domains/client/domain-operator/src/operator.rs @@ -1,6 +1,8 @@ use crate::bundle_processor::BundleProcessor; use crate::domain_block_processor::{DomainBlockProcessor, ReceiptsChecker}; -use crate::domain_bundle_producer::DomainBundleProducer; +use crate::domain_bundle_producer::{ + uses_default_bundle_producer_params, BundleProducer, DomainBundleProducer, TestBundleProducer, +}; use crate::domain_bundle_proposer::DomainBundleProposer; use crate::fraud_proof::FraudProofGenerator; use crate::snap_sync::{snap_sync, SyncParams}; @@ -28,7 +30,7 @@ use sp_runtime::traits::{Block as BlockT, Header, NumberFor}; use sp_transaction_pool::runtime_api::TaggedTransactionQueue; use std::sync::Arc; use subspace_runtime_primitives::Balance; -use tracing::{error, info, trace}; +use tracing::{error, info, trace, warn}; /// Domain operator. 
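/// Construction selects the production `DomainBundleProducer` when
/// `uses_default_bundle_producer_params` holds, and the `TestBundleProducer` otherwise.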
pub struct Operator @@ -141,16 +143,32 @@ where params.transaction_pool.clone(), ); - let bundle_producer = DomainBundleProducer::new( - params.domain_id, - params.consensus_client.clone(), - params.client.clone(), - domain_bundle_proposer, - params.bundle_sender, - params.keystore.clone(), + let bundle_producer = if uses_default_bundle_producer_params( params.skip_empty_bundle_production, params.skip_out_of_order_slot, - ); + ) { + Box::new(DomainBundleProducer::new( + params.domain_id, + params.consensus_client.clone(), + params.client.clone(), + domain_bundle_proposer, + params.bundle_sender, + params.keystore.clone(), + )) as Box + Send> + } else { + // TODO: only allow the test bundle producer in tests (ticket #3162) + warn!("Using test bundle producer..."); + Box::new(TestBundleProducer::new( + params.domain_id, + params.consensus_client.clone(), + params.client.clone(), + domain_bundle_proposer, + params.bundle_sender, + params.keystore.clone(), + params.skip_empty_bundle_production, + params.skip_out_of_order_slot, + )) as Box + Send> + }; let fraud_proof_generator = FraudProofGenerator::new( params.client.clone(), diff --git a/domains/client/domain-operator/src/tests.rs b/domains/client/domain-operator/src/tests.rs index 28547b48a3..e914098c86 100644 --- a/domains/client/domain-operator/src/tests.rs +++ b/domains/client/domain-operator/src/tests.rs @@ -1,5 +1,5 @@ use crate::domain_block_processor::{DomainBlockProcessor, PendingConsensusBlocks}; -use crate::domain_bundle_producer::DomainBundleProducer; +use crate::domain_bundle_producer::{BundleProducer, TestBundleProducer}; use crate::domain_bundle_proposer::DomainBundleProposer; use crate::fraud_proof::{FraudProofGenerator, TraceDiffType}; use crate::tests::TxPoolError::InvalidTransaction as TxPoolInvalidTransaction; @@ -32,7 +32,7 @@ use sp_domains::core_api::DomainCoreApi; use sp_domains::merkle_tree::MerkleTree; use sp_domains::{ Bundle, BundleValidity, ChainId, ChannelId, DomainsApi, HeaderHashingFor, InboxedBundle, - InvalidBundleType, OperatorSignature, OperatorSigningKeyProofOfOwnershipData, Transfers, + InvalidBundleType, Transfers, }; use sp_domains_fraud_proof::fraud_proof::{ ApplyExtrinsicMismatch, ExecutionPhase, FinalizeBlockMismatch, FraudProofVariant, @@ -1494,6 +1494,259 @@ async fn test_false_invalid_bundles_inherent_extrinsic_proof_creation_and_verifi assert!(!ferdie.does_receipt_exist(bad_receipt_hash).unwrap()); } +#[tokio::test(flavor = "multi_thread")] +async fn test_true_invalid_bundles_undecodeable_tx_proof_creation_and_verification() { + let directory = TempDir::new().expect("Must be able to create temporary directory"); + + let mut builder = sc_cli::LoggerBuilder::new(""); + builder.with_colors(false); + let _ = builder.init(); + + let tokio_handle = tokio::runtime::Handle::current(); + + // Start Ferdie + let mut ferdie = MockConsensusNode::run( + tokio_handle.clone(), + Ferdie, + BasePath::new(directory.path().join("ferdie")), + ); + + // Run Alice (a evm domain authority node) + let mut alice = domain_test_service::DomainNodeBuilder::new( + tokio_handle.clone(), + BasePath::new(directory.path().join("alice")), + ) + .build_evm_node(Role::Authority, Alice, &mut ferdie) + .await; + + let bundle_to_tx = |opaque_bundle| { + subspace_test_runtime::UncheckedExtrinsic::new_unsigned( + pallet_domains::Call::submit_bundle { opaque_bundle }.into(), + ) + .into() + }; + + let undecodable_tx = || { + let undecodable_extrinsic = rand::random::<[u8; 5]>().to_vec(); + 
OpaqueExtrinsic::from_bytes(&undecodable_extrinsic.encode())
+ .expect("raw byte encoding and decoding never fails; qed")
+ };
+
+ produce_blocks!(ferdie, alice, 5).await.unwrap();
+
+ alice
+ .construct_and_send_extrinsic(pallet_balances::Call::transfer_allow_death {
+ dest: Bob.to_account_id(),
+ value: 1,
+ })
+ .await
+ .expect("Failed to send extrinsic");
+
+ // Produce a bundle that contains the previously sent extrinsic and record that bundle for later use
+ let (slot, target_bundle) = ferdie.produce_slot_and_wait_for_bundle_submission().await;
+ assert_eq!(target_bundle.extrinsics.len(), 1);
+ produce_block_with!(ferdie.produce_block_with_slot(slot), alice)
+ .await
+ .unwrap();
+
+ // Get a bundle from the txn pool and modify the receipt of the target bundle to an invalid one
+ let (slot, mut opaque_bundle) = ferdie.produce_slot_and_wait_for_bundle_submission().await;
+ let extrinsics: Vec>;
+ let bundle_extrinsic_root;
+ let bad_submit_bundle_tx = {
+ opaque_bundle.extrinsics.push(undecodable_tx());
+ extrinsics = opaque_bundle
+ .extrinsics
+ .clone()
+ .into_iter()
+ .map(|ext| ext.encode())
+ .collect();
+ bundle_extrinsic_root =
+ BlakeTwo256::ordered_trie_root(extrinsics.clone(), StateVersion::V1);
+ opaque_bundle.sealed_header.header.bundle_extrinsics_root = bundle_extrinsic_root;
+ opaque_bundle.sealed_header.signature = Sr25519Keyring::Alice
+ .pair()
+ .sign(opaque_bundle.sealed_header.pre_hash().as_ref())
+ .into();
+ bundle_to_tx(opaque_bundle)
+ };
+
+ // Produce a block that contains the `bad_submit_bundle_tx`
+ produce_block_with!(
+ ferdie.produce_block_with_slot_at(
+ slot,
+ ferdie.client.info().best_hash,
+ Some(vec![bad_submit_bundle_tx])
+ ),
+ alice
+ )
+ .await
+ .unwrap();
+
+ // Produce another bundle whose receipt wrongly marks the bundle with the undecodable extrinsic as valid.
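+ // Because this ER claims validity for a bundle that is actually invalid, the fraud proof
+ // generated against it is a "true invalid" proof, asserted below via
+ // `proof.is_true_invalid_fraud_proof`.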
+ let (slot, mut opaque_bundle) = ferdie.produce_slot_and_wait_for_bundle_submission().await;
+
+ let (bad_receipt_hash, bad_submit_bundle_tx) = {
+ let bad_receipt = &mut opaque_bundle.sealed_header.header.receipt;
+ // bad receipt marks this particular bundle as valid even though the bundle contains an undecodable extrinsic
+ bad_receipt.inboxed_bundles =
+ vec![InboxedBundle::valid(H256::random(), bundle_extrinsic_root)];
+
+ opaque_bundle.sealed_header.signature = Sr25519Keyring::Alice
+ .pair()
+ .sign(opaque_bundle.sealed_header.pre_hash().as_ref())
+ .into();
+ (
+ opaque_bundle.receipt().hash::(),
+ bundle_to_tx(opaque_bundle),
+ )
+ };
+
+ // Wait for the fraud proof that targets the bad ER
+ let wait_for_fraud_proof_fut = ferdie.wait_for_fraud_proof(move |fp| {
+ if let FraudProofVariant::InvalidBundles(proof) = &fp.proof {
+ if let InvalidBundleType::UndecodableTx(_) = proof.invalid_bundle_type {
+ assert!(proof.is_true_invalid_fraud_proof);
+ return true;
+ }
+ }
+ false
+ });
+
+ // Produce a consensus block that contains the `bad_submit_bundle_tx` and the bad receipt should
+ // be added to the consensus chain block tree
+ produce_block_with!(
+ ferdie.produce_block_with_slot_at(
+ slot,
+ ferdie.client.info().best_hash,
+ Some(vec![bad_submit_bundle_tx])
+ ),
+ alice
+ )
+ .await
+ .unwrap();
+ assert!(ferdie.does_receipt_exist(bad_receipt_hash).unwrap());
+
+ let _ = wait_for_fraud_proof_fut.await;
+
+ // Produce a consensus block that contains the fraud proof, the fraud proof will be verified
+ // and executed, thus pruning the bad receipt from the block tree
+ ferdie.produce_blocks(1).await.unwrap();
+ assert!(!ferdie.does_receipt_exist(bad_receipt_hash).unwrap());
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn test_false_invalid_bundles_undecodeable_tx_proof_creation_and_verification() {
+ let directory = TempDir::new().expect("Must be able to create temporary directory");
+
+ let mut builder = sc_cli::LoggerBuilder::new("");
+ builder.with_colors(false);
+ let _ = builder.init();
+
+ let tokio_handle = tokio::runtime::Handle::current();
+
+ // Start Ferdie
+ let mut ferdie = MockConsensusNode::run(
+ tokio_handle.clone(),
+ Ferdie,
+ BasePath::new(directory.path().join("ferdie")),
+ );
+
+ // Run Alice (an EVM domain authority node)
+ let mut alice = domain_test_service::DomainNodeBuilder::new(
+ tokio_handle.clone(),
+ BasePath::new(directory.path().join("alice")),
+ )
+ .build_evm_node(Role::Authority, Alice, &mut ferdie)
+ .await;
+
+ let bundle_to_tx = |opaque_bundle| {
+ subspace_test_runtime::UncheckedExtrinsic::new_unsigned(
+ pallet_domains::Call::submit_bundle { opaque_bundle }.into(),
+ )
+ .into()
+ };
+
+ produce_blocks!(ferdie, alice, 5).await.unwrap();
+
+ alice
+ .construct_and_send_extrinsic(pallet_balances::Call::transfer_allow_death {
+ dest: Bob.to_account_id(),
+ value: 1,
+ })
+ .await
+ .expect("Failed to send extrinsic");
+
+ // Produce a bundle that contains the previously sent extrinsic and record that bundle for later use
+ let (slot, target_bundle) = ferdie.produce_slot_and_wait_for_bundle_submission().await;
+ assert_eq!(target_bundle.extrinsics.len(), 1);
+ let extrinsics: Vec> = target_bundle
+ .extrinsics
+ .clone()
+ .into_iter()
+ .map(|ext| ext.encode())
+ .collect();
+ let bundle_extrinsic_root =
+ BlakeTwo256::ordered_trie_root(extrinsics.clone(), StateVersion::V1);
+ produce_block_with!(ferdie.produce_block_with_slot(slot), alice)
+ .await
+ .unwrap();
+
+ // Produce another bundle whose receipt wrongly marks the previous, fully valid bundle as invalid.
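+ // Because the challenged bundle is actually valid, the resulting fraud proof is a
+ // "false invalid" proof, asserted below via `!proof.is_true_invalid_fraud_proof`.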
+ let (slot, mut opaque_bundle) = ferdie.produce_slot_and_wait_for_bundle_submission().await;
+
+ let (bad_receipt_hash, bad_submit_bundle_tx) = {
+ let bad_receipt = &mut opaque_bundle.sealed_header.header.receipt;
+ // bad receipt marks this particular bundle as invalid even though the bundle does not
+ // contain an undecodable extrinsic
+ bad_receipt.inboxed_bundles = vec![InboxedBundle::invalid(
+ InvalidBundleType::UndecodableTx(0),
+ bundle_extrinsic_root,
+ )];
+
+ opaque_bundle.sealed_header.signature = Sr25519Keyring::Alice
+ .pair()
+ .sign(opaque_bundle.sealed_header.pre_hash().as_ref())
+ .into();
+ (
+ opaque_bundle.receipt().hash::(),
+ bundle_to_tx(opaque_bundle),
+ )
+ };
+
+ // Wait for the fraud proof that targets the bad ER
+ let wait_for_fraud_proof_fut = ferdie.wait_for_fraud_proof(move |fp| {
+ if let FraudProofVariant::InvalidBundles(proof) = &fp.proof {
+ if let InvalidBundleType::UndecodableTx(_) = proof.invalid_bundle_type {
+ assert!(!proof.is_true_invalid_fraud_proof);
+ return true;
+ }
+ }
+ false
+ });
+
+ // Produce a consensus block that contains the `bad_submit_bundle_tx` and the bad receipt should
+ // be added to the consensus chain block tree
+ produce_block_with!(
+ ferdie.produce_block_with_slot_at(
+ slot,
+ ferdie.client.info().best_hash,
+ Some(vec![bad_submit_bundle_tx])
+ ),
+ alice
+ )
+ .await
+ .unwrap();
+ assert!(ferdie.does_receipt_exist(bad_receipt_hash).unwrap());
+
+ let _ = wait_for_fraud_proof_fut.await;
+
+ // Produce a consensus block that contains the fraud proof, the fraud proof will be verified
+ // and executed, thus pruning the bad receipt from the block tree
+ ferdie.produce_blocks(1).await.unwrap();
+ assert!(!ferdie.does_receipt_exist(bad_receipt_hash).unwrap());
+}
+
 #[tokio::test(flavor = "multi_thread")]
 async fn test_true_invalid_bundles_illegal_xdm_proof_creation_and_verification() {
 let directory = TempDir::new().expect("Must be able to create temporary directory");
@@ -3210,7 +3463,7 @@ async fn stale_and_in_future_bundle_should_be_rejected() {
 );
 let (bundle_sender, _bundle_receiver) =
 sc_utils::mpsc::tracing_unbounded("domain_bundle_stream", 100);
- DomainBundleProducer::new(
+ TestBundleProducer::new(
 EVM_DOMAIN_ID,
 ferdie.client.clone(),
 alice.client.clone(),
@@ -4381,7 +4634,7 @@ async fn test_bad_receipt_chain() {
 );
 let (bundle_sender, _bundle_receiver) =
 sc_utils::mpsc::tracing_unbounded("domain_bundle_stream", 100);
- DomainBundleProducer::new(
+ TestBundleProducer::new(
 EVM_DOMAIN_ID,
 ferdie.client.clone(),
 alice.client.clone(),
@@ -4535,12 +4788,6 @@ async fn test_bad_receipt_chain() {
 minimum_nominator_stake: Balance::MAX,
 nomination_tax: Default::default(),
 },
- signing_key_proof_of_ownership: OperatorSignature::from(
- OperatorSigningKeyProofOfOwnershipData {
- operator_owner: Sr25519Alice.to_account_id(),
- }
- .using_encoded(|e| Sr25519Keyring::Charlie.sign(e)),
- ),
 })
 .await
 .unwrap();
diff --git a/domains/pallets/messenger/src/lib.rs b/domains/pallets/messenger/src/lib.rs
index 4c01990382..4fa397b3c1 100644
--- a/domains/pallets/messenger/src/lib.rs
+++ b/domains/pallets/messenger/src/lib.rs
@@ -47,9 +47,9 @@ use sp_runtime::traits::{Extrinsic, Hash};
 use sp_runtime::DispatchError;
 pub(crate) mod verification_errors {
- pub(crate) const INVALID_CHANNEL: u8 = 100;
- pub(crate) const INVALID_NONCE: u8 = 101;
- pub(crate) const NONCE_OVERFLOW: u8 = 102;
+ pub(crate) const INVALID_CHANNEL: u8 = 200;
+ pub(crate) const INVALID_NONCE: u8 = 201;
+ pub(crate) const NONCE_OVERFLOW: u8 = 202;
 }
 #[derive(Debug, Encode, Decode, Clone, Eq, PartialEq, TypeInfo, Copy)]
@@ -139,7 +139,7 @@ mod pallet {
 MessageWeightTag, Payload, ProtocolMessageRequest, RequestResponse, VersionedPayload,
 };
 use sp_messenger::{
- DomainRegistration, InherentError, InherentType, OnXDMRewards, StorageKeys,
+ ChannelNonce, DomainRegistration, InherentError, InherentType, OnXDMRewards, StorageKeys,
 INHERENT_IDENTIFIER,
 };
 use sp_runtime::traits::Zero;
@@ -1350,6 +1350,16 @@ mod pallet {
 UpdatedChannels::::get()
 }
+ pub fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option {
+ Channels::::get(chain_id, channel_id).map(|channel| {
+ let last_inbox_nonce = channel.next_inbox_nonce.checked_sub(U256::one());
+ ChannelNonce {
+ relay_msg_nonce: last_inbox_nonce,
+ relay_response_msg_nonce: channel.latest_response_received_message_nonce,
+ }
+ })
+ }
+
 pub fn pre_dispatch_with_trusted_mmr_proof(
 call: &Call,
 ) -> Result<(), TransactionValidityError> {
diff --git a/domains/primitives/messenger/src/lib.rs b/domains/primitives/messenger/src/lib.rs
index 8f26eab00b..aab7688add 100644
--- a/domains/primitives/messenger/src/lib.rs
+++ b/domains/primitives/messenger/src/lib.rs
@@ -23,7 +23,7 @@ pub mod messages;
 #[cfg(not(feature = "std"))]
 extern crate alloc;
-use crate::messages::MessageKey;
+use crate::messages::{MessageKey, Nonce};
 #[cfg(not(feature = "std"))]
 use alloc::collections::BTreeSet;
 #[cfg(not(feature = "std"))]
@@ -33,6 +33,7 @@ use codec::{Decode, Encode};
 use frame_support::inherent::InherentData;
 use frame_support::inherent::{InherentIdentifier, IsFatalError};
 use messages::{BlockMessagesWithStorageKey, ChannelId, CrossDomainMessage, MessageId};
+use scale_info::TypeInfo;
 use sp_domains::{ChainId, DomainAllowlistUpdates, DomainId};
 use sp_subspace_mmr::ConsensusChainMmrLeafProof;
 #[cfg(feature = "std")]
@@ -159,6 +160,32 @@ impl sp_inherents::InherentDataProvider for InherentDataProvider {
 }
 }
+/// Represents a union of XDM types with their message IDs.
+#[derive(Debug, Encode, Decode, TypeInfo, Copy, Clone)]
+pub enum XdmId {
+ RelayMessage(MessageKey),
+ RelayResponseMessage(MessageKey),
+}
+
+impl XdmId {
+ pub fn get_chain_id_and_channel_id(&self) -> (ChainId, ChannelId) {
+ match self {
+ XdmId::RelayMessage(key) => (key.0, key.1),
+ XdmId::RelayResponseMessage(key) => (key.0, key.1),
+ }
+ }
+}
+
+#[derive(Debug, Encode, Decode, TypeInfo, Copy, Clone)]
+pub struct ChannelNonce {
+ /// Last processed relay message nonce.
+ /// Could be None if there is no relay message yet.
+ pub relay_msg_nonce: Option,
+ /// Last processed relay response message nonce.
+ /// Could be None if there is no response yet.
+ pub relay_response_msg_nonce: Option,
+}
+
 sp_api::decl_runtime_apis! {
 /// Api useful for relayers to fetch messages and submit transactions.
 pub trait RelayerApi
@@ -195,6 +222,7 @@ sp_api::decl_runtime_apis! {
 }
 /// Api to provide XDM extraction from Runtime Calls.
+ #[api_version(2)]
 pub trait MessengerApi
 where
 CNumber: Encode + Decode,
@@ -220,5 +248,11 @@ sp_api::decl_runtime_apis! {
 /// Returns any domain's chains allowlist updates on consensus chain.
 fn domain_chains_allowlist_update(domain_id: DomainId) -> Option;
+
+ /// Returns the XDM message ID, or `None` if the extrinsic is not an XDM.
+ fn xdm_id(ext: &Block::Extrinsic) -> Option;
+
+ /// Get the channel nonce for the given chain and channel ID.
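+ /// Returns `None` when the channel does not exist on the given chain.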
+ fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option; } } diff --git a/domains/runtime/auto-id/src/lib.rs b/domains/runtime/auto-id/src/lib.rs index 61ddda8c3a..ade1200f0c 100644 --- a/domains/runtime/auto-id/src/lib.rs +++ b/domains/runtime/auto-id/src/lib.rs @@ -45,6 +45,7 @@ use sp_messenger::endpoint::{Endpoint, EndpointHandler as EndpointHandlerT, Endp use sp_messenger::messages::{ BlockMessagesWithStorageKey, ChainId, CrossDomainMessage, FeeModel, MessageId, MessageKey, }; +use sp_messenger::{ChannelNonce, XdmId}; use sp_messenger_host_functions::{get_storage_key, StorageKeyRequest}; use sp_mmr_primitives::EncodableOpaqueLeaf; use sp_runtime::generic::Era; @@ -909,6 +910,22 @@ impl_runtime_apis! { // not valid call on domains None } + + fn xdm_id(ext: &::Extrinsic) -> Option { + match &ext.function { + RuntimeCall::Messenger(pallet_messenger::Call::relay_message { msg })=> { + Some(XdmId::RelayMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + RuntimeCall::Messenger(pallet_messenger::Call::relay_message_response { msg }) => { + Some(XdmId::RelayResponseMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + _ => None, + } + } + + fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option { + Messenger::channel_nonce(chain_id, channel_id) + } } impl sp_messenger::RelayerApi for Runtime { diff --git a/domains/runtime/evm/src/lib.rs b/domains/runtime/evm/src/lib.rs index 172e0c3b01..965573b918 100644 --- a/domains/runtime/evm/src/lib.rs +++ b/domains/runtime/evm/src/lib.rs @@ -58,6 +58,7 @@ use sp_messenger::endpoint::{Endpoint, EndpointHandler as EndpointHandlerT, Endp use sp_messenger::messages::{ BlockMessagesWithStorageKey, ChainId, CrossDomainMessage, FeeModel, MessageId, MessageKey, }; +use sp_messenger::{ChannelNonce, XdmId}; use sp_messenger_host_functions::{get_storage_key, StorageKeyRequest}; use sp_mmr_primitives::EncodableOpaqueLeaf; use sp_runtime::generic::Era; @@ -1320,6 +1321,22 @@ impl_runtime_apis! 
{ // not valid call on domains None } + + fn xdm_id(ext: &::Extrinsic) -> Option { + match &ext.0.function { + RuntimeCall::Messenger(pallet_messenger::Call::relay_message { msg })=> { + Some(XdmId::RelayMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + RuntimeCall::Messenger(pallet_messenger::Call::relay_message_response { msg }) => { + Some(XdmId::RelayResponseMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + _ => None, + } + } + + fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option { + Messenger::channel_nonce(chain_id, channel_id) + } } impl sp_messenger::RelayerApi for Runtime { diff --git a/domains/service/src/domain.rs b/domains/service/src/domain.rs index dea7b02b35..8ca2101b9c 100644 --- a/domains/service/src/domain.rs +++ b/domains/service/src/domain.rs @@ -581,25 +581,17 @@ where } // Start cross domain message listener for domain - let domain_listener = cross_domain_message_gossip::start_cross_chain_message_listener::< - _, - _, - _, - _, - _, - Block, - _, - _, - >( - ChainId::Domain(domain_id), - consensus_client.clone(), - client.clone(), - params.transaction_pool.clone(), - consensus_network, - domain_message_receiver, - code_executor.clone(), - domain_sync_oracle, - ); + let domain_listener = + cross_domain_message_gossip::start_cross_chain_message_listener::<_, _, _, _, _, _, _>( + ChainId::Domain(domain_id), + consensus_client.clone(), + client.clone(), + params.transaction_pool.clone(), + consensus_network, + domain_message_receiver, + code_executor.clone(), + domain_sync_oracle, + ); spawn_essential.spawn_essential_blocking( "domain-message-listener", diff --git a/domains/test/runtime/auto-id/src/lib.rs b/domains/test/runtime/auto-id/src/lib.rs index 1bfab38c27..0f77507876 100644 --- a/domains/test/runtime/auto-id/src/lib.rs +++ b/domains/test/runtime/auto-id/src/lib.rs @@ -45,6 +45,7 @@ use sp_messenger::endpoint::{Endpoint, EndpointHandler as EndpointHandlerT, Endp use sp_messenger::messages::{ BlockMessagesWithStorageKey, ChainId, CrossDomainMessage, FeeModel, MessageId, MessageKey, }; +use sp_messenger::{ChannelNonce, XdmId}; use sp_messenger_host_functions::{get_storage_key, StorageKeyRequest}; use sp_mmr_primitives::EncodableOpaqueLeaf; use sp_runtime::generic::Era; @@ -900,6 +901,22 @@ impl_runtime_apis! { // not valid call on domains None } + + fn xdm_id(ext: &::Extrinsic) -> Option { + match &ext.function { + RuntimeCall::Messenger(pallet_messenger::Call::relay_message { msg })=> { + Some(XdmId::RelayMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + RuntimeCall::Messenger(pallet_messenger::Call::relay_message_response { msg }) => { + Some(XdmId::RelayResponseMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + _ => None, + } + } + + fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option { + Messenger::channel_nonce(chain_id, channel_id) + } } impl sp_messenger::RelayerApi for Runtime { diff --git a/domains/test/runtime/evm/src/lib.rs b/domains/test/runtime/evm/src/lib.rs index b530910fdb..26dce324a9 100644 --- a/domains/test/runtime/evm/src/lib.rs +++ b/domains/test/runtime/evm/src/lib.rs @@ -58,6 +58,7 @@ use sp_messenger::messages::{ BlockMessagesWithStorageKey, ChainId, ChannelId, CrossDomainMessage, FeeModel, MessageId, MessageKey, }; +use sp_messenger::{ChannelNonce, XdmId}; use sp_messenger_host_functions::{get_storage_key, StorageKeyRequest}; use sp_mmr_primitives::EncodableOpaqueLeaf; use sp_runtime::generic::Era; @@ -1276,6 +1277,22 @@ impl_runtime_apis! 
{ // not valid call on domains None } + + fn xdm_id(ext: &::Extrinsic) -> Option { + match &ext.0.function { + RuntimeCall::Messenger(pallet_messenger::Call::relay_message { msg })=> { + Some(XdmId::RelayMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + RuntimeCall::Messenger(pallet_messenger::Call::relay_message_response { msg }) => { + Some(XdmId::RelayResponseMessage((msg.src_chain_id, msg.channel_id, msg.nonce))) + } + _ => None, + } + } + + fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option { + Messenger::channel_nonce(chain_id, channel_id) + } } impl sp_messenger::RelayerApi for Runtime { diff --git a/shared/subspace-data-retrieval/Cargo.toml b/shared/subspace-data-retrieval/Cargo.toml index c7f6fe93d8..5f53ed11c2 100644 --- a/shared/subspace-data-retrieval/Cargo.toml +++ b/shared/subspace-data-retrieval/Cargo.toml @@ -12,7 +12,7 @@ include = [ ] [dependencies] -async-lock = "3.4.0" +anyhow = "1.0.89" async-trait = "0.1.83" futures = "0.3.31" parity-scale-codec = { version = "3.6.12", features = ["derive"] } @@ -28,5 +28,4 @@ tracing = "0.1.40" [features] parallel = [ "subspace-archiving/parallel", - "subspace-core-primitives/parallel", ] diff --git a/shared/subspace-data-retrieval/src/lib.rs b/shared/subspace-data-retrieval/src/lib.rs index 534f2d2adf..a14159b2cb 100644 --- a/shared/subspace-data-retrieval/src/lib.rs +++ b/shared/subspace-data-retrieval/src/lib.rs @@ -15,7 +15,9 @@ //! Fetching data from the archived history of the Subspace Distributed Storage Network. +#![feature(exact_size_is_empty)] + pub mod object_fetcher; pub mod piece_fetcher; pub mod piece_getter; -pub mod segment_fetcher; +pub mod segment_downloading; diff --git a/shared/subspace-data-retrieval/src/object_fetcher.rs b/shared/subspace-data-retrieval/src/object_fetcher.rs index 59436181ee..b89ad8c007 100644 --- a/shared/subspace-data-retrieval/src/object_fetcher.rs +++ b/shared/subspace-data-retrieval/src/object_fetcher.rs @@ -16,8 +16,8 @@ //! Fetching objects stored in the archived history of Subspace Network. use crate::piece_fetcher::download_pieces; -use crate::piece_getter::{BoxError, ObjectPieceGetter}; -use crate::segment_fetcher::{download_segment, SegmentGetterError}; +use crate::piece_getter::PieceGetter; +use crate::segment_downloading::{download_segment, SegmentDownloadingError}; use parity_scale_codec::{Compact, CompactLen, Decode, Encode}; use std::sync::Arc; use subspace_archiving::archiver::{Segment, SegmentItem}; @@ -116,14 +116,14 @@ pub enum Error { #[error("Getting segment failed: {source:?}")] SegmentGetter { #[from] - source: SegmentGetterError, + source: SegmentDownloadingError, }, /// Piece getter error #[error("Getting piece caused an error: {source:?}")] PieceGetterError { #[from] - source: BoxError, + source: anyhow::Error, }, /// Piece getter couldn't find the piece @@ -132,9 +132,12 @@ pub enum Error { } /// Object fetcher for the Subspace DSN. -pub struct ObjectFetcher { +pub struct ObjectFetcher +where + PG: PieceGetter + Send + Sync, +{ /// The piece getter used to fetch pieces. - piece_getter: Arc, + piece_getter: Arc, /// The erasure coding configuration of those pieces. erasure_coding: ErasureCoding, @@ -143,21 +146,21 @@ pub struct ObjectFetcher { max_object_len: usize, } -impl ObjectFetcher { +impl ObjectFetcher +where + PG: PieceGetter + Send + Sync, +{ /// Create a new object fetcher with the given configuration. 
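 /// The piece getter is taken as an `Arc`, so a single getter can be shared between fetchers.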
/// /// `max_object_len` is the amount of data bytes we'll read for a single object before giving /// up and returning an error, or `None` for no limit (`usize::MAX`). - pub fn new( - piece_getter: PG, + pub fn new( + piece_getter: Arc, erasure_coding: ErasureCoding, max_object_len: Option, - ) -> Self - where - PG: ObjectPieceGetter + Send + Sync + 'static, - { + ) -> Self { Self { - piece_getter: Arc::new(piece_getter), + piece_getter, erasure_coding, max_object_len: max_object_len.unwrap_or(usize::MAX), } @@ -356,7 +359,7 @@ impl ObjectFetcher { .filter(|i| i.is_source()) .take(remaining_piece_count) .collect::>(); - self.read_pieces(&remaining_piece_indexes) + self.read_pieces(remaining_piece_indexes) .await? .into_iter() .for_each(|piece| { @@ -554,7 +557,7 @@ impl ObjectFetcher { } /// Concurrently read multiple pieces, and return them in the supplied order. - async fn read_pieces(&self, piece_indexes: &[PieceIndex]) -> Result, Error> { + async fn read_pieces(&self, piece_indexes: Vec) -> Result, Error> { download_pieces(piece_indexes, &self.piece_getter) .await .map_err(|source| Error::PieceGetterError { source }) diff --git a/shared/subspace-data-retrieval/src/piece_fetcher.rs b/shared/subspace-data-retrieval/src/piece_fetcher.rs index 14f6e823ae..49f13a2ff4 100644 --- a/shared/subspace-data-retrieval/src/piece_fetcher.rs +++ b/shared/subspace-data-retrieval/src/piece_fetcher.rs @@ -16,9 +16,8 @@ //! Fetching pieces of the archived history of Subspace Network. use crate::object_fetcher::Error; -use crate::piece_getter::{BoxError, ObjectPieceGetter}; -use futures::stream::FuturesOrdered; -use futures::TryStreamExt; +use crate::piece_getter::PieceGetter; +use futures::StreamExt; use subspace_core_primitives::pieces::{Piece, PieceIndex}; use tracing::{debug, trace}; @@ -29,11 +28,11 @@ use tracing::{debug, trace}; // This code was copied and modified from subspace_service::sync_from_dsn::download_and_reconstruct_blocks(): // pub async fn download_pieces( - piece_indexes: &[PieceIndex], + piece_indexes: Vec, piece_getter: &PG, -) -> Result, BoxError> +) -> anyhow::Result> where - PG: ObjectPieceGetter, + PG: PieceGetter, { debug!( count = piece_indexes.len(), @@ -42,40 +41,23 @@ where ); // TODO: - // - consider using a semaphore to limit the number of concurrent requests, like - // download_segment_pieces() - // - if we're close to the number of pieces in a segment, use segment downloading and piece + // - if we're close to the number of pieces in a segment, or we can't find a piece, use segment downloading and piece // reconstruction instead // Currently most objects are limited to 4 pieces, so this isn't needed yet. - let received_pieces = piece_indexes - .iter() - .map(|piece_index| async move { - match piece_getter.get_piece(*piece_index).await { - Ok(Some(piece)) => { - trace!(?piece_index, "Piece request succeeded",); - Ok(piece) - } - Ok(None) => { - trace!(?piece_index, "Piece not found"); - Err(Error::PieceNotFound { - piece_index: *piece_index, - } - .into()) - } - Err(error) => { - trace!( - %error, - ?piece_index, - "Piece request caused an error", - ); - Err(error) - } - } - }) - .collect::>(); + let mut received_pieces = piece_getter.get_pieces(piece_indexes.clone()).await?; - // We want exact pieces, so any errors are fatal. 
diff --git a/shared/subspace-data-retrieval/src/piece_fetcher.rs b/shared/subspace-data-retrieval/src/piece_fetcher.rs
index 14f6e823ae..49f13a2ff4 100644
--- a/shared/subspace-data-retrieval/src/piece_fetcher.rs
+++ b/shared/subspace-data-retrieval/src/piece_fetcher.rs
@@ -16,9 +16,8 @@
 //! Fetching pieces of the archived history of Subspace Network.
 
 use crate::object_fetcher::Error;
-use crate::piece_getter::{BoxError, ObjectPieceGetter};
-use futures::stream::FuturesOrdered;
-use futures::TryStreamExt;
+use crate::piece_getter::PieceGetter;
+use futures::StreamExt;
 use subspace_core_primitives::pieces::{Piece, PieceIndex};
 use tracing::{debug, trace};
@@ -29,11 +28,11 @@ use tracing::{debug, trace};
 // This code was copied and modified from subspace_service::sync_from_dsn::download_and_reconstruct_blocks():
 //
 pub async fn download_pieces<PG>(
-    piece_indexes: &[PieceIndex],
+    piece_indexes: Vec<PieceIndex>,
     piece_getter: &PG,
-) -> Result<Vec<Piece>, BoxError>
+) -> anyhow::Result<Vec<Piece>>
 where
-    PG: ObjectPieceGetter,
+    PG: PieceGetter,
 {
     debug!(
         count = piece_indexes.len(),
@@ -42,40 +41,23 @@ where
     );
 
     // TODO:
-    // - consider using a semaphore to limit the number of concurrent requests, like
-    //   download_segment_pieces()
-    // - if we're close to the number of pieces in a segment, use segment downloading and piece
+    // - if we're close to the number of pieces in a segment, or we can't find a piece, use segment downloading and piece
     //   reconstruction instead
     // Currently most objects are limited to 4 pieces, so this isn't needed yet.
-    let received_pieces = piece_indexes
-        .iter()
-        .map(|piece_index| async move {
-            match piece_getter.get_piece(*piece_index).await {
-                Ok(Some(piece)) => {
-                    trace!(?piece_index, "Piece request succeeded",);
-                    Ok(piece)
-                }
-                Ok(None) => {
-                    trace!(?piece_index, "Piece not found");
-                    Err(Error::PieceNotFound {
-                        piece_index: *piece_index,
-                    }
-                    .into())
-                }
-                Err(error) => {
-                    trace!(
-                        %error,
-                        ?piece_index,
-                        "Piece request caused an error",
-                    );
-                    Err(error)
-                }
-            }
-        })
-        .collect::<FuturesOrdered<_>>();
+    let mut received_pieces = piece_getter.get_pieces(piece_indexes.clone()).await?;
 
-    // We want exact pieces, so any errors are fatal.
-    let received_pieces: Vec<Piece> = received_pieces.try_collect().await?;
+    let mut pieces = Vec::new();
+    pieces.resize(piece_indexes.len(), Piece::default());
+
+    while let Some((piece_index, maybe_piece)) = received_pieces.next().await {
+        // We want exact pieces, so any errors are fatal.
+        let piece = maybe_piece?.ok_or(Error::PieceNotFound { piece_index })?;
+        let index_position = piece_indexes
+            .iter()
+            .position(|i| *i == piece_index)
+            .expect("get_pieces only returns indexes it was supplied; qed");
+        pieces[index_position] = piece;
+    }
 
     trace!(
         count = piece_indexes.len(),
@@ -83,5 +65,5 @@ where
         "Successfully retrieved exact pieces"
     );
 
-    Ok(received_pieces)
+    Ok(pieces)
 }
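Because `get_pieces` may yield results in any arrival order, the replacement loop above pre-fills the output and writes each piece at the position its index holds in the request. A toy model of that invariant (`u64` stands in for `PieceIndex`); the linear `position` scan is quadratic in the worst case, which is fine while objects span only a handful of pieces, as the TODO notes:

```rust
// Place each (index, value) result at the position of its index in the
// original request, so the output order matches `requested` exactly.
fn place_in_request_order<'a>(requested: &[u64], results: Vec<(u64, &'a str)>) -> Vec<&'a str> {
    let mut out = vec![""; requested.len()];
    for (index, value) in results {
        let position = requested
            .iter()
            .position(|i| *i == index)
            .expect("only requested indexes are returned");
        out[position] = value;
    }
    out
}

fn main() {
    // Results arrive as 3 then 1; output still follows the request order 1, 3.
    assert_eq!(
        place_in_request_order(&[1, 3], vec![(3, "c"), (1, "a")]),
        vec!["a", "c"]
    );
}
```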
diff --git a/shared/subspace-data-retrieval/src/piece_getter.rs b/shared/subspace-data-retrieval/src/piece_getter.rs
index 70f470de70..e23fee44ec 100644
--- a/shared/subspace-data-retrieval/src/piece_getter.rs
+++ b/shared/subspace-data-retrieval/src/piece_getter.rs
@@ -16,39 +16,59 @@
 //! Getting object pieces from the Subspace Distributed Storage Network, or various caches.
 
 use async_trait::async_trait;
+use futures::{stream, Stream, StreamExt};
 use std::fmt;
+use std::future::Future;
 use std::sync::Arc;
 use subspace_archiving::archiver::NewArchivedSegment;
 use subspace_core_primitives::pieces::{Piece, PieceIndex};
 
-/// A type-erased error
-pub type BoxError = Box<dyn std::error::Error + Send + Sync + 'static>;
-
-/// Trait representing a way to get pieces from the DSN for object reconstruction
-// TODO: make ObjectPieceGetter impls retry before failing, if that is useful
+/// Trait representing a way to get pieces
 #[async_trait]
-pub trait ObjectPieceGetter: fmt::Debug {
+pub trait PieceGetter: fmt::Debug {
     /// Get piece by index.
     ///
     /// Returns `Ok(None)` if the piece is not found.
     /// Returns `Err(_)` if trying to get the piece caused an error.
-    async fn get_piece(&self, piece_index: PieceIndex) -> Result<Option<Piece>, BoxError>;
+    async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result<Option<Piece>>;
+
+    /// Get pieces with provided indices.
+    ///
+    /// The number of elements in the returned stream is the same as the number of unique
+    /// `piece_indices`.
+    async fn get_pieces<'a>(
+        &'a self,
+        piece_indices: Vec<PieceIndex>,
+    ) -> anyhow::Result<
+        Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>,
+    >;
 }
 
 #[async_trait]
-impl<T> ObjectPieceGetter for Arc<T>
+impl<T> PieceGetter for Arc<T>
 where
-    T: ObjectPieceGetter + Send + Sync + ?Sized,
+    T: PieceGetter + Send + Sync + ?Sized,
 {
-    async fn get_piece(&self, piece_index: PieceIndex) -> Result<Option<Piece>, BoxError> {
+    #[inline]
+    async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result<Option<Piece>> {
         self.as_ref().get_piece(piece_index).await
     }
+
+    #[inline]
+    async fn get_pieces<'a>(
+        &'a self,
+        piece_indices: Vec<PieceIndex>,
+    ) -> anyhow::Result<
+        Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>,
+    > {
+        self.as_ref().get_pieces(piece_indices).await
+    }
 }
 
 // Convenience methods, mainly used in testing
 #[async_trait]
-impl ObjectPieceGetter for NewArchivedSegment {
-    async fn get_piece(&self, piece_index: PieceIndex) -> Result<Option<Piece>, BoxError> {
+impl PieceGetter for NewArchivedSegment {
+    async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result<Option<Piece>> {
         if piece_index.segment_index() == self.segment_header.segment_index() {
             return Ok(Some(
                 self.pieces
@@ -60,15 +80,60 @@ impl ObjectPieceGetter for NewArchivedSegment {
 
         Ok(None)
     }
+
+    async fn get_pieces<'a>(
+        &'a self,
+        piece_indices: Vec<PieceIndex>,
+    ) -> anyhow::Result<
+        Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>,
+    > {
+        get_pieces_individually(|piece_index| self.get_piece(piece_index), piece_indices)
+    }
 }
 
 #[async_trait]
-impl ObjectPieceGetter for (PieceIndex, Piece) {
-    async fn get_piece(&self, piece_index: PieceIndex) -> Result<Option<Piece>, BoxError> {
+impl PieceGetter for (PieceIndex, Piece) {
+    async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result<Option<Piece>> {
         if self.0 == piece_index {
             return Ok(Some(self.1.clone()));
         }
 
         Ok(None)
     }
+
+    async fn get_pieces<'a>(
+        &'a self,
+        piece_indices: Vec<PieceIndex>,
+    ) -> anyhow::Result<
+        Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>,
+    > {
+        get_pieces_individually(|piece_index| self.get_piece(piece_index), piece_indices)
+    }
+}
+
+/// A default implementation which gets each piece individually, using the `get_piece` async
+/// function.
+///
+/// This is mainly used for testing; most production implementations can fetch multiple pieces
+/// more efficiently.
+#[expect(clippy::type_complexity, reason = "type matches trait signature")]
+pub fn get_pieces_individually<'a, PieceIndices, Func, Fut>(
+    // TODO: replace with AsyncFn(PieceIndex) -> anyhow::Result<Option<Piece>> once it stabilises
+    // https://github.com/rust-lang/rust/issues/62290
+    get_piece: Func,
+    piece_indices: PieceIndices,
+) -> anyhow::Result<
+    Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>,
+>
+where
+    PieceIndices: IntoIterator<Item = PieceIndex> + Send + 'a,
+    Func: Fn(PieceIndex) -> Fut + Clone + Send + 'a,
+    Fut: Future<Output = anyhow::Result<Option<Piece>>> + Send + Unpin + 'a,
+{
+    Ok(Box::new(Box::pin(stream::iter(piece_indices).then(
+        move |piece_index| {
+            let get_piece = get_piece.clone();
+            async move { (piece_index, get_piece(piece_index).await) }
+        },
+    ))))
+}
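A new impl only has to provide `get_piece` and can delegate the batch method to `get_pieces_individually`, exactly as the built-in test impls above do. A sketch under that assumption, using a hypothetical in-memory getter (this also assumes `PieceIndex` implements `Hash`, as its derives suggest):

```rust
use async_trait::async_trait;
use futures::Stream;
use std::collections::HashMap;
use subspace_core_primitives::pieces::{Piece, PieceIndex};
use subspace_data_retrieval::piece_getter::{get_pieces_individually, PieceGetter};

// Hypothetical in-memory getter, used purely to show the intended pattern.
#[derive(Debug)]
struct MapPieceGetter(HashMap<PieceIndex, Piece>);

#[async_trait]
impl PieceGetter for MapPieceGetter {
    async fn get_piece(&self, piece_index: PieceIndex) -> anyhow::Result<Option<Piece>> {
        Ok(self.0.get(&piece_index).cloned())
    }

    async fn get_pieces<'a>(
        &'a self,
        piece_indices: Vec<PieceIndex>,
    ) -> anyhow::Result<
        Box<dyn Stream<Item = (PieceIndex, anyhow::Result<Option<Piece>>)> + Send + Unpin + 'a>,
    > {
        // One-by-one fallback; a production impl would batch its requests.
        get_pieces_individually(|piece_index| self.get_piece(piece_index), piece_indices)
    }
}
```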
diff --git a/shared/subspace-data-retrieval/src/segment_downloading.rs b/shared/subspace-data-retrieval/src/segment_downloading.rs
new file mode 100644
index 0000000000..f84fe86ec4
--- /dev/null
+++ b/shared/subspace-data-retrieval/src/segment_downloading.rs
@@ -0,0 +1,141 @@
+// Copyright (C) 2024 Subspace Labs, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Fetching segments of the archived history of Subspace Network.
+
+use crate::piece_getter::PieceGetter;
+use futures::StreamExt;
+use subspace_archiving::archiver::Segment;
+use subspace_archiving::reconstructor::{Reconstructor, ReconstructorError};
+use subspace_core_primitives::pieces::Piece;
+use subspace_core_primitives::segments::{
+    ArchivedHistorySegment, RecordedHistorySegment, SegmentIndex,
+};
+use subspace_erasure_coding::ErasureCoding;
+use tokio::task::spawn_blocking;
+use tracing::debug;
+
+/// Segment downloading errors.
+#[derive(Debug, thiserror::Error)]
+pub enum SegmentDownloadingError {
+    /// Not enough pieces
+    #[error("Not enough ({downloaded_pieces}) pieces")]
+    NotEnoughPieces {
+        /// Number of pieces that were downloaded
+        downloaded_pieces: usize,
+    },
+
+    /// Piece getter error
+    #[error("Piece getter error: {source}")]
+    PieceGetterError {
+        #[from]
+        source: anyhow::Error,
+    },
+
+    /// Segment reconstruction error
+    #[error("Segment reconstruction error: {source}")]
+    SegmentReconstruction {
+        #[from]
+        source: ReconstructorError,
+    },
+
+    /// Segment decoding error
+    #[error("Segment data decoding error: {source}")]
+    SegmentDecoding {
+        #[from]
+        source: parity_scale_codec::Error,
+    },
+}
+
+/// Concurrently downloads the pieces for `segment_index`, and reconstructs the segment.
+pub async fn download_segment<PG>(
+    segment_index: SegmentIndex,
+    piece_getter: &PG,
+    erasure_coding: ErasureCoding,
+) -> Result<Segment, SegmentDownloadingError>
+where
+    PG: PieceGetter,
+{
+    let reconstructor = Reconstructor::new(erasure_coding);
+
+    let segment_pieces = download_segment_pieces(segment_index, piece_getter).await?;
+
+    let segment = spawn_blocking(move || reconstructor.reconstruct_segment(&segment_pieces))
+        .await
+        .expect("Panic if blocking task panicked")?;
+
+    Ok(segment)
+}
+
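Calling the reconstruction entry point is a one-liner once a getter and matching erasure coding are in hand. A sketch; the surrounding function is hypothetical, and it assumes `SegmentIndex::ZERO` from `subspace-core-primitives`:

```rust
use subspace_core_primitives::segments::SegmentIndex;
use subspace_data_retrieval::piece_getter::PieceGetter;
use subspace_data_retrieval::segment_downloading::download_segment;
use subspace_erasure_coding::ErasureCoding;

// Downloads enough pieces of the first segment and reconstructs it; `?` works
// because SegmentDownloadingError converts into anyhow::Error.
async fn fetch_genesis_segment<PG>(
    piece_getter: &PG,
    erasure_coding: ErasureCoding,
) -> anyhow::Result<()>
where
    PG: PieceGetter,
{
    let _segment = download_segment(SegmentIndex::ZERO, piece_getter, erasure_coding).await?;
    Ok(())
}
```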
+/// Downloads pieces of the segment such that the segment can be reconstructed afterward.
+///
+/// Prefers source pieces if available; on error, returns the number of downloaded pieces.
+pub async fn download_segment_pieces<PG>(
+    segment_index: SegmentIndex,
+    piece_getter: &PG,
+) -> Result<Vec<Option<Piece>>, SegmentDownloadingError>
+where
+    PG: PieceGetter,
+{
+    let required_pieces_number = RecordedHistorySegment::NUM_RAW_RECORDS;
+    let mut downloaded_pieces = 0_usize;
+
+    let mut segment_pieces = vec![None::<Piece>; ArchivedHistorySegment::NUM_PIECES];
+
+    let mut pieces_iter = segment_index
+        .segment_piece_indexes_source_first()
+        .into_iter();
+
+    // Download in batches until we get enough or exhaust available pieces
+    while !pieces_iter.is_empty() && downloaded_pieces != required_pieces_number {
+        let piece_indices = pieces_iter
+            .by_ref()
+            .take(required_pieces_number - downloaded_pieces)
+            .collect();
+
+        let mut received_segment_pieces = piece_getter.get_pieces(piece_indices).await?;
+
+        while let Some((piece_index, result)) = received_segment_pieces.next().await {
+            match result {
+                Ok(Some(piece)) => {
+                    downloaded_pieces += 1;
+
+                    segment_pieces
+                        .get_mut(piece_index.position() as usize)
+                        .expect("Piece position is by definition within segment; qed")
+                        .replace(piece);
+                }
+                Ok(None) => {
+                    debug!(%piece_index, "Piece was not found");
+                }
+                Err(error) => {
+                    debug!(%error, %piece_index, "Failed to get piece");
+                }
+            }
+        }
+    }
+
+    if downloaded_pieces < required_pieces_number {
+        debug!(
+            %segment_index,
+            %downloaded_pieces,
+            %required_pieces_number,
+            "Failed to retrieve pieces for segment"
+        );
+
+        return Err(SegmentDownloadingError::NotEnoughPieces { downloaded_pieces });
+    }
+
+    Ok(segment_pieces)
+}
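Because `NotEnoughPieces` now carries the downloaded count, callers can report how close a failed download came to the recovery threshold of half the segment. A sketch, with the surrounding function hypothetical:

```rust
use subspace_core_primitives::segments::{RecordedHistorySegment, SegmentIndex};
use subspace_data_retrieval::piece_getter::PieceGetter;
use subspace_data_retrieval::segment_downloading::{
    download_segment_pieces, SegmentDownloadingError,
};

// Returns true if enough pieces (half the segment) could be downloaded.
async fn can_recover_segment<PG>(segment_index: SegmentIndex, piece_getter: &PG) -> bool
where
    PG: PieceGetter,
{
    match download_segment_pieces(segment_index, piece_getter).await {
        Ok(_segment_pieces) => true,
        Err(SegmentDownloadingError::NotEnoughPieces { downloaded_pieces }) => {
            eprintln!(
                "got {downloaded_pieces}/{} required pieces",
                RecordedHistorySegment::NUM_RAW_RECORDS
            );
            false
        }
        Err(error) => {
            eprintln!("segment download failed: {error}");
            false
        }
    }
}
```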
diff --git a/shared/subspace-data-retrieval/src/segment_fetcher.rs b/shared/subspace-data-retrieval/src/segment_fetcher.rs
deleted file mode 100644
index 686f5e7290..0000000000
--- a/shared/subspace-data-retrieval/src/segment_fetcher.rs
+++ /dev/null
@@ -1,170 +0,0 @@
-// Copyright (C) 2024 Subspace Labs, Inc.
-// SPDX-License-Identifier: Apache-2.0
-
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//! Fetching segments of the archived history of Subspace Network.
-
-use crate::piece_getter::ObjectPieceGetter;
-use async_lock::Semaphore;
-use futures::stream::FuturesUnordered;
-use futures::StreamExt;
-use subspace_archiving::archiver::Segment;
-use subspace_archiving::reconstructor::{Reconstructor, ReconstructorError};
-use subspace_core_primitives::pieces::Piece;
-use subspace_core_primitives::segments::{
-    ArchivedHistorySegment, RecordedHistorySegment, SegmentIndex,
-};
-use subspace_erasure_coding::ErasureCoding;
-use tokio::task::spawn_blocking;
-use tracing::{debug, trace};
-
-/// Segment getter errors.
-#[derive(Debug, thiserror::Error)]
-pub enum SegmentGetterError {
-    /// Piece getter error
-    #[error("Failed to get enough segment pieces")]
-    PieceGetter { segment_index: SegmentIndex },
-
-    /// Segment reconstruction error
-    #[error("Segment reconstruction error: {source:?}")]
-    SegmentReconstruction {
-        #[from]
-        source: ReconstructorError,
-    },
-
-    /// Segment decoding error
-    #[error("Segment data decoding error: {source:?}")]
-    SegmentDecoding {
-        #[from]
-        source: parity_scale_codec::Error,
-    },
-}
-
-/// Concurrently downloads the pieces for `segment_index`, and reconstructs the segment.
-pub async fn download_segment<PG>(
-    segment_index: SegmentIndex,
-    piece_getter: &PG,
-    erasure_coding: ErasureCoding,
-) -> Result<Segment, SegmentGetterError>
-where
-    PG: ObjectPieceGetter,
-{
-    let reconstructor = Reconstructor::new(erasure_coding);
-
-    let segment_pieces = download_segment_pieces(segment_index, piece_getter).await?;
-
-    let segment = spawn_blocking(move || reconstructor.reconstruct_segment(&segment_pieces))
-        .await
-        .expect("Panic if blocking task panicked")?;
-
-    Ok(segment)
-}
-
-/// Concurrently downloads the pieces for `segment_index`.
-// This code was copied and modified from subspace_service::sync_from_dsn::download_and_reconstruct_blocks():
-//
-
-// TODO: pass a lower concurrency limit into this function, to avoid overwhelming residential routers or slow connections
-pub async fn download_segment_pieces<PG>(
-    segment_index: SegmentIndex,
-    piece_getter: &PG,
-) -> Result<Vec<Option<Piece>>, SegmentGetterError>
-where
-    PG: ObjectPieceGetter,
-{
-    debug!(%segment_index, "Retrieving pieces of the segment");
-
-    let semaphore = &Semaphore::new(RecordedHistorySegment::NUM_RAW_RECORDS);
-
-    let mut received_segment_pieces = segment_index
-        .segment_piece_indexes_source_first()
-        .into_iter()
-        .map(|piece_index| {
-            // Source pieces will acquire permit here right away
-            let maybe_permit = semaphore.try_acquire();
-
-            async move {
-                let permit = match maybe_permit {
-                    Some(permit) => permit,
-                    None => {
-                        // Other pieces will acquire permit here instead
-                        semaphore.acquire().await
-                    }
-                };
-                let piece = match piece_getter.get_piece(piece_index).await {
-                    Ok(Some(piece)) => piece,
-                    Ok(None) => {
-                        trace!(?piece_index, "Piece not found");
-                        return None;
-                    }
-                    Err(error) => {
-                        trace!(
-                            %error,
-                            ?piece_index,
-                            "Piece request failed",
-                        );
-                        return None;
-                    }
-                };
-
-                trace!(?piece_index, "Piece request succeeded");
-
-                // Piece was received successfully, "remove" this slot from semaphore
-                permit.forget();
-                Some((piece_index, piece))
-            }
-        })
-        .collect::<FuturesUnordered<_>>();
-
-    let mut segment_pieces = vec![None::<Piece>; ArchivedHistorySegment::NUM_PIECES];
-    let mut pieces_received = 0;
-
-    while let Some(maybe_piece) = received_segment_pieces.next().await {
-        let Some((piece_index, piece)) = maybe_piece else {
-            continue;
-        };
-
-        segment_pieces
-            .get_mut(piece_index.position() as usize)
-            .expect("Piece position is by definition within segment; qed")
-            .replace(piece);
-
-        pieces_received += 1;
-
-        if pieces_received >= RecordedHistorySegment::NUM_RAW_RECORDS {
-            trace!(%segment_index, "Received half of the segment.");
-            break;
-        }
-    }
-
-    if pieces_received < RecordedHistorySegment::NUM_RAW_RECORDS {
-        debug!(
-            %segment_index,
-            pieces_received,
-            pieces_needed = RecordedHistorySegment::NUM_RAW_RECORDS,
-            "Failed to get half of the pieces in the segment"
-        );
-
-        Err(SegmentGetterError::PieceGetter { segment_index })
-    } else {
-        trace!(
-            %segment_index,
-            pieces_received,
-            pieces_needed = RecordedHistorySegment::NUM_RAW_RECORDS,
-            "Successfully retrieved enough pieces of the segment"
-        );
-
-        Ok(segment_pieces)
-    }
-}
diff --git a/shared/subspace-logging/Cargo.toml b/shared/subspace-logging/Cargo.toml
new file mode 100644
index 0000000000..9345b5fc44
--- /dev/null
+++ b/shared/subspace-logging/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "subspace-logging"
+description = "Ensure all logging utilities are centralized for the whole project"
+license = "Apache-2.0"
+version = "0.0.1"
+authors = ["Nazar Mokrynskyi <nazar@mokrynskyi.com>"]
+edition = "2021"
+include = [
+    "/src",
+    "/Cargo.toml",
+]
+
+[dependencies]
+tracing = "0.1.40"
+tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
+supports-color = "3.0.1"
diff --git a/shared/subspace-logging/src/lib.rs b/shared/subspace-logging/src/lib.rs
new file mode 100644
index 0000000000..4f6e7f87e4
--- /dev/null
+++ b/shared/subspace-logging/src/lib.rs
@@ -0,0 +1,23 @@
+use tracing::level_filters::LevelFilter;
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::util::SubscriberInitExt;
+use tracing_subscriber::{fmt, EnvFilter, Layer};
+
+pub fn init_logger() {
+    // TODO: Workaround for https://github.com/tokio-rs/tracing/issues/2214; also, on Windows
+    // the terminal doesn't support the same colors as bash does
+    let enable_color = if cfg!(windows) {
+        false
+    } else {
+        supports_color::on(supports_color::Stream::Stderr).is_some()
+    };
+    tracing_subscriber::registry()
+        .with(
+            fmt::layer().with_ansi(enable_color).with_filter(
+                EnvFilter::builder()
+                    .with_default_directive(LevelFilter::INFO.into())
+                    .from_env_lossy(),
+            ),
+        )
+        .init();
+}
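With the new crate, binaries can replace their hand-rolled `tracing-subscriber` setup with a single call; the `main` below is a sketch:

```rust
use subspace_logging::init_logger;

fn main() {
    // Honors RUST_LOG via EnvFilter (defaulting to INFO when unset) and only
    // enables ANSI colors where the terminal supports them.
    init_logger();
    tracing::info!("starting up");
}
```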
"Successfully retrieved enough pieces of the segment" - ); - - Ok(segment_pieces) - } -} diff --git a/shared/subspace-logging/Cargo.toml b/shared/subspace-logging/Cargo.toml new file mode 100644 index 0000000000..9345b5fc44 --- /dev/null +++ b/shared/subspace-logging/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "subspace-logging" +description = "Ensure all logging uilities are centralized for the whole project" +license = "Apache-2.0" +version = "0.0.1" +authors = ["Nazar Mokrynskyi "] +edition = "2021" +include = [ + "/src", + "/Cargo.toml", +] + +[dependencies] +tracing = "0.1.40" +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +supports-color = "3.0.1" diff --git a/shared/subspace-logging/src/lib.rs b/shared/subspace-logging/src/lib.rs new file mode 100644 index 0000000000..4f6e7f87e4 --- /dev/null +++ b/shared/subspace-logging/src/lib.rs @@ -0,0 +1,23 @@ +use tracing::level_filters::LevelFilter; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::{fmt, EnvFilter, Layer}; + +pub fn init_logger() { + // TODO: Workaround for https://github.com/tokio-rs/tracing/issues/2214, also on + // Windows terminal doesn't support the same colors as bash does + let enable_color = if cfg!(windows) { + false + } else { + supports_color::on(supports_color::Stream::Stderr).is_some() + }; + tracing_subscriber::registry() + .with( + fmt::layer().with_ansi(enable_color).with_filter( + EnvFilter::builder() + .with_default_directive(LevelFilter::INFO.into()) + .from_env_lossy(), + ), + ) + .init(); +} diff --git a/test/subspace-test-client/src/lib.rs b/test/subspace-test-client/src/lib.rs index cc5cb1c1f4..0cf710b79e 100644 --- a/test/subspace-test-client/src/lib.rs +++ b/test/subspace-test-client/src/lib.rs @@ -239,7 +239,7 @@ where let plotted_sector = plot_sector(PlotSectorOptions { public_key: &public_key, sector_index, - piece_getter: &archived_segment.pieces, + piece_getter: &archived_segment, farmer_protocol_info, kzg: &kzg, erasure_coding, diff --git a/test/subspace-test-runtime/src/lib.rs b/test/subspace-test-runtime/src/lib.rs index 99d4f1dc81..383e0e9d17 100644 --- a/test/subspace-test-runtime/src/lib.rs +++ b/test/subspace-test-runtime/src/lib.rs @@ -73,6 +73,7 @@ use sp_messenger::messages::{ BlockMessagesWithStorageKey, ChainId, ChannelId, CrossDomainMessage, FeeModel, MessageId, MessageKey, }; +use sp_messenger::{ChannelNonce, XdmId}; use sp_messenger_host_functions::{get_storage_key, StorageKeyRequest}; use sp_mmr_primitives::EncodableOpaqueLeaf; use sp_runtime::traits::{ @@ -1386,10 +1387,6 @@ impl_runtime_apis! { }) } - fn operator_id_by_signing_key(signing_key: OperatorPublicKey) -> Option { - Domains::operator_signing_key(signing_key) - } - fn receipt_hash(domain_id: DomainId, domain_number: DomainNumber) -> Option { Domains::receipt_hash(domain_id, domain_number) } @@ -1507,6 +1504,22 @@ impl_runtime_apis! 
         fn domain_chains_allowlist_update(domain_id: DomainId) -> Option<DomainAllowlistUpdates>{
             Messenger::domain_chains_allowlist_update(domain_id)
         }
+
+        fn xdm_id(ext: &<Block as BlockT>::Extrinsic) -> Option<XdmId> {
+            match &ext.function {
+                RuntimeCall::Messenger(pallet_messenger::Call::relay_message { msg }) => {
+                    Some(XdmId::RelayMessage((msg.src_chain_id, msg.channel_id, msg.nonce)))
+                }
+                RuntimeCall::Messenger(pallet_messenger::Call::relay_message_response { msg }) => {
+                    Some(XdmId::RelayResponseMessage((msg.src_chain_id, msg.channel_id, msg.nonce)))
+                }
+                _ => None,
+            }
+        }
+
+        fn channel_nonce(chain_id: ChainId, channel_id: ChannelId) -> Option<ChannelNonce> {
+            Messenger::channel_nonce(chain_id, channel_id)
+        }
     }
 
     impl sp_messenger::RelayerApi<Block, BlockNumber, BlockNumber, <Block as BlockT>::Hash> for Runtime {
diff --git a/test/subspace-test-service/src/lib.rs b/test/subspace-test-service/src/lib.rs
index 5ab623bc57..8e0efe006a 100644
--- a/test/subspace-test-service/src/lib.rs
+++ b/test/subspace-test-service/src/lib.rs
@@ -518,25 +518,17 @@ impl MockConsensusNode {
             tracing_unbounded("consensus_message_channel", 100);
 
         // Start cross domain message listener for Consensus chain to receive messages from domains in the network
-        let consensus_listener = cross_domain_message_gossip::start_cross_chain_message_listener::<
-            _,
-            _,
-            _,
-            _,
-            _,
-            DomainBlock,
-            _,
-            _,
-        >(
-            ChainId::Consensus,
-            client.clone(),
-            client.clone(),
-            transaction_pool.clone(),
-            network_service.clone(),
-            consensus_msg_receiver,
-            domain_executor,
-            sync_service.clone(),
-        );
+        let consensus_listener =
+            cross_domain_message_gossip::start_cross_chain_message_listener::<_, _, _, _, _, _, _>(
+                ChainId::Consensus,
+                client.clone(),
+                client.clone(),
+                transaction_pool.clone(),
+                network_service.clone(),
+                consensus_msg_receiver,
+                domain_executor,
+                sync_service.clone(),
+            );
 
         task_manager
             .spawn_essential_handle()