From 83f47bf3f4edbab4fef463c8ee0db59dcdaa4b75 Mon Sep 17 00:00:00 2001 From: bochaco Date: Thu, 11 Apr 2024 11:47:04 -0300 Subject: [PATCH] draft: minor fixes and adding unit tests for the algorithm formulas --- sn_networking/src/lib.rs | 8 ++---- sn_networking/src/sybil.rs | 51 +++++++++++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/sn_networking/src/lib.rs b/sn_networking/src/lib.rs index 60fe95a171..947eb2482b 100644 --- a/sn_networking/src/lib.rs +++ b/sn_networking/src/lib.rs @@ -825,12 +825,8 @@ impl Network { match self.get_closest_peers(&random_addr, true).await { Ok(closest_peers) => { - if check_for_sybil_attack( - &closest_peers, - random_addr.as_kbucket_key(), - &BTreeMap::default(), - ) - .await + if check_for_sybil_attack(&closest_peers, random_addr.as_kbucket_key(), &vec![]) + .await { info!(">>> Sybil attack detected around addr: {random_addr}"); } diff --git a/sn_networking/src/sybil.rs b/sn_networking/src/sybil.rs index 7f3ab9ad75..785a658059 100644 --- a/sn_networking/src/sybil.rs +++ b/sn_networking/src/sybil.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use itertools::Itertools; use libp2p::{ @@ -25,7 +25,7 @@ const ITERATIONS_FOR_NET_SIZE_ESTIMATION: usize = 50; // of its K_VALUE closest peers, sorted by increasing distance. This order // is a prerequisite for the functions this container is used by, // i.e. their result is dependant on the correct ordering of these values. -pub(super) type RandomKeysAndClosestPeerIds = BTreeMap>, Vec>; +pub(super) type RandomKeysAndClosestPeerIds = Vec<(KBucketKey>, Vec)>; // Given the set of closest K peers ids to the passed content address, return 'true' // if there is probabilistically a sybil attack around that CID address. @@ -48,6 +48,7 @@ pub(super) async fn check_for_sybil_attack( let q = |x| cpls_freqs.get(&x).cloned().unwrap_or(0) as f64 / k as f64; let n = get_net_size_estimate(random_keys); + info!(">>> NET SIZE ESTIMATE: {n}"); let model_dist = compute_model_distribution(n); let p = |x| model_dist.get(&x).cloned().unwrap_or(0f64) / k as f64; @@ -79,10 +80,11 @@ fn average_between_keys_and_i_th_closest_peer( fn get_net_size_estimate(random_keys: &RandomKeysAndClosestPeerIds) -> usize { let mut best_n_found = 0; let mut smallest_value_found = f64::MAX; + // FIXME: this iteration needs to be smarter for n in 0..ITERATIONS_FOR_NET_SIZE_ESTIMATION { - let value = (1..=K_VALUE.get()).fold(0f64, |acc, i| { + let value = (0..K_VALUE.get()).fold(0f64, |acc, i| { let d_i = average_between_keys_and_i_th_closest_peer(i, random_keys); - let dist: f64 = d_i - ((2f64.pow(256) * i as f64) / (n + 1) as f64); + let dist: f64 = d_i - ((2f64.pow(256) * (i + 1) as f64) / (n + 1) as f64); acc + dist.pow(2) }); if value < smallest_value_found { @@ -96,7 +98,7 @@ fn get_net_size_estimate(random_keys: &RandomKeysAndClosestPeerIds) -> usize { // Formula 3 in page 7 fn distrib_j_th_largest_prefix_length(n: usize, j: usize, x: usize) -> f64 { - (0..j).fold(0f64, |acc, i| { + (0..=j).fold(0f64, |acc, i| { acc + (binomial(n, i) as f64 * (1f64 - 0.5.pow((x + 1) as f64)).pow((n - i) as f64) * 0.5.pow(((x + 1) * i) as f64)) @@ -107,7 +109,7 @@ fn distrib_j_th_largest_prefix_length(n: usize, j: usize, x: usize) -> f64 { // Returns a map of common prefix lengths to their probabilistically expected frequency. fn compute_model_distribution(n: usize) -> HashMap { let f = |x| { - (1..=K_VALUE.get()).fold(0f64, |acc, j| { + (0..K_VALUE.get()).fold(0f64, |acc, j| { acc + distrib_j_th_largest_prefix_length(n, j, x) - distrib_j_th_largest_prefix_length(n, j, x - 1) }) @@ -151,3 +153,40 @@ fn common_prefix_length(lhs: &[u8], rhs: &[u8]) -> usize { } common_prefix_length } + +#[cfg(test)] +mod tests { + use super::common_prefix_length; + + use xor_name::XorName; + + #[test] + fn test_common_prefix_length() { + // we use XorName utilities as it's easier to build test data with them + let mut rng = rand::thread_rng(); + let mut lhs = XorName::random(&mut rng); + assert_eq!(common_prefix_length(&lhs, &lhs), 256); + + let mut rhs = !lhs; + // let's first make sure lhs != rhs in every bit + assert_eq!(common_prefix_length(&lhs, &rhs), 0); + + for i in 0..=255 { + lhs = lhs.with_bit(i, true); + rhs = rhs.with_bit(i, true); + assert_eq!( + i as usize + 1, + common_prefix_length(&lhs, &rhs), + "unexpected result from common_prefix_length fn" + ); + } + } + + #[test] + fn test_net_size_estimate() { + // Build a map with 256 random keys, one for each Kbucket + // with their corresponding K-closest peers to a random CID; + // e.g. in Kbucket #2 get the 20 closest peers to a random CID that shares 2 bits as a prefix. + todo!(); + } +}