From 83f47bf3f4edbab4fef463c8ee0db59dcdaa4b75 Mon Sep 17 00:00:00 2001
From: bochaco <gabrielviganotti@gmail.com>
Date: Thu, 11 Apr 2024 11:47:04 -0300
Subject: [PATCH] draft: minor fixes and adding unit tests for the algorithm
 formulas

---
 sn_networking/src/lib.rs   |  8 ++----
 sn_networking/src/sybil.rs | 51 +++++++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 12 deletions(-)
diff --git a/sn_networking/src/lib.rs b/sn_networking/src/lib.rs
index 60fe95a171..947eb2482b 100644
--- a/sn_networking/src/lib.rs
+++ b/sn_networking/src/lib.rs
@@ -825,12 +825,8 @@ impl Network {
 
         match self.get_closest_peers(&random_addr, true).await {
             Ok(closest_peers) => {
-                if check_for_sybil_attack(
-                    &closest_peers,
-                    random_addr.as_kbucket_key(),
-                    &BTreeMap::default(),
-                )
-                .await
+                if check_for_sybil_attack(&closest_peers, random_addr.as_kbucket_key(), &vec![])
+                    .await
                 {
                     info!(">>> Sybil attack detected around addr: {random_addr}");
                 }
diff --git a/sn_networking/src/sybil.rs b/sn_networking/src/sybil.rs
index 7f3ab9ad75..785a658059 100644
--- a/sn_networking/src/sybil.rs
+++ b/sn_networking/src/sybil.rs
@@ -6,7 +6,7 @@
 // KIND, either express or implied. Please review the Licences for the specific language governing
 // permissions and limitations relating to use of the SAFE Network Software.
 
-use std::collections::{BTreeMap, HashMap};
+use std::collections::HashMap;
 
 use itertools::Itertools;
 use libp2p::{
@@ -25,7 +25,7 @@ const ITERATIONS_FOR_NET_SIZE_ESTIMATION: usize = 50;
 // of its K_VALUE closest peers, sorted by increasing distance. This order
 // is a prerequisite for the functions this container is used by,
 // i.e. their result is dependant on the correct ordering of these values.
-pub(super) type RandomKeysAndClosestPeerIds = BTreeMap<KBucketKey<Vec<u8>>, Vec<PeerId>>;
+pub(super) type RandomKeysAndClosestPeerIds = Vec<(KBucketKey<Vec<u8>>, Vec<PeerId>)>;
 
 // Given the set of closest K peers ids to the passed content address, return 'true'
 // if there is probabilistically a sybil attack around that CID address.
@@ -48,6 +48,7 @@ pub(super) async fn check_for_sybil_attack(
     let q = |x| cpls_freqs.get(&x).cloned().unwrap_or(0) as f64 / k as f64;
 
     let n = get_net_size_estimate(random_keys);
+    info!(">>> NET SIZE ESTIMATE: {n}");
     let model_dist = compute_model_distribution(n);
     let p = |x| model_dist.get(&x).cloned().unwrap_or(0f64) / k as f64;
 
@@ -79,10 +80,11 @@ fn average_between_keys_and_i_th_closest_peer(
 fn get_net_size_estimate(random_keys: &RandomKeysAndClosestPeerIds) -> usize {
     let mut best_n_found = 0;
     let mut smallest_value_found = f64::MAX;
+    // FIXME: make this search smarter; it only scans ITERATIONS_FOR_NET_SIZE_ESTIMATION candidates
     for n in 0..ITERATIONS_FOR_NET_SIZE_ESTIMATION {
-        let value = (1..=K_VALUE.get()).fold(0f64, |acc, i| {
+        let value = (0..K_VALUE.get()).fold(0f64, |acc, i| {
             let d_i = average_between_keys_and_i_th_closest_peer(i, random_keys);
-            let dist: f64 = d_i - ((2f64.pow(256) * i as f64) / (n + 1) as f64);
+            let dist: f64 = d_i - ((2f64.pow(256) * (i + 1) as f64) / (n + 1) as f64);
             acc + dist.pow(2)
         });
         if value < smallest_value_found {
@@ -96,7 +98,7 @@ fn get_net_size_estimate(random_keys: &RandomKeysAndClosestPeerIds) -> usize {
 
 // Formula 3 in page 7
 fn distrib_j_th_largest_prefix_length(n: usize, j: usize, x: usize) -> f64 {
-    (0..j).fold(0f64, |acc, i| {
+    (0..=j).fold(0f64, |acc, i| {
         acc + (binomial(n, i) as f64
             * (1f64 - 0.5.pow((x + 1) as f64)).pow((n - i) as f64)
             * 0.5.pow(((x + 1) * i) as f64))
@@ -107,7 +109,7 @@ fn distrib_j_th_largest_prefix_length(n: usize, j: usize, x: usize) -> f64 {
 // Returns a map of common prefix lengths to their probabilistically expected frequency.
 fn compute_model_distribution(n: usize) -> HashMap<usize, f64> {
     let f = |x| {
-        (1..=K_VALUE.get()).fold(0f64, |acc, j| {
+        (0..K_VALUE.get()).fold(0f64, |acc, j| {
             acc + distrib_j_th_largest_prefix_length(n, j, x)
                 - distrib_j_th_largest_prefix_length(n, j, x - 1)
         })
@@ -151,3 +153,40 @@ fn common_prefix_length(lhs: &[u8], rhs: &[u8]) -> usize {
     }
     common_prefix_length
 }
+
+#[cfg(test)]
+mod tests {
+    use super::common_prefix_length;
+
+    use xor_name::XorName;
+
+    #[test]
+    fn test_common_prefix_length() {
+        // XorName is used only as a convenient way to build and flip bits of 256-bit test values
+        let mut rng = rand::thread_rng();
+        let mut lhs = XorName::random(&mut rng);
+        assert_eq!(common_prefix_length(&lhs, &lhs), 256);
+
+        let mut rhs = !lhs;
+        // rhs is the bitwise negation of lhs, so they must not share even the first bit
+        assert_eq!(common_prefix_length(&lhs, &rhs), 0);
+
+        for i in 0..=255 { // each pass is expected to extend the shared prefix by one bit
+            lhs = lhs.with_bit(i, true);
+            rhs = rhs.with_bit(i, true);
+            assert_eq!(
+                i as usize + 1,
+                common_prefix_length(&lhs, &rhs),
+                "unexpected result from common_prefix_length fn"
+            );
+        }
+    }
+
+    #[test]
+    fn test_net_size_estimate() {
+        // TODO: build RandomKeysAndClosestPeerIds test data with 256 random keys, one per Kbucket,
+        // each paired with its K-closest peers; e.g. for Kbucket #2 use the 20 closest peers to a
+        // random CID that shares 2 bits as a prefix. Until then todo!() makes this test panic.
+        todo!();
+    }
+}