maidsafe · bochaco · Apr 5, 2024 · Apr 8, 2024 · Apr 8, 2024 · Apr 9, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/sn_networking/Cargo.toml b/sn_networking/Cargo.toml
@@ -49,6 +49,7 @@ hyper = { version = "0.14", features = [
 ], optional = true }
 itertools = "~0.12.1"
 custom_debug = "~0.6.1"
+num = "0.4.1"
 prometheus-client = { version = "0.22", optional = true }
 rand = { version = "~0.8.5", features = ["small_rng"] }
 rayon = "1.8.0"

diff --git a/sn_networking/src/cmd.rs b/sn_networking/src/cmd.rs
@@ -379,6 +379,8 @@
                     .payment_received();
             }
             SwarmCmd::GetLocalRecord { key, sender } => {
+                // TODO: eclipse the content if a sybil XorName was set and it is close to the key?
+
                 cmd_string = "GetLocalRecord";
                 let record = self
                     .swarm
@@ -394,6 +396,8 @@
                 sender,
                 quorum,
             } => {
+                // TODO: eclipse the content if a sybil XorName was set and it is close to the key?
+
                 cmd_string = "PutRecord";
                 let record_key = PrettyPrintRecordKey::from(&record.key).into_owned();
                 trace!(
@@ -446,6 +450,8 @@
                 }
             }
             SwarmCmd::PutLocalRecord { record } => {
+                // TODO: eclipse the content if a sybil XorName was set and it is close to the key?
+
                 cmd_string = "PutLocalRecord";
                 let key = record.key.clone();
                 let record_key = PrettyPrintRecordKey::from(&key);

diff --git a/sn_networking/src/driver.rs b/sn_networking/src/driver.rs
@@ -217,6 +217,7 @@ pub struct NetworkBuilder {
     metrics_server_port: Option<u16>,
     #[cfg(feature = "upnp")]
     upnp: bool,
+    sybil: Option<XorName>,
 }
 
 impl NetworkBuilder {
@@ -236,6 +237,7 @@ impl NetworkBuilder {
             metrics_server_port: None,
             #[cfg(feature = "upnp")]
             upnp: false,
+            sybil: None,
         }
     }
 
@@ -274,6 +276,10 @@ impl NetworkBuilder {
         self.upnp = upnp;
     }
 
+    pub fn set_sybil_mode(&mut self, sybil: Option<XorName>) {
+        self.sybil = sybil; // later handed over to the driver's `sybil` field by the build step
+    }
+
     /// Creates a new `SwarmDriver` instance, along with a `Network` handle
     /// for sending commands and an `mpsc::Receiver<NetworkEvent>` for receiving
     /// network events. It initializes the swarm, sets up the transport, and
@@ -596,6 +602,7 @@ impl NetworkBuilder {
             replication_fetcher,
             #[cfg(feature = "open-metrics")]
             network_metrics,
+            sybil: self.sybil,
             cmd_receiver: swarm_cmd_receiver,
             event_sender: network_event_sender,
             pending_get_closest_peers: Default::default(),
@@ -640,6 +647,8 @@ pub struct SwarmDriver {
     #[cfg(feature = "open-metrics")]
     pub(crate) network_metrics: Option<NetworkMetrics>,
 
+    sybil: Option<XorName>,
+
     cmd_receiver: mpsc::Receiver<SwarmCmd>,
     event_sender: mpsc::Sender<NetworkEvent>, // Use `self.send_event()` to send a NetworkEvent.
 

diff --git a/sn_networking/src/lib.rs b/sn_networking/src/lib.rs
@@ -26,6 +26,7 @@ mod record_store_api;
 mod relay_manager;
 mod replication_fetcher;
 mod spends;
+mod sybil;
 pub mod target_arch;
 mod transfers;
 mod transport;
@@ -45,7 +46,7 @@ pub use self::{
     transfers::{get_raw_signed_spends_from_record, get_signed_spend_from_record},
 };
 
-use self::{cmd::SwarmCmd, error::Result};
+use self::{cmd::SwarmCmd, error::Result, sybil::check_for_sybil_attack};
 use backoff::{Error as BackoffError, ExponentialBackoff};
 use futures::future::select_all;
 use libp2p::{
@@ -58,7 +59,7 @@ use rand::Rng;
 use sn_protocol::{
     error::Error as ProtocolError,
     messages::{ChunkProof, Cmd, Nonce, Query, QueryResponse, Request, Response},
-    storage::{RecordType, RetryStrategy},
+    storage::{ChunkAddress, RecordType, RetryStrategy},
     NetworkAddress, PrettyPrintKBucketKey, PrettyPrintRecordKey,
 };
 use sn_transfers::{MainPubkey, NanoTokens, PaymentQuote, QuotingMetrics};
@@ -67,13 +68,15 @@ use std::{
     path::PathBuf,
     sync::Arc,
 };
-use tokio::sync::{
-    mpsc::{self, Sender},
-    oneshot,
+use tokio::{
+    sync::{
+        mpsc::{self, Sender},
+        oneshot,
+    },
+    time::Duration,
 };
-
-use tokio::time::Duration;
 use tracing::trace;
+use xor_name::XorName;
 
 /// The type of quote for a selected payee.
 pub type PayeeQuote = (PeerId, MainPubkey, PaymentQuote);
@@ -863,6 +866,26 @@ impl Network {
         Ok(closest_peers.into_iter().cloned().collect())
     }
 
+    /// Probes a random chunk address for signs of a sybil attack: fetches the peers
+    /// closest to it and logs when their distribution is flagged as suspicious.
+    pub async fn perform_sybil_attack_check(&self) {
+        // The RNG is a statement-scoped temporary, so it is never held across an `.await`.
+        let cid = XorName::random(&mut rand::thread_rng());
+        let random_addr = NetworkAddress::from_chunk_address(ChunkAddress::new(cid));
+
+        match self.get_closest_peers(&random_addr, true).await {
+            Ok(closest_peers) => {
+                // NOTE: no random keys are passed yet, so the net size estimation has no data
+                if check_for_sybil_attack(&closest_peers, random_addr.as_kbucket_key(), &vec![])
+                    .await
+                {
+                    info!(">>> Sybil attack detected around addr: {random_addr}");
+                }
+            }
+            Err(err) => error!(">>> Failed to get closest peers to check for sybil attack: {err:?}"),
+        }
+    }
+
     /// Send a `Request` to the provided set of peers and wait for their responses concurrently.
     /// If `get_all_responses` is true, we wait for the responses from all the peers.
     /// NB TODO: Will return an error if the request timeouts.

diff --git a/sn_networking/src/sybil.rs b/sn_networking/src/sybil.rs
@@ -0,0 +1,223 @@
+// Copyright 2024 MaidSafe.net limited.
+//
+// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3.
+// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed
+// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. Please review the Licences for the specific language governing
+// permissions and limitations relating to use of the SAFE Network Software.
+
+use std::collections::HashMap;
+
+use itertools::Itertools;
+use libp2p::{
+    kad::{KBucketKey, K_VALUE},
+    PeerId,
+};
+use num::{integer::binomial, pow::Pow};
+
+// Threshold to determine if there is an attack using Kullback-Leibler (KL) divergence
+// between model peer ids distribution vs. actual distribution around any point in the address space.
+const KL_DIVERGENCE_THRESHOLD: f64 = 10f64; // TODO: find a proper value
+
+const ITERATIONS_FOR_NET_SIZE_ESTIMATION: usize = 500;
+
+// The container maps each random KAD Key to the ordered list
+// of its K_VALUE closest peers, sorted by increasing distance. This order
+// is a prerequisite for the functions this container is used by,
+// i.e. their result is dependent on the correct ordering of these values.
+pub(super) type RandomKeysAndClosestPeerIds = Vec<(KBucketKey<Vec<u8>>, Vec<PeerId>)>;
+
+// Given the set of closest K peers ids to the passed content address, return 'true'
+// if there is probabilistically a sybil attack around that CID address. It also returns 'false'
+// when there is not enough data to decide (less than K peers for any key, or no random keys).
+// This implements the algorithm proposed in https://ssg.lancs.ac.uk/wp-content/uploads/ndss_preprint.pdf
+pub(super) async fn check_for_sybil_attack(
+    peers: &[PeerId],
+    cid: KBucketKey<Vec<u8>>,
+    random_keys: &RandomKeysAndClosestPeerIds,
+) -> bool {
+    let k = peers.len();
+    info!(">>> CHECKING SYBIL ATTACK WITH {k} PEERS: {peers:?}");
+
+    // FIXME: return an error instead; for now bail out (rather than panic) on insufficient data
+    let min = K_VALUE.get();
+    if k < min || random_keys.is_empty() || random_keys.iter().any(|(_, ids)| ids.len() < min) {
+        info!(">>> Not enough peers/random keys to check for a sybil attack");
+        return false;
+    }
+
+    let cpls_freqs = num_peers_per_cpl(peers, cid);
+    let q = |x| cpls_freqs.get(&x).cloned().unwrap_or(0) as f64 / k as f64;
+
+    let n = get_net_size_estimate(random_keys);
+    info!(">>> NET SIZE ESTIMATE: {n}");
+    let model_dist = compute_model_distribution(n);
+    let p = |x| model_dist.get(&x).cloned().unwrap_or(0f64) / k as f64;
+
+    compute_kl_divergence(&p, &q) > KL_DIVERGENCE_THRESHOLD
+}
+
+// Formula 1 in page 3
+// Compute the average distance between each of the passed random keys, and their i-th closest
+// peer. Distances are full XOR distances (approximated as f64), the scale Formula 2 works with.
+fn average_between_keys_and_i_th_closest_peer(
+    i: usize,
+    random_keys: &RandomKeysAndClosestPeerIds,
+) -> f64 {
+    let m = random_keys.len() as f64;
+    let distances = random_keys.iter().fold(0f64, |acc, (key_j, peers)| {
+        let i_th_peer: KBucketKey<PeerId> = peers[i].into();
+        let (lhs, rhs) = (key_j.hashed_bytes(), i_th_peer.hashed_bytes());
+        // Big-endian fold of the XOR-ed hashed bytes; `ilog2` would only give the bit length
+        acc + lhs.iter().zip(rhs).fold(0f64, |d, (a, b)| d * 256f64 + f64::from(a ^ b))
+    });
+    distances / m
+}
+
+// Formula 2 in page 3
+// Estimates the network size from the distances between random KAD Keys and their closest PeerIds.
+fn get_net_size_estimate(random_keys: &RandomKeysAndClosestPeerIds) -> usize {
+    // The average distances d_i don't depend on the candidate size, so compute them only once
+    let avg_dists: Vec<f64> = (0..K_VALUE.get())
+        .map(|i| average_between_keys_and_i_th_closest_peer(i, random_keys))
+        .collect();
+    let mut best_n_found = 0;
+    let mut smallest_value_found = f64::MAX;
+    // FIXME: this iteration needs to be smarter
+    for n in 0..ITERATIONS_FOR_NET_SIZE_ESTIMATION {
+        let value = avg_dists.iter().enumerate().fold(0f64, |acc, (i, &d_i)| {
+            let dist: f64 = d_i - ((2f64.pow(256) * (i + 1) as f64) / (n + 1) as f64);
+            acc + dist.pow(2)
+        });
+        trace!(">>> EVAL FOR N {n} gives {value} -- smallest so far {smallest_value_found}");
+        if value < smallest_value_found {
+            (smallest_value_found, best_n_found) = (value, n);
+        }
+    }
+    best_n_found
+}
+
+// Formula 3 in page 7: sum for i in 0..=j of C(n, i) * (1 - q)^(n - i) * q^i, q = 0.5^(x + 1).
+// C(n, i) is built in f64 (it overflows integers for large n); zero terms with i > n are skipped.
+fn distrib_j_th_largest_prefix_length(n: usize, j: usize, x: usize) -> f64 {
+    let q = 0.5f64.powf((x + 1) as f64);
+    let binom = |i: usize| (1..=i).fold(1f64, |c, t| c * (n - i + t) as f64 / t as f64);
+    let term = |i| binom(i) * (1f64 - q).powf((n - i) as f64) * q.powf(i as f64);
+    (0..=j.min(n)).map(term).sum()
+}
+
+// Formula 4 (partially) in page 7
+// Returns a map of common prefix lengths to their probabilistically expected frequency.
+fn compute_model_distribution(n: usize) -> HashMap<usize, f64> {
+    let d = |j, x| distrib_j_th_largest_prefix_length(n, j, x);
+    // Mass at `x` is D(x) - D(x - 1), D being Formula 3. There is no prefix length below 0, so
+    // for x == 0 the subtrahend is taken as 0 instead of evaluating `x - 1` (usize underflow).
+    let mass = |j, x: usize| d(j, x) - x.checked_sub(1).map_or(0f64, |prev| d(j, prev));
+    // Accumulate the masses of the K_VALUE largest prefix lengths for each `x`
+    let f = |x| (0..K_VALUE.get()).map(|j| mass(j, x)).sum::<f64>();
+
+    (0..=256).map(|x| (x, f(x))).collect()
+}
+
+// Formula 5 in page 7: Kullback-Leibler (KL) divergence of `q` from `p` over prefix lengths 0..=256.
+// Terms with q(x) == 0 contribute nothing (0 * ln(0) limit); evaluating them would yield NaN.
+fn compute_kl_divergence(p: &dyn Fn(usize) -> f64, q: &dyn Fn(usize) -> f64) -> f64 {
+    (0..=256).fold(0f64, |acc, x| match q(x) {
+        q_x if q_x > 0f64 => acc + q_x * (q_x / p(x)).ln(),
+        _ => acc,
+    })
+}
+
+// Formula 6 (partially) in page 7
+// Counts, for each common prefix length (CPL) with the given CID, how many peers have it.
+fn num_peers_per_cpl(peers: &[PeerId], cid: KBucketKey<Vec<u8>>) -> HashMap<usize, usize> {
+    let target = cid.hashed_bytes();
+    // CPLs are computed over the hashed bytes of the keys
+    let cpl_of = |peer: &PeerId| {
+        let key = KBucketKey::<PeerId>::from(*peer);
+        common_prefix_length(key.hashed_bytes(), target)
+    };
+
+    peers.iter().map(cpl_of).counts()
+}
+
+// Helper to calculate number of common prefix bits between two slices.
+// At most the first 32 bytes (256 bits) are compared, and only over the part both slices
+// have in common, so shorter inputs yield a shorter prefix instead of an out-of-bounds panic.
+fn common_prefix_length(lhs: &[u8], rhs: &[u8]) -> usize {
+    let mut bits = 0usize;
+    for diff in lhs.iter().zip(rhs).take(32).map(|(l, r)| l ^ r) {
+        if diff != 0 {
+            // leading zeros of the XOR are the bits still shared within this byte
+            return bits + diff.leading_zeros() as usize;
+        }
+        bits += 8;
+    }
+    bits
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::sort_peers_by_address;
+
+    use super::{common_prefix_length, get_net_size_estimate, RandomKeysAndClosestPeerIds};
+
+    use libp2p::{kad::K_VALUE, PeerId};
+    use sn_protocol::{storage::ChunkAddress, NetworkAddress};
+
+    // XorName is used only as a convenient way to build and flip bits of 256-bit test data
+    use xor_name::XorName;
+
+    #[test]
+    fn test_common_prefix_length() {
+        let mut rng = rand::thread_rng();
+        let mut lhs = XorName::random(&mut rng);
+        assert_eq!(common_prefix_length(&lhs, &lhs), 256);
+
+        let mut rhs = !lhs;
+        // `rhs` is the bitwise negation of `lhs`, so they must differ already in the first bit
+        assert_eq!(common_prefix_length(&lhs, &rhs), 0);
+
+        for i in 0..=255 {
+            lhs = lhs.with_bit(i, true);
+            rhs = rhs.with_bit(i, true);
+            assert_eq!(
+                i as usize + 1,
+                common_prefix_length(&lhs, &rhs),
+                "unexpected result from common_prefix_length fn"
+            );
+        }
+    }
+
+    #[test]
+    fn test_net_size_estimate() {
+        // Build NUM_OF_KBUCKETS random keys, each paired with its K_VALUE closest peers
+        // (as sorted by `sort_peers_by_address`) out of NUM_OF_PEERS random peer ids.
+        const NUM_OF_KBUCKETS: usize = 256;
+        const NUM_OF_PEERS: usize = 200;
+        let random_peers: Vec<PeerId> = (0..NUM_OF_PEERS).map(|_| PeerId::random()).collect();
+        let mut random_keys = RandomKeysAndClosestPeerIds::default();
+
+        let mut rng = rand::thread_rng();
+        for _i in 0..NUM_OF_KBUCKETS {
+            let xorname = XorName::random(&mut rng);
+            let address = NetworkAddress::ChunkAddress(ChunkAddress::new(xorname));
+            let sorted_peers: Vec<PeerId> =
+                sort_peers_by_address(&random_peers, &address, K_VALUE.get())
+                    .unwrap()
+                    .iter()
+                    .map(|p| (**p))
+                    .collect();
+            random_keys.push((address.as_kbucket_key(), sorted_peers));
+        }
+
+        assert_eq!(
+            random_keys.iter().filter(|(_, c)| c.len() >= 20).count(),
+            NUM_OF_KBUCKETS
+        );
+
+        let estimate = get_net_size_estimate(&random_keys);
+        println!(">>> NET SIZE ESTIMATE: {estimate}");
+        // NOTE(review): exact equality on a statistical estimate looks brittle; consider a tolerance
+        assert_eq!(estimate, NUM_OF_PEERS);
+    }
+}