Skip to content

Commit

Permalink
draft: implement network size estimator formula
Browse files Browse the repository at this point in the history
  • Loading branch information
bochaco committed Apr 10, 2024
1 parent 7add612 commit cf6dbb6
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 32 deletions.
15 changes: 10 additions & 5 deletions sn_networking/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -817,17 +817,22 @@ impl Network {

/// Using a random address, check if there is a sybil attack around it
pub async fn perform_sybil_attack_check(&self) {
let (random_addr, cid) = {
let random_addr = {
let mut rng = rand::thread_rng();
let cid = XorName::random(&mut rng);
let chunk_addr = ChunkAddress::new(cid);
(NetworkAddress::from_chunk_address(chunk_addr), cid)
NetworkAddress::from_chunk_address(ChunkAddress::new(cid))
};

match self.get_closest_peers(&random_addr, true).await {
Ok(closest_peers) => {
if check_for_sybil_attack(&closest_peers, &cid).await {
info!(">>> Sybil attack detected around xorname: {cid}");
if check_for_sybil_attack(
&closest_peers,
random_addr.as_kbucket_key(),
&BTreeMap::default(),
)
.await
{
info!(">>> Sybil attack detected around addr: {random_addr}");
}
}
Err(err) => error!(">>> Failed to get closes peer to check for sybil attack: {err:?}"),
Expand Down
113 changes: 86 additions & 27 deletions sn_networking/src/sybil.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,48 @@
// KIND, either express or implied. Please review the Licences for the specific language governing
// permissions and limitations relating to use of the SAFE Network Software.

use libp2p::PeerId;
use std::collections::BTreeMap;

use libp2p::{
kad::{KBucketKey, K_VALUE},
PeerId,
};
use num::{integer::binomial, pow::Pow};
use xor_name::{XorName, XOR_NAME_LEN};

// Threshold to determine if there is an attack using Kullback-Leibler (KL) divergence
// between model peer ids distribution vs. actual distribution around any point in the address space.
const KL_DIVERGENCE_THRESHOLD: f64 = 10f64; // TODO: find a good value

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment

const K: usize = 20;
const N: usize = 25; // TODO: replace with network size estimation;

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
const ITERATIONS_FOR_NET_SIZE_ESTIMATION: usize = 50;

// The container maps each random KAD Key to the ordered list
// of its K_VALUE closest peers, sorted by increasing distance. This order
// is a prerequisite for the functions this container is used by,
// i.e. their result is dependent on the correct ordering of these values.
pub(super) type RandomKeysAndClosestPeerIds = BTreeMap<KBucketKey<Vec<u8>>, Vec<PeerId>>;

// Given the set of closest K peers ids to the passed content address, return 'true'
// if there is probabilistically a sybil attack around that CID address.
pub(super) async fn check_for_sybil_attack(peers: &[PeerId], cid: &XorName) -> bool {
// TODO: do we go ahead even if we don't have at least K peer ids...?
// This implements the algorithm proposed in https://ssg.lancs.ac.uk/wp-content/uploads/ndss_preprint.pdf
pub(super) async fn check_for_sybil_attack(
peers: &[PeerId],
cid: KBucketKey<Vec<u8>>,
random_keys: &RandomKeysAndClosestPeerIds,
) -> bool {
info!(
">>> CHECKING SYBIL ATTACK WITH {} PEERS: {peers:?}",
peers.len()
);

// FIXME: do we go ahead even if we don't have at least K peer ids...?

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
assert!(peers.len() >= K_VALUE.get());
assert!(random_keys
.iter()
.all(|(_, peers)| peers.len() >= K_VALUE.get()));

let q = num_peers_per_cpl(peers, cid);
let n = get_net_size_estimate();
let n = get_net_size_estimate(random_keys);
let p = compute_model_distribution(n);
info!(">>> MODEL DIST WITH {} PEERS: {p}", peers.len());
let kl_divergence = compute_kl_divergence(p, q);
Expand All @@ -35,52 +56,90 @@ pub(super) async fn check_for_sybil_attack(peers: &[PeerId], cid: &XorName) -> b
}

// Formula 6 in page 7
fn num_peers_per_cpl(peers: &[PeerId], cid: &XorName) -> usize {
let peers_per_cpl = peers.iter().fold(0, |acc, peer| {
let peer_kad_id = XorName::from_content(&peer.to_bytes());
acc + common_prefix(&peer_kad_id, cid)
fn num_peers_per_cpl(peers: &[PeerId], cid: KBucketKey<Vec<u8>>) -> f64 {
let k = peers.len() as f64;
let peers_per_cpl = peers.iter().fold(0f64, |acc, peer| {
// XorName::from_content(PeerId) is equivalent to converting PeerId into KBucketKey
let peer_key: KBucketKey<PeerId> = (*peer).into();
acc + common_prefix_length(peer_key.hashed_bytes(), cid.hashed_bytes()) as f64
});

peers_per_cpl / K
peers_per_cpl / k
}

// TODO: use released https://github.com/maidsafe/xor_name/pull/96 instead
fn common_prefix(lhs: &XorName, rhs: &XorName) -> usize {
for byte_index in 0..XOR_NAME_LEN {
if lhs[byte_index] != rhs[byte_index] {
return (byte_index * 8) + (lhs[byte_index] ^ rhs[byte_index]).leading_zeros() as usize;
// Helper to calculate number of common prefix bits between two slices.
//
// Bytes are compared pairwise from the start, stopping at the first byte that
// differs; if the slices have different lengths only the overlapping part is
// compared. The result saturates at `u8::MAX`: two identical 32-byte keys
// share 256 bits, which does not fit in a `u8`, so 255 is returned instead of
// overflowing.
fn common_prefix_length(lhs: &[u8], rhs: &[u8]) -> u8 {
    let mut common_bits = 0u8;
    for (l, r) in lhs.iter().zip(rhs) {
        let diff = l ^ r;
        if diff != 0 {
            // A non-zero byte has at most 7 leading zeros, so the cast is lossless.
            return common_bits.saturating_add(diff.leading_zeros() as u8);
        }
        common_bits = common_bits.saturating_add(8);
    }
    common_bits
}

// Formula 1 and 2 in page ??
fn get_net_size_estimate() -> usize {
// TODO!
N
// Formula 1 in page 3
// Compute the average distance between each of the passed random keys,
// and their i-th closest peer.
//
// `i` is a 1-based rank (1 = closest peer), while each peer list is a
// 0-indexed Vec sorted by increasing distance, so rank `i` lives at index
// `i - 1`. Keys whose list is too short to have an i-th peer are left out of
// the average. Returns 0 when no key contributes a sample (e.g. `i == 0` or
// an empty map) rather than producing NaN from a 0/0 division.
//
// NOTE(review): the value averaged is `ilog2` of the XOR distance, i.e. a
// number of bits, not the distance itself — confirm this is the unit the
// network size estimation expects.
fn average_between_keys_and_i_th_closest_peer(
    i: usize,
    random_keys: &RandomKeysAndClosestPeerIds,
) -> f64 {
    let index = match i.checked_sub(1) {
        Some(index) => index,
        None => return 0f64,
    };

    let (sum, samples) = random_keys
        .iter()
        .filter_map(|(key_j, peers)| {
            let i_th_peer: KBucketKey<PeerId> = (*peers.get(index)?).into();
            Some(key_j.distance(&i_th_peer).ilog2().unwrap_or(0) as f64)
        })
        .fold((0f64, 0usize), |(sum, samples), distance| {
            (sum + distance, samples + 1)
        });

    if samples == 0 {
        0f64
    } else {
        sum / samples as f64
    }
}

// Formula 2 in page 3
// Estimates the network size based on the distances between the provided
// random KAD Keys and their closest PeerIds.
//
// Every candidate size `n` in `0..ITERATIONS_FOR_NET_SIZE_ESTIMATION` is scored
// with the sum, over the ranks i = 1..=K_VALUE, of the squared difference
// between the measured average distance to the i-th closest peer and the
// expected one, `2^256 * i / (n + 1)`. The candidate with the smallest score
// is returned; on ties the smallest candidate wins.
fn get_net_size_estimate(random_keys: &RandomKeysAndClosestPeerIds) -> usize {
    // The measured averages do not depend on the candidate size, so they are
    // computed once up-front instead of once per candidate.
    let avg_distances: Vec<(usize, f64)> = (1..=K_VALUE.get())
        .map(|i| (i, average_between_keys_and_i_th_closest_peer(i, random_keys)))
        .collect();
    let keyspace_size = 2f64.powi(256);

    let mut best_n_found = 0;
    let mut smallest_value_found = f64::MAX;
    for n in 0..ITERATIONS_FOR_NET_SIZE_ESTIMATION {
        let value = avg_distances.iter().fold(0f64, |acc, &(i, d_i)| {
            // The expected distance must use the candidate `n` being evaluated,
            // otherwise every candidate gets the same score.
            let dist = d_i - (keyspace_size * i as f64) / (n + 1) as f64;
            acc + dist.powi(2)
        });
        if value < smallest_value_found {
            smallest_value_found = value;
            best_n_found = n;
        }
    }

    best_n_found
}

// Formula 3 in page 7
fn distrib_j_th_largest_prefix_length(j: usize, x: usize) -> f64 {
(0..j).fold(0f64, |acc, i| {
acc + binomial(N, i) as f64
acc + (binomial(N, i) as f64
* (1f64 - 0.5.pow((x + 1) as f64)).pow((N - i) as f64)
* 0.5.pow(((x + 1) * i) as f64)
* 0.5.pow(((x + 1) * i) as f64))
})
}

// Formula 4 in page 7
fn compute_model_distribution(x: usize) -> f64 {
let model_dist = (1..K + 1).fold(0f64, |acc, j| {
let model_dist = (1..=K_VALUE.get()).fold(0f64, |acc, j| {
acc + distrib_j_th_largest_prefix_length(j, x)
- distrib_j_th_largest_prefix_length(j, x - 1)
});

model_dist / K as f64
model_dist / K_VALUE.get() as f64
}

// Formula 5 in page 7
fn compute_kl_divergence(model_dist: f64, peers_per_cpl: usize) -> f64 {
fn compute_kl_divergence(model_dist: f64, peers_per_cpl: f64) -> f64 {
// TODO!

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
model_dist * peers_per_cpl as f64
model_dist * peers_per_cpl
}

0 comments on commit cf6dbb6

Please sign in to comment.