From 3d60d1d27b79a30b95aedbd35ed80409bc9efe09 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 18 Nov 2024 14:10:59 +0300 Subject: [PATCH 001/170] bump prod startup probe to 35 minutes (#692) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 139b7c6b2..65bf6a5e5 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -33,7 +33,7 @@ readinessProbe: startupProbe: initialDelaySeconds: 900 - failureThreshold: 20 + failureThreshold: 40 periodSeconds: 30 httpGet: path: /health From 9c398006cfcf4f01eb72ba3ebf0c4b56f21f1232 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 18 Nov 2024 12:47:20 +0100 Subject: [PATCH 002/170] scale pods to 0 (#693) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 65bf6a5e5..aabdd2963 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.9.10" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From 7bfb832666fa94dac9f8893836978451b263fb47 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 18 Nov 2024 15:05:43 +0300 Subject: [PATCH 003/170] scale up pods to 1 (#694) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index aabdd2963..65bf6a5e5 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.9.10" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From 8e4272bb5bbdcabb8c8bbcab5cbb48cf26f87265 Mon 
Sep 17 00:00:00 2001 From: iliailia Date: Tue, 19 Nov 2024 11:25:44 +0100 Subject: [PATCH 004/170] Make local vector store independent of networking (#668) --- iris-mpc-cpu/benches/hnsw.rs | 74 ++- iris-mpc-cpu/src/execution/local.rs | 55 +- iris-mpc-cpu/src/execution/player.rs | 2 +- iris-mpc-cpu/src/execution/session.rs | 15 +- iris-mpc-cpu/src/hawkers/galois_store.rs | 697 +++++++++++------------ iris-mpc-cpu/src/network/local.rs | 1 + iris-mpc-cpu/src/protocol/binary.rs | 4 +- iris-mpc-cpu/src/protocol/ops.rs | 52 +- iris-mpc-cpu/src/shares/share.rs | 19 +- 9 files changed, 491 insertions(+), 428 deletions(-) diff --git a/iris-mpc-cpu/benches/hnsw.rs b/iris-mpc-cpu/benches/hnsw.rs index c4fcce26d..15013dad6 100644 --- a/iris-mpc-cpu/benches/hnsw.rs +++ b/iris-mpc-cpu/benches/hnsw.rs @@ -89,8 +89,7 @@ fn bench_hnsw_primitives(c: &mut Criterion) { let t1 = create_random_sharing(&mut rng, 10_u16); let t2 = create_random_sharing(&mut rng, 10_u16); - let runtime = LocalRuntime::replicated_test_config(); - let ready_sessions = runtime.create_player_sessions().await.unwrap(); + let runtime = LocalRuntime::replicated_test_config().await.unwrap(); let mut jobs = JoinSet::new(); for (index, player) in runtime.identities.iter().enumerate() { @@ -98,7 +97,7 @@ fn bench_hnsw_primitives(c: &mut Criterion) { let d2i = d2[index].clone(); let t1i = t1[index].clone(); let t2i = t2[index].clone(); - let mut player_session = ready_sessions.get(player).unwrap().clone(); + let mut player_session = runtime.sessions.get(player).unwrap().clone(); jobs.spawn(async move { cross_compare(&mut player_session, d1i, t1i, d2i, t2i) .await @@ -117,8 +116,7 @@ fn bench_gr_primitives(c: &mut Criterion) { .build() .unwrap(); b.to_async(&rt).iter(|| async move { - let runtime = LocalRuntime::replicated_test_config(); - let ready_sessions = runtime.create_player_sessions().await.unwrap(); + let runtime = LocalRuntime::replicated_test_config().await.unwrap(); let mut rng = AesRng::seed_from_u64(0); 
let iris_db = IrisDB::new_random_rng(4, &mut rng).db; @@ -135,7 +133,7 @@ fn bench_gr_primitives(c: &mut Criterion) { let x2 = x2[index].clone(); let mut y2 = y2[index].clone(); - let mut player_session = ready_sessions.get(player).unwrap().clone(); + let mut player_session = runtime.sessions.get(player).unwrap().clone(); jobs.spawn(async move { y1.code.preprocess_iris_code_query_share(); y1.mask.preprocess_mask_code_query_share(); @@ -186,25 +184,39 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { |b| { b.to_async(&rt).iter_batched( || secret_searcher.clone(), - |(mut db_vectors, mut db_graph)| async move { + |vectors_graphs| async move { let searcher = HawkSearcher::default(); let mut rng = AesRng::seed_from_u64(0_u64); let on_the_fly_query = IrisDB::new_random_rng(1, &mut rng).db[0].clone(); let raw_query = generate_galois_iris_shares(&mut rng, on_the_fly_query); - let query = db_vectors.prepare_query(raw_query); - let neighbors = searcher - .search_to_insert(&mut db_vectors, &mut db_graph, &query) - .await; - searcher - .insert_from_search_results( - &mut db_vectors, - &mut db_graph, - &mut rng, - query, - neighbors, - ) - .await; + let mut jobs = JoinSet::new(); + + for (vector_store, graph_store) in vectors_graphs.into_iter() { + let mut vector_store = vector_store; + let mut graph_store = graph_store; + + let player_index = vector_store.get_owner_index(); + let query = vector_store.prepare_query(raw_query[player_index].clone()); + let searcher = searcher.clone(); + let mut rng = rng.clone(); + jobs.spawn(async move { + let neighbors = searcher + .search_to_insert(&mut vector_store, &mut graph_store, &query) + .await; + searcher + .insert_from_search_results( + &mut vector_store, + &mut graph_store, + &mut rng, + query, + neighbors, + ) + .await; + }); + } + + jobs.join_all().await; }, criterion::BatchSize::SmallInput, ) @@ -216,17 +228,27 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { |b| { b.to_async(&rt).iter_batched( || secret_searcher.clone(), - 
|(mut db_vectors, mut db_graph)| async move { + |vectors_graphs| async move { let searcher = HawkSearcher::default(); let mut rng = AesRng::seed_from_u64(0_u64); let on_the_fly_query = IrisDB::new_random_rng(1, &mut rng).db[0].clone(); let raw_query = generate_galois_iris_shares(&mut rng, on_the_fly_query); - let query = db_vectors.prepare_query(raw_query); - let neighbors = searcher - .search_to_insert(&mut db_vectors, &mut db_graph, &query) - .await; - searcher.is_match(&mut db_vectors, &neighbors).await; + let mut jobs = JoinSet::new(); + for (vector_store, graph_store) in vectors_graphs.into_iter() { + let mut vector_store = vector_store; + let mut graph_store = graph_store; + let player_index = vector_store.get_owner_index(); + let query = vector_store.prepare_query(raw_query[player_index].clone()); + let searcher = searcher.clone(); + jobs.spawn(async move { + let neighbors = searcher + .search_to_insert(&mut vector_store, &mut graph_store, &query) + .await; + searcher.is_match(&mut vector_store, &neighbors).await; + }); + } + jobs.join_all().await; }, criterion::BatchSize::SmallInput, ) diff --git a/iris-mpc-cpu/src/execution/local.rs b/iris-mpc-cpu/src/execution/local.rs index cc400076b..12e9f8a99 100644 --- a/iris-mpc-cpu/src/execution/local.rs +++ b/iris-mpc-cpu/src/execution/local.rs @@ -4,57 +4,55 @@ use crate::{ session::{BootSession, Session, SessionHandles, SessionId}, }, network::local::LocalNetworkingStore, - protocol::{ - ops::setup_replicated_prf, - prf::{Prf, PrfSeed}, - }, + protocol::{ops::setup_replicated_prf, prf::PrfSeed}, }; use std::{collections::HashMap, sync::Arc}; use tokio::task::JoinSet; +pub fn generate_local_identities() -> Vec { + vec![ + Identity::from("alice"), + Identity::from("bob"), + Identity::from("charlie"), + ] +} + #[derive(Debug, Clone)] pub struct LocalRuntime { pub identities: Vec, pub role_assignments: RoleAssignment, - pub prf_setups: Option>, pub seeds: Vec, + // only one session per player is created + pub 
sessions: HashMap, } impl LocalRuntime { - pub fn replicated_test_config() -> Self { + pub async fn replicated_test_config() -> eyre::Result { let num_parties = 3; - let identities: Vec = vec!["alice".into(), "bob".into(), "charlie".into()]; + let identities = generate_local_identities(); let mut seeds = Vec::new(); for i in 0..num_parties { let mut seed = [0_u8; 16]; seed[0] = i; seeds.push(seed); } - LocalRuntime::new(identities, seeds) + LocalRuntime::new(identities, seeds).await } - pub fn new(identities: Vec, seeds: Vec) -> Self { + + pub async fn new(identities: Vec, seeds: Vec) -> eyre::Result { let role_assignments: RoleAssignment = identities .iter() .enumerate() .map(|(index, id)| (Role::new(index), id.clone())) .collect(); - LocalRuntime { - identities, - role_assignments, - prf_setups: None, - seeds, - } - } - - pub async fn create_player_sessions(&self) -> eyre::Result> { - let network = LocalNetworkingStore::from_host_ids(&self.identities); + let network = LocalNetworkingStore::from_host_ids(&identities); let sess_id = SessionId::from(0_u128); - let boot_sessions: Vec = (0..self.seeds.len()) + let boot_sessions: Vec = (0..seeds.len()) .map(|i| { - let identity = self.identities[i].clone(); + let identity = identities[i].clone(); BootSession { session_id: sess_id, - role_assignments: Arc::new(self.role_assignments.clone()), + role_assignments: Arc::new(role_assignments.clone()), networking: Arc::new(network.get_local_network(identity.clone())), own_identity: identity, } @@ -63,21 +61,26 @@ impl LocalRuntime { let mut jobs = JoinSet::new(); for (player_id, boot_session) in boot_sessions.iter().enumerate() { - let player_seed = self.seeds[player_id]; + let player_seed = seeds[player_id]; let sess = boot_session.clone(); jobs.spawn(async move { let prf = setup_replicated_prf(&sess, player_seed).await.unwrap(); (sess, prf) }); } - let mut complete_sessions = HashMap::new(); + let mut sessions = HashMap::new(); while let Some(t) = jobs.join_next().await { 
let (boot_session, prf) = t.unwrap(); - complete_sessions.insert(boot_session.own_identity(), Session { + sessions.insert(boot_session.own_identity(), Session { boot_session, setup: prf, }); } - Ok(complete_sessions) + Ok(LocalRuntime { + identities, + role_assignments, + seeds, + sessions, + }) } } diff --git a/iris-mpc-cpu/src/execution/player.rs b/iris-mpc-cpu/src/execution/player.rs index 49690755a..94364f853 100644 --- a/iris-mpc-cpu/src/execution/player.rs +++ b/iris-mpc-cpu/src/execution/player.rs @@ -40,7 +40,7 @@ impl Role { } /// Retrieve index of Role (zero indexed) - pub fn zero_based(&self) -> usize { + pub fn index(&self) -> usize { self.0 as usize } diff --git a/iris-mpc-cpu/src/execution/session.rs b/iris-mpc-cpu/src/execution/session.rs index 8d7f05dd6..2b857d9f3 100644 --- a/iris-mpc-cpu/src/execution/session.rs +++ b/iris-mpc-cpu/src/execution/session.rs @@ -5,7 +5,7 @@ use crate::{ }; use eyre::eyre; use serde::{Deserialize, Serialize}; -use std::{collections::HashMap, sync::Arc}; +use std::{collections::HashMap, fmt::Debug, sync::Arc}; #[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct SessionId(pub u128); @@ -18,7 +18,7 @@ impl From for SessionId { pub type NetworkingImpl = Arc; -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct Session { pub boot_session: BootSession, pub setup: Prf, @@ -32,6 +32,17 @@ pub struct BootSession { pub own_identity: Identity, } +impl Debug for BootSession { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // TODO: incorporate networking into debug output + f.debug_struct("BootSession") + .field("session_id", &self.session_id) + .field("role_assignments", &self.role_assignments) + .field("own_identity", &self.own_identity) + .finish() + } +} + pub trait SessionHandles { fn session_id(&self) -> SessionId; fn own_role(&self) -> eyre::Result; diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs 
index 4fd542753..8e975a377 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -1,12 +1,17 @@ use super::plaintext_store::PlaintextStore; use crate::{ database_generators::{generate_galois_iris_shares, GaloisRingSharedIris}, - execution::{local::LocalRuntime, player::Identity, session::Session}, + execution::{ + local::{generate_local_identities, LocalRuntime}, + player::Identity, + session::Session, + }, hawkers::plaintext_store::PointId, protocol::ops::{ - cross_compare, galois_ring_pairwise_distance, galois_ring_to_rep3, is_dot_zero, + compare_threshold_and_open, cross_compare, galois_ring_pairwise_distance, + galois_ring_to_rep3, }, - shares::{int_ring::IntRing2k, share::Share}, + shares::share::{DistanceShare, Share}, }; use aes_prng::AesRng; use hawk_pack::{ @@ -16,8 +21,7 @@ use hawk_pack::{ }; use iris_mpc_common::iris_db::{db::IrisDB, iris::IrisCode}; use rand::{CryptoRng, RngCore, SeedableRng}; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::{collections::HashMap, fmt::Debug, vec}; use tokio::task::JoinSet; #[derive(Default, Clone)] @@ -68,83 +72,86 @@ pub fn setup_local_player_preloaded_db( Ok(aby3_store) } -pub fn setup_local_aby3_players_with_preloaded_db( +pub async fn setup_local_aby3_players_with_preloaded_db( rng: &mut R, database: Vec, -) -> eyre::Result { - let mut p0 = Vec::new(); - let mut p1 = Vec::new(); - let mut p2 = Vec::new(); +) -> eyre::Result> { + let identities = generate_local_identities(); + + let mut shared_irises = vec![vec![]; identities.len()]; for iris in database { let all_shares = generate_galois_iris_shares(rng, iris); - p0.push(all_shares[0].clone()); - p1.push(all_shares[1].clone()); - p2.push(all_shares[2].clone()); + for (i, shares) in all_shares.iter().enumerate() { + shared_irises[i].push(shares.clone()); + } } - let player_0 = setup_local_player_preloaded_db(p0)?; - let player_1 = setup_local_player_preloaded_db(p1)?; - let player_2 = 
setup_local_player_preloaded_db(p2)?; - let players = HashMap::from([ - (Identity::from("alice"), player_0), - (Identity::from("bob"), player_1), - (Identity::from("charlie"), player_2), - ]); - let runtime = LocalRuntime::replicated_test_config(); - Ok(LocalNetAby3NgStoreProtocol { runtime, players }) + let storages: Vec = shared_irises + .into_iter() + .map(|player_irises| setup_local_player_preloaded_db(player_irises).unwrap()) + .collect(); + let runtime = LocalRuntime::replicated_test_config().await?; + + let local_stores = identities + .into_iter() + .zip(storages.into_iter()) + .map(|(identity, storage)| LocalNetAby3NgStoreProtocol { + runtime: runtime.clone(), + storage, + owner: identity, + }) + .collect(); + + Ok(local_stores) } #[derive(Debug, Clone)] pub struct LocalNetAby3NgStoreProtocol { - pub players: HashMap, + pub owner: Identity, + pub storage: Aby3NgStorePlayer, pub runtime: LocalRuntime, } -pub fn setup_local_store_aby3_players() -> eyre::Result { - let player_0 = Aby3NgStorePlayer::default(); - let player_1 = Aby3NgStorePlayer::default(); - let player_2 = Aby3NgStorePlayer::default(); - let runtime = LocalRuntime::replicated_test_config(); - let players = HashMap::from([ - (Identity::from("alice"), player_0), - (Identity::from("bob"), player_1), - (Identity::from("charlie"), player_2), - ]); - Ok(LocalNetAby3NgStoreProtocol { runtime, players }) -} - impl LocalNetAby3NgStoreProtocol { - pub fn prepare_query(&mut self, code: Vec) -> PointId { - assert_eq!(code.len(), 3); - assert_eq!(self.players.len(), 3); - let pid0 = self - .players - .get_mut(&Identity::from("alice")) - .unwrap() - .prepare_query(code[0].clone()); - let pid1 = self - .players - .get_mut(&Identity::from("bob")) - .unwrap() - .prepare_query(code[1].clone()); - let pid2 = self - .players - .get_mut(&Identity::from("charlie")) + pub fn get_owner_session(&self) -> Session { + self.runtime.sessions.get(&self.owner).unwrap().clone() + } + + pub fn get_owner_index(&self) -> usize { 
+ self.runtime + .role_assignments + .iter() + .find_map(|(role, id)| { + if id.clone() == self.owner { + Some(role.clone()) + } else { + None + } + }) .unwrap() - .prepare_query(code[2].clone()); - assert_eq!(pid0, pid1); - assert_eq!(pid1, pid2); - pid0 + .index() } } -#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] -#[serde(bound = "")] -pub struct DistanceShare { - code_dot: Share, - mask_dot: Share, - player: Identity, +pub async fn setup_local_store_aby3_players() -> eyre::Result> { + let runtime = LocalRuntime::replicated_test_config().await?; + let players = generate_local_identities(); + let local_stores = players + .into_iter() + .map(|identity| LocalNetAby3NgStoreProtocol { + runtime: runtime.clone(), + storage: Aby3NgStorePlayer::default(), + owner: identity, + }) + .collect(); + Ok(local_stores) +} + +impl LocalNetAby3NgStoreProtocol { + pub fn prepare_query(&mut self, code: GaloisRingSharedIris) -> PointId { + self.storage.prepare_query(code) + } } async fn eval_pairwise_distances( @@ -166,13 +173,11 @@ async fn eval_pairwise_distances( impl VectorStore for LocalNetAby3NgStoreProtocol { type QueryRef = PointId; // Vector ID, pending insertion. type VectorRef = PointId; // Vector ID, inserted. - type DistanceRef = Vec>; // Distance represented as shares. + type DistanceRef = DistanceShare; // Distance represented as shares. async fn insert(&mut self, query: &Self::QueryRef) -> Self::VectorRef { // The query is now accepted in the store. It keeps the same ID. 
- for (_id, storage) in self.players.iter_mut() { - storage.insert(query); - } + self.storage.insert(query); *query } @@ -181,24 +186,14 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { query: &Self::QueryRef, vector: &Self::VectorRef, ) -> Self::DistanceRef { - let ready_sessions = self.runtime.create_player_sessions().await.unwrap(); - let mut jobs = JoinSet::new(); - for player in self.runtime.identities.clone() { - let mut player_session = ready_sessions.get(&player).unwrap().clone(); - let storage = self.players.get(&player).unwrap(); - let query_point = storage.points[*query].clone(); - let vector_point = storage.points[*vector].clone(); - let pairs = vec![(query_point.data, vector_point.data)]; - jobs.spawn(async move { - let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; - DistanceShare { - code_dot: ds_and_ts[0].clone(), - mask_dot: ds_and_ts[1].clone(), - player: player.clone(), - } - }); - } - jobs.join_all().await + let mut player_session = self.get_owner_session(); + // TODO: decouple queries and vectors. Ideally, queries should be kept in a + // separate store. 
+ let query_point = self.storage.points[*query].clone(); + let vector_point = self.storage.points[*vector].clone(); + let pairs = vec![(query_point.data, vector_point.data)]; + let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; + DistanceShare::new(ds_and_ts[0].clone(), ds_and_ts[1].clone()) } async fn eval_distance_batch( @@ -206,68 +201,29 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { query: &Self::QueryRef, vectors: &[Self::VectorRef], ) -> Vec { - let ready_sessions = self.runtime.create_player_sessions().await.unwrap(); - let mut jobs = JoinSet::new(); - for player in self.runtime.identities.clone() { - let mut player_session = ready_sessions.get(&player).unwrap().clone(); - let storage = self.players.get(&player).unwrap(); - let query_point = storage.points[*query].clone(); - let pairs = vectors - .iter() - .map(|vector_id| { - let vector_point = storage.points[*vector_id].clone(); - (query_point.data.clone(), vector_point.data) - }) - .collect::>(); - jobs.spawn(async move { - let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; - ds_and_ts - .chunks(2) - .map(|dot_products| DistanceShare { - code_dot: dot_products[0].clone(), - mask_dot: dot_products[1].clone(), - player: player.clone(), - }) - .collect::>() - }); - } - // Now we have a vector of 3 vectors of DistanceShares, we need to transpose it - // to a vector of DistanceRef - let mut all_shares = jobs - .join_all() - .await - .into_iter() - .map(|player_shares| player_shares.into_iter()) + let mut player_session = self.get_owner_session(); + let query_point = self.storage.points[*query].clone(); + let pairs = vectors + .iter() + .map(|vector_id| { + let vector_point = self.storage.points[*vector_id].clone(); + (query_point.data.clone(), vector_point.data) + }) .collect::>(); - (0..vectors.len()) - .map(|_| { - all_shares - .iter_mut() - .map(|player_shares| player_shares.next().unwrap()) - .collect::() + let ds_and_ts = 
eval_pairwise_distances(pairs, &mut player_session).await; + ds_and_ts + .chunks(2) + .map(|dot_products| { + DistanceShare::new(dot_products[0].clone(), dot_products[1].clone()) }) - .collect::>() + .collect::>() } async fn is_match(&mut self, distance: &Self::DistanceRef) -> bool { - let ready_sessions = self.runtime.create_player_sessions().await.unwrap(); - let mut jobs = JoinSet::new(); - for distance_share in distance.iter() { - let mut player_session = ready_sessions.get(&distance_share.player).unwrap().clone(); - let code_dot = distance_share.code_dot.clone(); - let mask_dot = distance_share.mask_dot.clone(); - jobs.spawn(async move { - is_dot_zero(&mut player_session, code_dot, mask_dot) - .await - .unwrap() - }); - } - let r0 = jobs.join_next().await.unwrap().unwrap(); - let r1 = jobs.join_next().await.unwrap().unwrap(); - let r2 = jobs.join_next().await.unwrap().unwrap(); - assert_eq!(r0, r1); - assert_eq!(r1, r2); - r0 + let mut player_session = self.get_owner_session(); + compare_threshold_and_open(&mut player_session, distance.clone()) + .await + .unwrap() } async fn less_than( @@ -275,41 +231,27 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { distance1: &Self::DistanceRef, distance2: &Self::DistanceRef, ) -> bool { - let ready_sessions = self.runtime.create_player_sessions().await.unwrap(); - let mut jobs = JoinSet::new(); - for share1 in distance1.iter() { - for share2 in distance2.iter() { - if share1.player == share2.player { - let mut player_session = ready_sessions.get(&share1.player).unwrap().clone(); - let code_dot1 = share1.code_dot.clone(); - let mask_dot1 = share1.mask_dot.clone(); - let code_dot2 = share2.code_dot.clone(); - let mask_dot2 = share2.mask_dot.clone(); - jobs.spawn(async move { - cross_compare( - &mut player_session, - code_dot1, - mask_dot1, - code_dot2, - mask_dot2, - ) - .await - .unwrap() - }); - } - } - } - let res = jobs.join_all().await; - assert_eq!(res[0], res[1]); - assert_eq!(res[0], res[2]); - res[0] + let 
mut player_session = self.get_owner_session(); + let code_dot1 = distance1.code_dot.clone(); + let mask_dot1 = distance1.mask_dot.clone(); + let code_dot2 = distance2.code_dot.clone(); + let mask_dot2 = distance2.mask_dot.clone(); + cross_compare( + &mut player_session, + code_dot1, + mask_dot1, + code_dot2, + mask_dot2, + ) + .await + .unwrap() } } impl LocalNetAby3NgStoreProtocol { async fn graph_from_plain( &mut self, - graph_store: GraphMem, + graph_store: &GraphMem, ) -> GraphMem { let ep = graph_store.get_entry_point().await; @@ -323,15 +265,17 @@ impl LocalNetAby3NgStoreProtocol { let mut shared_queue = vec![]; for (target_v, _) in queue.as_vec_ref() { // recompute distances of graph edges from scratch - let shared_distance = self.eval_distance(source_v, target_v).await; - shared_queue.push((*target_v, shared_distance)); + let distance = self.eval_distance(source_v, target_v).await; + shared_queue.push((*target_v, distance.clone())); } - shared_links.insert(*source_v, FurthestQueue::from_ascending_vec(shared_queue)); + shared_links.insert( + *source_v, + FurthestQueue::from_ascending_vec(shared_queue.clone()), + ); } shared_layers.push(Layer::from_links(shared_links)); } - - GraphMem::from_precomputed(ep, shared_layers) + GraphMem::from_precomputed(ep.clone(), shared_layers) } } @@ -340,10 +284,10 @@ pub async fn gr_create_ready_made_hawk_searcher( database_size: usize, ) -> eyre::Result<( (PlaintextStore, GraphMem), - ( + Vec<( LocalNetAby3NgStoreProtocol, GraphMem, - ), + )>, )> { // makes sure the searcher produces same graph structure by having the same rng let mut rng_searcher1 = AesRng::from_rng(rng.clone())?; @@ -374,54 +318,74 @@ pub async fn gr_create_ready_made_hawk_searcher( .await; } - let mut protocol_store = setup_local_aby3_players_with_preloaded_db(rng, cleartext_database)?; - let protocol_graph = protocol_store - .graph_from_plain(plaintext_graph_store.clone()) - .await; - + let protocol_stores = + 
setup_local_aby3_players_with_preloaded_db(rng, cleartext_database).await?; + + let mut jobs = JoinSet::new(); + for store in protocol_stores.into_iter() { + let mut store = store; + let plaintext_graph_store = plaintext_graph_store.clone(); + jobs.spawn(async move { + let graph = store.graph_from_plain(&plaintext_graph_store).await; + (store, graph) + }); + } + let mut secret_shared_stores = jobs.join_all().await; + secret_shared_stores.sort_by_key(|(store, _)| store.get_owner_index()); let plaintext = (plaintext_vector_store, plaintext_graph_store); - let secret = (protocol_store, protocol_graph); - Ok((plaintext, secret)) + Ok((plaintext, secret_shared_stores)) } pub async fn ng_create_from_scratch_hawk_searcher( rng: &mut R, database_size: usize, -) -> eyre::Result<( - LocalNetAby3NgStoreProtocol, - GraphMem, -)> { - let mut rng_searcher = AesRng::from_rng(rng.clone())?; +) -> eyre::Result< + Vec<( + LocalNetAby3NgStoreProtocol, + GraphMem, + )>, +> { + let rng_searcher = AesRng::from_rng(rng.clone())?; let cleartext_database = IrisDB::new_random_rng(database_size, rng).db; let shared_irises: Vec<_> = (0..database_size) .map(|id| generate_galois_iris_shares(rng, cleartext_database[id].clone())) .collect(); - let searcher = HawkSearcher::default(); - let mut aby3_store_protocol = setup_local_store_aby3_players().unwrap(); - let mut graph_store = GraphMem::new(); - - let queries = (0..database_size) - .map(|id| aby3_store_protocol.prepare_query(shared_irises[id].clone())) - .collect::>(); + let local_stores = setup_local_store_aby3_players().await?; - // insert queries - for query in queries.iter() { - let neighbors = searcher - .search_to_insert(&mut aby3_store_protocol, &mut graph_store, query) - .await; - searcher - .insert_from_search_results( - &mut aby3_store_protocol, - &mut graph_store, - &mut rng_searcher, - *query, - neighbors, - ) - .await; + let mut jobs = JoinSet::new(); + for store in local_stores.into_iter() { + let mut store = store; + let role = 
store.get_owner_index(); + let mut rng_searcher = rng_searcher.clone(); + let queries = (0..database_size) + .map(|id| store.prepare_query(shared_irises[id][role].clone())) + .collect::>(); + jobs.spawn(async move { + let mut graph_store = GraphMem::new(); + let searcher = HawkSearcher::default(); + // insert queries + for query in queries.iter() { + let neighbors = searcher + .search_to_insert(&mut store, &mut graph_store, query) + .await; + searcher + .insert_from_search_results( + &mut store, + &mut graph_store, + &mut rng_searcher, + *query, + neighbors, + ) + .await; + } + (store, graph_store) + }); } - - Ok((aby3_store_protocol, graph_store)) + let mut result = jobs.join_all().await; + // preserve order of players + result.sort_by_key(|(store, _)| store.get_owner_index()); + Ok(result) } #[cfg(test)] @@ -430,7 +394,6 @@ mod tests { use crate::database_generators::generate_galois_iris_shares; use aes_prng::AesRng; use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher}; - use iris_mpc_common::iris_db::db::IrisDB; use itertools::Itertools; use rand::SeedableRng; use tracing_test::traced_test; @@ -440,47 +403,61 @@ mod tests { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 10; let cleartext_database = IrisDB::new_random_rng(database_size, &mut rng).db; + let shared_irises: Vec<_> = cleartext_database + .iter() + .map(|iris| generate_galois_iris_shares(&mut rng, iris.clone())) + .collect(); - let mut aby3_store = setup_local_store_aby3_players().unwrap(); - let mut aby3_graph = GraphMem::new(); - let db = HawkSearcher::default(); + let mut stores = setup_local_store_aby3_players().await.unwrap(); - let queries = (0..database_size) - .map(|id| { - aby3_store.prepare_query(generate_galois_iris_shares( - &mut rng, - cleartext_database[id].clone(), - )) - }) - .collect::>(); - - // insert queries - for query in queries.iter() { - let neighbors = db - .search_to_insert(&mut aby3_store, &mut aby3_graph, query) - .await; - 
db.insert_from_search_results( - &mut aby3_store, - &mut aby3_graph, - &mut rng, - *query, - neighbors, - ) - .await; + let mut jobs = JoinSet::new(); + for store in stores.iter_mut() { + let player_index = store.get_owner_index(); + let queries = (0..database_size) + .map(|id| store.prepare_query(shared_irises[id][player_index].clone())) + .collect::>(); + let mut store = store.clone(); + let mut rng = rng.clone(); + jobs.spawn(async move { + let mut aby3_graph = GraphMem::new(); + let db = HawkSearcher::default(); + + // insert queries + for query in queries.iter() { + let neighbors = db + .search_to_insert(&mut store, &mut aby3_graph, query) + .await; + db.insert_from_search_results( + &mut store, + &mut aby3_graph, + &mut rng, + *query, + neighbors, + ) + .await; + } + println!("FINISHED INSERTING"); + // Search for the same codes and find matches. + let mut matching_results = vec![]; + for query in queries.iter() { + let neighbors = db + .search_to_insert(&mut store, &mut aby3_graph, query) + .await; + tracing::debug!("Finished query"); + matching_results.push(db.is_match(&mut store, &neighbors).await) + } + matching_results + }); } - println!("FINISHED INSERTING"); - // Search for the same codes and find matches. 
- for (index, query) in queries.iter().enumerate() { - let neighbors = db - .search_to_insert(&mut aby3_store, &mut aby3_graph, query) - .await; - // assert_eq!(false, true); - tracing::debug!("Finished query"); - assert!( - db.is_match(&mut aby3_store, &neighbors).await, - "failed at index {:?}", - index - ); + let matching_results = jobs.join_all().await; + for (party_id, party_results) in matching_results.iter().enumerate() { + for (index, result) in party_results.iter().enumerate() { + assert!( + *result, + "Failed at index {:?} for party {:?}", + index, party_id + ); + } } } @@ -489,56 +466,21 @@ mod tests { async fn test_gr_premade_hnsw() { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 10; - let (mut cleartext_data, mut secret_data) = + let (mut cleartext_data, secret_data) = gr_create_ready_made_hawk_searcher(&mut rng, database_size) .await .unwrap(); let mut rng = AesRng::seed_from_u64(0_u64); - let (mut vector_store, mut graph_store) = - ng_create_from_scratch_hawk_searcher(&mut rng, database_size) - .await - .unwrap(); + let vector_graph_stores = ng_create_from_scratch_hawk_searcher(&mut rng, database_size) + .await + .unwrap(); - assert_eq!( - vector_store - .players - .get(&Identity::from("alice")) - .unwrap() - .points, - secret_data - .0 - .players - .get(&Identity::from("alice")) - .unwrap() - .points - ); - assert_eq!( - vector_store - .players - .get(&Identity::from("bob")) - .unwrap() - .points, - secret_data - .0 - .players - .get(&Identity::from("bob")) - .unwrap() - .points - ); - assert_eq!( - vector_store - .players - .get(&Identity::from("charlie")) - .unwrap() - .points, - secret_data - .0 - .players - .get(&Identity::from("charlie")) - .unwrap() - .points - ); + for ((v_from_scratch, _), (premade_v, _)) in + vector_graph_stores.iter().zip(secret_data.iter()) + { + assert_eq!(v_from_scratch.storage.points, premade_v.storage.points); + } let hawk_searcher = HawkSearcher::default(); for i in 0..database_size { @@ -551,23 
+493,39 @@ mod tests { .await, ); - let secret_neighbors = hawk_searcher - .search_to_insert(&mut secret_data.0, &mut secret_data.1, &i.into()) - .await; - assert!( - hawk_searcher - .is_match(&mut secret_data.0, &secret_neighbors) - .await - ); + let mut jobs = JoinSet::new(); + for (v, g) in vector_graph_stores.iter() { + let hawk_searcher = hawk_searcher.clone(); + let mut v = v.clone(); + let mut g = g.clone(); + jobs.spawn(async move { + let secret_neighbors = hawk_searcher + .search_to_insert(&mut v, &mut g, &i.into()) + .await; + + hawk_searcher.is_match(&mut v, &secret_neighbors).await + }); + } + let scratch_results = jobs.join_all().await; + + let mut jobs = JoinSet::new(); + for (v, g) in secret_data.iter() { + let hawk_searcher = hawk_searcher.clone(); + let mut v = v.clone(); + let mut g = g.clone(); + jobs.spawn(async move { + let secret_neighbors = hawk_searcher + .search_to_insert(&mut v, &mut g, &i.into()) + .await; + + hawk_searcher.is_match(&mut v, &secret_neighbors).await + }); + } + let premade_results = jobs.join_all().await; - let scratch_secret_neighbors = hawk_searcher - .search_to_insert(&mut vector_store, &mut graph_store, &i.into()) - .await; - assert!( - hawk_searcher - .is_match(&mut vector_store, &scratch_secret_neighbors) - .await, - ); + for (premade_res, scratch_res) in scratch_results.iter().zip(premade_results.iter()) { + assert!(*premade_res && *scratch_res); + } } } @@ -577,21 +535,11 @@ mod tests { let mut rng = AesRng::seed_from_u64(0_u64); let db_dim = 4; let cleartext_database = IrisDB::new_random_rng(db_dim, &mut rng).db; - - let mut aby3_store_protocol = setup_local_store_aby3_players().unwrap(); - - let aby3_preps: Vec<_> = (0..db_dim) - .map(|id| { - aby3_store_protocol.prepare_query(generate_galois_iris_shares( - &mut rng, - cleartext_database[id].clone(), - )) - }) + let shared_irises: Vec<_> = cleartext_database + .iter() + .map(|iris| generate_galois_iris_shares(&mut rng, iris.clone())) .collect(); - let mut 
aby3_inserts = Vec::new(); - for p in aby3_preps.iter() { - aby3_inserts.push(aby3_store_protocol.insert(p).await); - } + let mut local_stores = setup_local_store_aby3_players().await.unwrap(); // Now do the work for the plaintext store let mut plaintext_store = PlaintextStore::default(); let plaintext_preps: Vec<_> = (0..db_dim) @@ -601,31 +549,70 @@ mod tests { for p in plaintext_preps.iter() { plaintext_inserts.push(plaintext_store.insert(p).await); } + + // pairs of indices to compare let it1 = (0..db_dim).combinations(2); let it2 = (0..db_dim).combinations(2); - for comb1 in it1 { + + let mut plain_results = HashMap::new(); + for comb1 in it1.clone() { for comb2 in it2.clone() { - let dist1_aby3 = aby3_store_protocol - .eval_distance(&aby3_inserts[comb1[0]], &aby3_inserts[comb1[1]]) - .await; - let dist2_aby3 = aby3_store_protocol - .eval_distance(&aby3_inserts[comb2[0]], &aby3_inserts[comb2[1]]) - .await; + // compute distances in plaintext let dist1_plain = plaintext_store .eval_distance(&plaintext_inserts[comb1[0]], &plaintext_inserts[comb1[1]]) .await; let dist2_plain = plaintext_store .eval_distance(&plaintext_inserts[comb2[0]], &plaintext_inserts[comb2[1]]) .await; - assert_eq!( - aby3_store_protocol - .less_than(&dist1_aby3, &dist2_aby3) - .await, - plaintext_store.less_than(&dist1_plain, &dist2_plain).await, - "Failed at combo: {:?}, {:?}", - comb1, - comb2 - ) + let bit = plaintext_store.less_than(&dist1_plain, &dist2_plain).await; + plain_results.insert((comb1.clone(), comb2.clone()), bit); + } + } + + let mut aby3_inserts = vec![]; + for store in local_stores.iter_mut() { + let player_index = store.get_owner_index(); + let player_preps: Vec<_> = (0..db_dim) + .map(|id| store.prepare_query(shared_irises[id][player_index].clone())) + .collect(); + let mut player_inserts = vec![]; + for p in player_preps.iter() { + player_inserts.push(store.insert(p).await); + } + aby3_inserts.push(player_inserts); + } + + for comb1 in it1 { + for comb2 in it2.clone() 
{ + let mut jobs = JoinSet::new(); + for store in local_stores.iter() { + let player_index = store.get_owner_index(); + let player_inserts = aby3_inserts[player_index].clone(); + let mut store = store.clone(); + let index10 = comb1[0]; + let index11 = comb1[1]; + let index20 = comb2[0]; + let index21 = comb2[1]; + jobs.spawn(async move { + let dist1_aby3 = store + .eval_distance(&player_inserts[index10], &player_inserts[index11]) + .await; + let dist2_aby3 = store + .eval_distance(&player_inserts[index20], &player_inserts[index21]) + .await; + store.less_than(&dist1_aby3, &dist2_aby3).await + }); + } + let res = jobs.join_all().await; + for bit in res { + assert_eq!( + bit, + plain_results[&(comb1.clone(), comb2.clone())], + "Failed at combo: {:?}, {:?}", + comb1, + comb2 + ) + } } } } @@ -636,19 +623,27 @@ mod tests { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 2; let searcher = HawkSearcher::default(); - let (mut vector, mut graph) = ng_create_from_scratch_hawk_searcher(&mut rng, database_size) + let mut vectors_and_graphs = ng_create_from_scratch_hawk_searcher(&mut rng, database_size) .await .unwrap(); for i in 0..database_size { - let secret_neighbors = searcher - .search_to_insert(&mut vector, &mut graph, &i.into()) - .await; - assert!( - searcher.is_match(&mut vector, &secret_neighbors).await, - "Failed at index {:?}", - i - ); + let mut jobs = JoinSet::new(); + for (store, graph) in vectors_and_graphs.iter_mut() { + let mut store = store.clone(); + let mut graph = graph.clone(); + let searcher = searcher.clone(); + jobs.spawn(async move { + let secret_neighbors = searcher + .search_to_insert(&mut store, &mut graph, &i.into()) + .await; + searcher.is_match(&mut store, &secret_neighbors).await + }); + } + let res = jobs.join_all().await; + for (party_index, r) in res.iter().enumerate() { + assert!(r, "Failed at index {:?} by party {:?}", i, party_index); + } } } } diff --git a/iris-mpc-cpu/src/network/local.rs 
b/iris-mpc-cpu/src/network/local.rs index 22ad2fcf5..dbdd9e36e 100644 --- a/iris-mpc-cpu/src/network/local.rs +++ b/iris-mpc-cpu/src/network/local.rs @@ -51,6 +51,7 @@ impl LocalNetworkingStore { } } +#[derive(Debug)] pub struct LocalNetworking { p2p_channels: P2PChannels, pub owner: Identity, diff --git a/iris-mpc-cpu/src/protocol/binary.rs b/iris-mpc-cpu/src/protocol/binary.rs index 5059a828f..3e55308a2 100644 --- a/iris-mpc-cpu/src/protocol/binary.rs +++ b/iris-mpc-cpu/src/protocol/binary.rs @@ -29,7 +29,7 @@ pub(crate) fn a2b_pre( let mut x2 = Share::zero(); let mut x3 = Share::zero(); - match session.own_role()?.zero_based() { + match session.own_role()?.index() { 0 => { x1.a = a; x3.b = b; @@ -384,7 +384,7 @@ pub(crate) async fn bit_inject_ot_2round( session: &mut Session, input: VecShare, ) -> Result, Error> { - let res = match session.own_role()?.zero_based() { + let res = match session.own_role()?.index() { 0 => { // OT Helper bit_inject_ot_2round_helper(session, input).await? diff --git a/iris-mpc-cpu/src/protocol/ops.rs b/iris-mpc-cpu/src/protocol/ops.rs index fefb66ad1..c52e227ee 100644 --- a/iris-mpc-cpu/src/protocol/ops.rs +++ b/iris-mpc-cpu/src/protocol/ops.rs @@ -7,7 +7,12 @@ use crate::{ binary::{lift, mul_lift_2k, open_bin}, prf::{Prf, PrfSeed}, }, - shares::{bit::Bit, ring_impl::RingElement, share::Share, vecshare::VecShare}, + shares::{ + bit::Bit, + ring_impl::RingElement, + share::{DistanceShare, Share}, + vecshare::VecShare, + }, }; use eyre::eyre; @@ -264,13 +269,12 @@ pub async fn galois_ring_is_match( Ok(opened.convert()) } -/// Checks that the given dot product is zero. -pub async fn is_dot_zero( +/// Compares the given distance to a threshold and reveal the result. 
+pub async fn compare_threshold_and_open( session: &mut Session, - code_dot: Share, - mask_dot: Share, + distance: DistanceShare, ) -> eyre::Result { - let bit = compare_threshold(session, code_dot, mask_dot).await?; + let bit = compare_threshold(session, distance.code_dot, distance.mask_dot).await?; let opened = open_bin(session, bit).await?; Ok(opened.convert()) } @@ -280,7 +284,10 @@ mod tests { use super::*; use crate::{ database_generators::generate_galois_iris_shares, - execution::{local::LocalRuntime, player::Identity}, + execution::{ + local::{generate_local_identities, LocalRuntime}, + player::Identity, + }, hawkers::plaintext_store::PlaintextIris, protocol::ops::NetworkValue::RingElement32, shares::{int_ring::IntRing2k, ring_impl::RingElement}, @@ -352,15 +359,16 @@ mod tests { #[tokio::test] async fn test_async_prf_setup() { let num_parties = 3; - let identities: Vec = vec!["alice".into(), "bob".into(), "charlie".into()]; + let identities = generate_local_identities(); let mut seeds = Vec::new(); for i in 0..num_parties { let mut seed = [0_u8; 16]; seed[0] = i; seeds.push(seed); } - let local = LocalRuntime::new(identities.clone(), seeds.clone()); - let mut ready_sessions = local.create_player_sessions().await.unwrap(); + let mut runtime = LocalRuntime::new(identities.clone(), seeds.clone()) + .await + .unwrap(); // check whether parties have sent/received the correct seeds. // P0: [seed_0, seed_2] @@ -368,7 +376,8 @@ mod tests { // P2: [seed_2, seed_1] // This is done by calling next() on the PRFs and see whether they match with // the ones created from scratch. 
- let prf0 = ready_sessions + let prf0 = runtime + .sessions .get_mut(&"alice".into()) .unwrap() .prf_as_mut(); @@ -381,7 +390,11 @@ mod tests { Prf::new(seeds[0], seeds[2]).get_prev_prf().next_u64() ); - let prf1 = ready_sessions.get_mut(&"bob".into()).unwrap().prf_as_mut(); + let prf1 = runtime + .sessions + .get_mut(&"bob".into()) + .unwrap() + .prf_as_mut(); assert_eq!( prf1.get_my_prf().next_u64(), Prf::new(seeds[1], seeds[0]).get_my_prf().next_u64() @@ -391,7 +404,8 @@ mod tests { Prf::new(seeds[1], seeds[0]).get_prev_prf().next_u64() ); - let prf2 = ready_sessions + let prf2 = runtime + .sessions .get_mut(&"charlie".into()) .unwrap() .prf_as_mut(); @@ -464,12 +478,13 @@ mod tests { seed[0] = i; seeds.push(seed); } - let local = LocalRuntime::new(identities.clone(), seeds.clone()); - let ready_sessions = local.create_player_sessions().await.unwrap(); + let runtime = LocalRuntime::new(identities.clone(), seeds.clone()) + .await + .unwrap(); let mut jobs = JoinSet::new(); for player in identities.iter() { - let mut player_session = ready_sessions.get(player).unwrap().clone(); + let mut player_session = runtime.sessions.get(player).unwrap().clone(); let four_shares = four_share_map.get(player).unwrap().clone(); jobs.spawn(async move { let out_shared = cross_mul_via_lift( @@ -537,8 +552,7 @@ mod tests { #[case(1)] #[case(2)] async fn test_galois_ring_to_rep3(#[case] seed: u64) { - let runtime = LocalRuntime::replicated_test_config(); - let ready_sessions = runtime.create_player_sessions().await.unwrap(); + let runtime = LocalRuntime::replicated_test_config().await.unwrap(); let mut rng = AesRng::seed_from_u64(seed); let iris_db = IrisDB::new_random_rng(2, &mut rng).db; @@ -548,7 +562,7 @@ mod tests { let mut jobs = JoinSet::new(); for (index, player) in runtime.identities.iter().cloned().enumerate() { - let mut player_session = ready_sessions.get(&player).unwrap().clone(); + let mut player_session = runtime.sessions.get(&player).unwrap().clone(); let mut 
own_shares = vec![(first_entry[index].clone(), second_entry[index].clone())]; own_shares.iter_mut().for_each(|(_x, y)| { y.code.preprocess_iris_code_query_share(); diff --git a/iris-mpc-cpu/src/shares/share.rs b/iris-mpc-cpu/src/shares/share.rs index 4092760b4..2b5a584df 100644 --- a/iris-mpc-cpu/src/shares/share.rs +++ b/iris-mpc-cpu/src/shares/share.rs @@ -29,7 +29,7 @@ impl Share { } pub fn add_assign_const_role(&mut self, other: T, role: Role) { - match role.zero_based() { + match role.index() { 0 => self.a += RingElement(other), 1 => self.b += RingElement(other), 2 => {} @@ -319,3 +319,20 @@ impl Shl for Share { } } } + +// Additive share of a Hamming distance value +#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[serde(bound = "")] +pub struct DistanceShare { + pub code_dot: Share, + pub mask_dot: Share, +} + +impl DistanceShare +where + T: IntRing2k, +{ + pub fn new(code_dot: Share, mask_dot: Share) -> Self { + DistanceShare { code_dot, mask_dot } + } +} From f371fa82871588e47fcf392a4bb8c0469498912b Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 20 Nov 2024 00:51:48 -0800 Subject: [PATCH 005/170] filter out matches outside batchsize (#695) --- iris-mpc-gpu/src/server/actor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index df4814f1a..97e00b9a2 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -784,7 +784,7 @@ impl ServerActor { .iter() .map(|ids| { ids.iter() - .filter(|&&x| x > (u32::MAX - self.max_batch_size as u32)) + .filter(|&&x| x > (u32::MAX - batch_size as u32)) // ignore matches outside the batch size (dummy matches) .map(|&x| batch.request_ids[(u32::MAX - x) as usize].clone()) .collect::>() }) From 5432e57e76b90b43c078563681576d3f527920db Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Wed, 20 Nov 2024 10:50:54 +0100 Subject: [PATCH 006/170] scale pods to 0 and update image (#696) 
--- deploy/prod/common-values-iris-mpc.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 65bf6a5e5..6a4b77744 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.9.10" +image: "ghcr.io/worldcoin/iris-mpc:v0.9.11" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From 72b0b104cfc1a5f310560f071ca4b94309686799 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:40:39 +0100 Subject: [PATCH 007/170] chore: Scale up prod (#697) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 6a4b77744..ff27b1fd5 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.9.11" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From 4928580f2b23bcc8cf68de75b5a77239462badd5 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 20 Nov 2024 13:58:43 -0800 Subject: [PATCH 008/170] destroy cuda events (#700) * destroy cuda events * fix --- iris-mpc-gpu/src/helpers/device_manager.rs | 6 ++++++ iris-mpc-gpu/src/server/actor.rs | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/iris-mpc-gpu/src/helpers/device_manager.rs b/iris-mpc-gpu/src/helpers/device_manager.rs index f7f7b098e..fe3f7563f 100644 --- a/iris-mpc-gpu/src/helpers/device_manager.rs +++ b/iris-mpc-gpu/src/helpers/device_manager.rs @@ -102,6 +102,12 @@ impl DeviceManager { events } + pub fn destroy_events(&self, events: Vec) { + for event in events { + unsafe { event::destroy(event).unwrap() }; + } + } + pub fn record_event(&self, streams: &[CudaStream], 
events: &[CUevent]) { for idx in 0..self.devices.len() { unsafe { diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 97e00b9a2..5779a2858 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -1204,6 +1204,11 @@ impl ServerActor { // ---- END PHASE 2 ---- + // Destroy events + self.device_manager.destroy_events(current_dot_event); + self.device_manager.destroy_events(current_exchange_event); + self.device_manager.destroy_events(current_phase2_event); + // Update events for synchronization current_dot_event = next_dot_event; current_exchange_event = next_exchange_event; From 209b1154609a3b0b4495f5e7bc4af61b46495410 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Thu, 21 Nov 2024 09:23:45 +0100 Subject: [PATCH 009/170] Add metric for DB sync rollbacks (#699) --- iris-mpc/src/bin/server.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 09026a092..221655405 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -731,6 +731,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { return Ok(()); } }; + tracing::info!("Database store length is: {}", store_len); if let Some(db_len) = sync_result.must_rollback_storage() { tracing::error!("Databases are out-of-sync: {:?}", sync_result); @@ -741,8 +742,13 @@ async fn server_main(config: Config) -> eyre::Result<()> { db_len, )); } + tracing::warn!( + "Rolling back from database length {} to other nodes length {}", + store_len, + db_len + ); tokio::runtime::Handle::current().block_on(async { store.rollback(db_len).await })?; - tracing::error!("Rolled back to db_len={}", db_len); + metrics::counter!("db.sync.rollback").increment(1); } tracing::info!("Starting server actor"); From 00b4e6175458101ed90cf6ed446f5a43ae9b6ae7 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Thu, 21 Nov 2024 00:26:58 -0800 Subject: [PATCH 010/170] metric 
about used and free gpu memory (#701) * metric about used and free gpu memory * improve --- iris-mpc-gpu/src/server/actor.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 5779a2858..6452f28e7 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -16,7 +16,7 @@ use crate::{ use cudarc::{ cublas::CudaBlas, driver::{ - result::{self, event::elapsed}, + result::{self, event::elapsed, mem_get_info}, sys::CUevent, CudaDevice, CudaSlice, CudaStream, DevicePtr, DeviceSlice, }, @@ -933,6 +933,21 @@ impl ServerActor { metrics::gauge!("batch_size").set(batch_size as f64); metrics::gauge!("max_batch_size").set(self.max_batch_size as f64); + // Update GPU memory metrics + let mut sum_free = 0; + let mut sum_total = 0; + for i in 0..self.device_manager.device_count() { + let device = self.device_manager.device(i); + unsafe { result::ctx::set_current(*device.cu_primary_ctx()) }.unwrap(); + let (free, total) = mem_get_info()?; + metrics::gauge!(format!("gpu_memory_free_{}", i)).set(free as f64); + metrics::gauge!(format!("gpu_memory_total_{}", i)).set(total as f64); + sum_free += free; + sum_total += total; + } + metrics::gauge!("gpu_memory_free_sum").set(sum_free as f64); + metrics::gauge!("gpu_memory_total_sum").set(sum_total as f64); + Ok(()) } From e1ff515d12b9cf6b51324a97c55f1b169a286023 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Thu, 21 Nov 2024 04:17:31 -0800 Subject: [PATCH 011/170] manually manage serial id (#702) * manually manage serial id * fix * . * . * . * . * . 
* make i64 --- .../20241121084719_remove_sequence.sql | 1 + iris-mpc-store/src/lib.rs | 115 ++++++++---------- .../src/bin/tcp_upgrade_server.rs | 8 -- iris-mpc-upgrade/src/lib.rs | 6 - iris-mpc/src/bin/server.rs | 51 +++----- 5 files changed, 66 insertions(+), 115 deletions(-) create mode 100644 iris-mpc-store/migrations/20241121084719_remove_sequence.sql diff --git a/iris-mpc-store/migrations/20241121084719_remove_sequence.sql b/iris-mpc-store/migrations/20241121084719_remove_sequence.sql new file mode 100644 index 000000000..b78151277 --- /dev/null +++ b/iris-mpc-store/migrations/20241121084719_remove_sequence.sql @@ -0,0 +1 @@ +ALTER TABLE irises ALTER COLUMN id DROP IDENTITY IF EXISTS; \ No newline at end of file diff --git a/iris-mpc-store/src/lib.rs b/iris-mpc-store/src/lib.rs index 9a0fdd883..8e5904b4c 100644 --- a/iris-mpc-store/src/lib.rs +++ b/iris-mpc-store/src/lib.rs @@ -69,6 +69,7 @@ impl StoredIris { #[derive(Clone)] pub struct StoredIrisRef<'a> { + pub id: i64, pub left_code: &'a [u16], pub left_mask: &'a [u16], pub right_code: &'a [u16], @@ -190,9 +191,10 @@ impl Store { return Ok(vec![]); } let mut query = sqlx::QueryBuilder::new( - "INSERT INTO irises (left_code, left_mask, right_code, right_mask)", + "INSERT INTO irises (id, left_code, left_mask, right_code, right_mask)", ); query.push_values(codes_and_masks, |mut query, iris| { + query.push_bind(iris.id); query.push_bind(cast_slice::(iris.left_code)); query.push_bind(cast_slice::(iris.left_mask)); query.push_bind(cast_slice::(iris.right_code)); @@ -290,27 +292,6 @@ DO UPDATE SET right_code = EXCLUDED.right_code, right_mask = EXCLUDED.right_mask Ok(()) } - async fn set_sequence_id( - &self, - id: usize, - executor: impl sqlx::Executor<'_, Database = Postgres>, - ) -> Result<()> { - if id == 0 { - // If requested id is 0 (only used in tests), reset the sequence to 1 with - // advance_nextval set to false. This is because serial id starts from 1. 
- sqlx::query("SELECT setval(pg_get_serial_sequence('irises', 'id'), 1, false)") - .execute(executor) - .await?; - } else { - sqlx::query("SELECT setval(pg_get_serial_sequence('irises', 'id'), $1, true)") - .bind(id as i64) - .execute(executor) - .await?; - } - - Ok(()) - } - pub async fn rollback(&self, db_len: usize) -> Result<()> { let mut tx = self.pool.begin().await?; @@ -319,18 +300,12 @@ DO UPDATE SET right_code = EXCLUDED.right_code, right_mask = EXCLUDED.right_mask .execute(&mut *tx) .await?; - self.set_sequence_id(db_len, &mut *tx).await?; - tx.commit().await?; Ok(()) } - pub async fn set_irises_sequence_id(&self, id: usize) -> Result<()> { - self.set_sequence_id(id, &self.pool).await - } - - pub async fn get_irises_sequence_id(&self) -> Result { - let id: (i64,) = sqlx::query_as("SELECT last_value FROM irises_id_seq") + pub async fn get_max_serial_id(&self) -> Result { + let id: (i64,) = sqlx::query_as("SELECT MAX(id) FROM irises") .fetch_one(&self.pool) .await?; Ok(id.0 as usize) @@ -353,16 +328,6 @@ DO UPDATE SET right_code = EXCLUDED.right_code, right_mask = EXCLUDED.right_mask Ok(()) } - pub async fn update_iris_id_sequence(&self) -> Result<()> { - sqlx::query( - "SELECT setval(pg_get_serial_sequence('irises', 'id'), COALESCE(MAX(id), 0), true) \ - FROM irises", - ) - .execute(&self.pool) - .await?; - Ok(()) - } - pub async fn last_results(&self, count: usize) -> Result> { let mut result_events: Vec = sqlx::query_scalar("SELECT result_event FROM results ORDER BY id DESC LIMIT $1") @@ -442,6 +407,7 @@ DO UPDATE SET right_code = EXCLUDED.right_code, right_mask = EXCLUDED.right_mask // inserting shares and masks in the db. 
Reusing the same share and mask for // left and right self.insert_irises(&mut tx, &[StoredIrisRef { + id: (i + 1) as i64, left_code: &share.coefs, left_mask: &mask.coefs, right_code: &share.coefs, @@ -505,18 +471,21 @@ mod tests { let codes_and_masks = &[ StoredIrisRef { + id: 1, left_code: &[1, 2, 3, 4], left_mask: &[5, 6, 7, 8], right_code: &[9, 10, 11, 12], right_mask: &[13, 14, 15, 16], }, StoredIrisRef { + id: 2, left_code: &[1117, 18, 19, 20], left_mask: &[21, 1122, 23, 24], right_code: &[25, 26, 1127, 28], right_mask: &[29, 30, 31, 1132], }, StoredIrisRef { + id: 3, left_code: &[17, 18, 19, 20], left_mask: &[21, 22, 23, 24], // Empty is allowed until stereo is implemented. @@ -568,18 +537,23 @@ mod tests { #[tokio::test] async fn test_insert_many() -> Result<()> { - let count = 1 << 3; + let count: usize = 1 << 3; let schema_name = temporary_name(); let store = Store::new(&test_db_url()?, &schema_name).await?; - let iris = StoredIrisRef { - left_code: &[123_u16; 12800], - left_mask: &[456_u16; 12800], - right_code: &[789_u16; 12800], - right_mask: &[101_u16; 12800], - }; - let codes_and_masks = vec![iris; count]; + let mut codes_and_masks = vec![]; + + for i in 0..count { + let iris = StoredIrisRef { + id: (i + 1) as i64, + left_code: &[123_u16; 12800], + left_mask: &[456_u16; 12800], + right_code: &[789_u16; 12800], + right_mask: &[101_u16; 12800], + }; + codes_and_masks.push(iris); + } let result_event = serde_json::to_string(&UniquenessResult::new( 0, @@ -641,15 +615,20 @@ mod tests { let schema_name = temporary_name(); let store = Store::new(&test_db_url()?, &schema_name).await?; - let iris = StoredIrisRef { - left_code: &[123_u16; 12800], - left_mask: &[456_u16; 12800], - right_code: &[789_u16; 12800], - right_mask: &[101_u16; 12800], - }; + let mut irises = vec![]; + for i in 0..10 { + let iris = StoredIrisRef { + id: (i + 1) as i64, + left_code: &[123_u16; 12800], + left_mask: &[456_u16; 12800], + right_code: &[789_u16; 12800], + right_mask: 
&[101_u16; 12800], + }; + irises.push(iris); + } let mut tx = store.tx().await?; - store.insert_irises(&mut tx, &vec![iris; 10]).await?; + store.insert_irises(&mut tx, &irises).await?; tx.commit().await?; store.rollback(5).await?; @@ -779,31 +758,37 @@ mod tests { let store = Store::new(&test_db_url()?, &schema_name).await?; // insert two irises into db - let iris = StoredIrisRef { + let iris1 = StoredIrisRef { + id: 1, left_code: &[123_u16; 12800], left_mask: &[456_u16; 6400], right_code: &[789_u16; 12800], right_mask: &[101_u16; 6400], }; + let mut iris2 = iris1.clone(); + iris2.id = 2; + let mut tx = store.tx().await?; - store.insert_irises(&mut tx, &vec![iris.clone(); 2]).await?; + store + .insert_irises(&mut tx, &[iris1, iris2.clone()]) + .await?; tx.commit().await?; // update iris with id 1 in db let updated_left_code = GaloisRingIrisCodeShare { - id: 0, + id: 1, coefs: [666_u16; 12800], }; let updated_left_mask = GaloisRingTrimmedMaskCodeShare { - id: 0, + id: 1, coefs: [777_u16; 6400], }; let updated_right_code = GaloisRingIrisCodeShare { - id: 0, + id: 1, coefs: [888_u16; 12800], }; let updated_right_mask = GaloisRingTrimmedMaskCodeShare { - id: 0, + id: 1, coefs: [999_u16; 6400], }; store @@ -825,10 +810,10 @@ mod tests { assert_eq!(cast_u8_to_u16(&got[0].right_mask), updated_right_mask.coefs); // assert the other iris in db is not updated - assert_eq!(cast_u8_to_u16(&got[1].left_code), iris.left_code); - assert_eq!(cast_u8_to_u16(&got[1].left_mask), iris.left_mask); - assert_eq!(cast_u8_to_u16(&got[1].right_code), iris.right_code); - assert_eq!(cast_u8_to_u16(&got[1].right_mask), iris.right_mask); + assert_eq!(cast_u8_to_u16(&got[1].left_code), iris2.left_code); + assert_eq!(cast_u8_to_u16(&got[1].left_mask), iris2.left_mask); + assert_eq!(cast_u8_to_u16(&got[1].right_code), iris2.right_code); + assert_eq!(cast_u8_to_u16(&got[1].right_mask), iris2.right_mask); cleanup(&store, &schema_name).await?; Ok(()) diff --git 
a/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs b/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs index ec30ce02e..bc16cfe07 100644 --- a/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs +++ b/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs @@ -212,10 +212,6 @@ async fn main() -> eyre::Result<()> { client_stream1.write_u8(FINAL_BATCH_SUCCESSFUL_ACK).await?; tracing::info!("Sent final ACK to client1"); - tracing::info!("Updating iris id sequence"); - sink.update_iris_id_sequence().await?; - tracing::info!("Iris id sequence updated"); - Ok(()) } @@ -252,8 +248,4 @@ impl NewIrisShareSink for IrisShareDbSink { } } } - - async fn update_iris_id_sequence(&self) -> eyre::Result<()> { - self.store.update_iris_id_sequence().await - } } diff --git a/iris-mpc-upgrade/src/lib.rs b/iris-mpc-upgrade/src/lib.rs index 73676b66f..b979cdbf4 100644 --- a/iris-mpc-upgrade/src/lib.rs +++ b/iris-mpc-upgrade/src/lib.rs @@ -42,8 +42,6 @@ pub trait NewIrisShareSink { code_share: &[u16; IRIS_CODE_LENGTH], mask_share: &[u16; MASK_CODE_LENGTH], ) -> Result<()>; - - async fn update_iris_id_sequence(&self) -> Result<()>; } #[derive(Debug, Clone)] @@ -83,10 +81,6 @@ impl NewIrisShareSink for IrisShareTestFileSink { file.flush()?; Ok(()) } - - async fn update_iris_id_sequence(&self) -> Result<()> { - Ok(()) - } } #[derive(Clone)] diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 221655405..774669f5a 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -660,36 +660,19 @@ async fn server_main(config: Config) -> eyre::Result<()> { tracing::info!("Size of the database after init: {}", store_len); // Check if the sequence id is consistent with the number of irises - let iris_sequence_id = store.get_irises_sequence_id().await?; - if iris_sequence_id != store_len { - tracing::warn!( - "Detected inconsistent iris sequence id {} != {}, resetting...", - iris_sequence_id, + let max_serial_id = store.get_max_serial_id().await?; + if max_serial_id != store_len { + 
tracing::error!( + "Detected inconsistency between max serial id {} and db size {}.", + max_serial_id, store_len ); - // Reset the sequence id - store.set_irises_sequence_id(store_len).await?; - - // Fetch again and check that the sequence id is consistent now - let store_len = store.count_irises().await?; - let iris_sequence_id = store.get_irises_sequence_id().await?; - - // If db is empty, we set the sequence id to 1 with advance_nextval false - let empty_db_sequence_ok = store_len == 0 && iris_sequence_id == 1; - - if iris_sequence_id != store_len && !empty_db_sequence_ok { - tracing::error!( - "Iris sequence id is still inconsistent: {} != {}", - iris_sequence_id, - store_len - ); - eyre::bail!( - "Iris sequence id is still inconsistent: {} != {}", - iris_sequence_id, - store_len - ); - } + eyre::bail!( + "Detected inconsistency between max serial id {} and db size {}.", + max_serial_id, + store_len + ); } if store_len > config.max_db_size { @@ -912,7 +895,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { .collect::>>()?; // Insert non-matching queries into the persistent store. - let (memory_serial_ids, codes_and_masks): (Vec, Vec) = matches + let (memory_serial_ids, codes_and_masks): (Vec, Vec) = matches .iter() .enumerate() .filter_map( @@ -920,8 +903,10 @@ async fn server_main(config: Config) -> eyre::Result<()> { |(query_idx, is_match)| if !is_match { Some(query_idx) } else { None }, ) .map(|query_idx| { + let serial_id = (merged_results[query_idx] + 1) as i64; // Get the original vectors from `receive_batch`. 
- (merged_results[query_idx] + 1, StoredIrisRef { + (serial_id, StoredIrisRef { + id: serial_id, left_code: &store_left.code[query_idx].coefs[..], left_mask: &store_left.mask[query_idx].coefs[..], right_code: &store_right.code[query_idx].coefs[..], @@ -937,13 +922,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { .await?; if !codes_and_masks.is_empty() && !config_bg.disable_persistence { - let db_serial_ids = store_bg - .insert_irises(&mut tx, &codes_and_masks) - .await - .wrap_err("failed to persist queries")? - .iter() - .map(|&x| x as u32) - .collect::>(); + let db_serial_ids = store_bg.insert_irises(&mut tx, &codes_and_masks).await?; // Check if the serial_ids match between memory and db. if memory_serial_ids != db_serial_ids { From 7bec862b6a10e4faebb5bca9ed34955da86482f3 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Thu, 21 Nov 2024 04:31:13 -0800 Subject: [PATCH 012/170] release 0.10.0 (#703) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 5b02b4bf2..0501d8f6c 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.9.10" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.0" environment: stage replicaCount: 1 From 27a083ed9557be1311b6e15e20e4f0471c3f0f9c Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Thu, 21 Nov 2024 06:28:26 -0800 Subject: [PATCH 013/170] release 0.10.0 to prod (#704) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index ff27b1fd5..aac347afc 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.9.11" +image: 
"ghcr.io/worldcoin/iris-mpc:v0.10.0" environment: prod replicaCount: 1 From 47a08a85c6c36b4b8eab1edef9ee15f0aed9703a Mon Sep 17 00:00:00 2001 From: Daniel Kales <11509575+dkales@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:57:23 +0100 Subject: [PATCH 014/170] Add a ReSharing protocol to both re-share to existing parties and create shares for additional parties (#684) * refactor: no longer have degree 2 galois ring sharing in library at all * feat: add more points to exceptional sequence of galois sharing feat: more generic inverse computation for degree 4 galois rings * test: more exhaustive tests for galois rings * feat: basic reshare protocol implemented To be integrated into client/server infrastructure * feat: also send and check eye in reshare message * Revert "feat: also send and check eye in reshare message" This reverts commit 4f3562abac1a92a32f677375d1918a84487598f0. * feat: include both eyes in the sent message * feat: basic reshare client * feat: basic server and seed binary for v2 dbs * feat: working basic reshare client and server probably lacks robustness + tracing * fix: cargo clippy fixes * fix: more clippy fixes * build(deps): add protobuf-compiler to docker build images and ci * fix: clippy and missing protobuf for docs ci * docs: simple readme for reshare protocol run * docs: update documentation for reshare * Update iris-mpc-upgrade/src/bin/README.md Co-authored-by: Carlo Mazzaferro * reshare deploy values * reshare common values * feat: compute allowed proto message size from batch size * refactor: remove unused config values, make client retry backoff time configurable * document config * feat: derive common seed from AWS KMS * test derive shared secret with localstack * feat: add sanity check on correlated randomness * workaround for localstack bug for local testing * clippy fix * unify the store functionality to not have 2 similar structs due to new main changes * reshare common values * consolidate reshare server values * add server 
and client to dockerfiles * typo --------- Co-authored-by: Carlo Mazzaferro Co-authored-by: Carlo Mazzaferro --- .github/workflows/build-all-targets.yml | 6 +- .github/workflows/doc.yml | 4 +- .github/workflows/lint-clippy.yaml | 4 +- .github/workflows/run-unit-tests.yaml | 6 +- .github/workflows/test-gpu.yaml | 4 +- Cargo.lock | 232 +++++- Dockerfile | 5 +- Dockerfile.base | 10 +- Dockerfile.debug | 1 + Dockerfile.nccl | 1 + Dockerfile.nocuda | 4 +- Dockerfile.shares-encoding | 1 + .../stage/common-values-reshare-server.yaml | 128 +++ .../smpcv2-1-stage/values-reshare-server.yaml | 51 ++ iris-mpc-common/src/galois.rs | 450 ++-------- iris-mpc-common/src/galois_engine.rs | 32 +- iris-mpc-store/src/lib.rs | 30 + iris-mpc-upgrade/Cargo.toml | 27 +- iris-mpc-upgrade/build.rs | 11 + iris-mpc-upgrade/protos/reshare.proto | 35 + iris-mpc-upgrade/src/bin/.gitignore | 1 + iris-mpc-upgrade/src/bin/README.md | 76 +- iris-mpc-upgrade/src/bin/docker-compose.yaml | 16 +- iris-mpc-upgrade/src/bin/reshare-client.rs | 154 ++++ .../src/bin/reshare-protocol-local.sh | 43 + iris-mpc-upgrade/src/bin/reshare-server.rs | 56 ++ iris-mpc-upgrade/src/bin/seed_v2_dbs.rs | 145 ++++ iris-mpc-upgrade/src/config.rs | 100 +++ iris-mpc-upgrade/src/lib.rs | 2 + .../src/proto/iris_mpc_reshare.rs | 368 +++++++++ iris-mpc-upgrade/src/proto/mod.rs | 30 + iris-mpc-upgrade/src/reshare.rs | 773 ++++++++++++++++++ 32 files changed, 2410 insertions(+), 396 deletions(-) create mode 100644 deploy/stage/common-values-reshare-server.yaml create mode 100644 deploy/stage/smpcv2-1-stage/values-reshare-server.yaml create mode 100644 iris-mpc-upgrade/build.rs create mode 100644 iris-mpc-upgrade/protos/reshare.proto create mode 100644 iris-mpc-upgrade/src/bin/reshare-client.rs create mode 100755 iris-mpc-upgrade/src/bin/reshare-protocol-local.sh create mode 100644 iris-mpc-upgrade/src/bin/reshare-server.rs create mode 100644 iris-mpc-upgrade/src/bin/seed_v2_dbs.rs create mode 100644 
iris-mpc-upgrade/src/proto/iris_mpc_reshare.rs create mode 100644 iris-mpc-upgrade/src/proto/mod.rs create mode 100644 iris-mpc-upgrade/src/reshare.rs diff --git a/.github/workflows/build-all-targets.yml b/.github/workflows/build-all-targets.yml index 368ec8d38..ad47edf97 100644 --- a/.github/workflows/build-all-targets.yml +++ b/.github/workflows/build-all-targets.yml @@ -4,7 +4,7 @@ on: push: concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" cancel-in-progress: true jobs: @@ -13,6 +13,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + - name: Install Dependencies + run: sudo apt install protobuf-compiler - name: Cache build products uses: Swatinem/rust-cache@v2.7.3 with: @@ -31,6 +33,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + - name: Install Dependencies + run: sudo apt install protobuf-compiler - name: Cache build products uses: Swatinem/rust-cache@v2.7.3 with: diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index b776b29cd..6a0d0ac49 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -4,7 +4,7 @@ on: push: concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" cancel-in-progress: true jobs: @@ -13,6 +13,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + - name: Install Dependencies + run: sudo apt install protobuf-compiler - name: Show errors inline uses: r7kamura/rust-problem-matchers@v1 - name: Install Rust nightly diff --git a/.github/workflows/lint-clippy.yaml b/.github/workflows/lint-clippy.yaml index 760c79520..d5620c21c 100644 --- a/.github/workflows/lint-clippy.yaml +++ 
b/.github/workflows/lint-clippy.yaml @@ -4,7 +4,7 @@ on: push: concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" cancel-in-progress: true permissions: @@ -21,6 +21,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + - name: Install Dependencies + run: sudo apt install protobuf-compiler - name: Install Rust nightly run: rustup toolchain install nightly-2024-07-10 - name: Set Rust nightly as default diff --git a/.github/workflows/run-unit-tests.yaml b/.github/workflows/run-unit-tests.yaml index 382511b2a..bf26e176e 100644 --- a/.github/workflows/run-unit-tests.yaml +++ b/.github/workflows/run-unit-tests.yaml @@ -5,10 +5,10 @@ on: branches: - main pull_request: - types: [ opened, synchronize ] + types: [opened, synchronize] concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}" cancel-in-progress: true jobs: @@ -28,6 +28,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + - name: Install Dependencies + run: sudo apt install protobuf-compiler - name: Cache build products uses: Swatinem/rust-cache@v2.7.3 with: diff --git a/.github/workflows/test-gpu.yaml b/.github/workflows/test-gpu.yaml index a0f9af714..633cf7010 100644 --- a/.github/workflows/test-gpu.yaml +++ b/.github/workflows/test-gpu.yaml @@ -30,8 +30,8 @@ jobs: sudo ln -sf /usr/bin/gcc-11 /usr/bin/gcc gcc --version - - name: Install OpenSSL && pkg-config - run: sudo apt-get update && sudo apt-get install -y pkg-config libssl-dev + - name: Install OpenSSL && pkg-config && protobuf-compiler + run: sudo apt-get update && sudo apt-get install -y pkg-config libssl-dev protobuf-compiler - name: Install CUDA and NCCL 
dependencies if: steps.cache-cuda-nccl.outputs.cache-hit != 'true' diff --git a/Cargo.lock b/Cargo.lock index fb3c06e6f..811329012 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -42,7 +42,7 @@ dependencies = [ [[package]] name = "aes-prng" version = "0.2.1" -source = "git+https://github.com/tf-encrypted/aes-prng.git?branch=dragos/display#ebe79b1173ab6698c69d18dc464294f1893b44bb" +source = "git+https://github.com/tf-encrypted/aes-prng.git?branch=dragos%2Fdisplay#ebe79b1173ab6698c69d18dc464294f1893b44bb" dependencies = [ "aes", "byteorder", @@ -147,6 +147,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + [[package]] name = "arraydeque" version = "0.5.1" @@ -175,6 +181,28 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + [[package]] name = "async-trait" version = "0.1.83" @@ -726,7 +754,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.1", "tokio", - "tower", + "tower 0.5.1", "tower-layer", "tower-service", "tracing", @@ -886,7 +914,7 @@ dependencies = [ "base64 0.13.1", "bitvec", "hex", - "indexmap", + "indexmap 2.6.0", "js-sys", "once_cell", "rand", @@ -1755,6 +1783,12 
@@ dependencies = [ "subtle", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -1869,7 +1903,7 @@ version = "7.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b724496da7c26fcce66458526ce68fc2ecf4aaaa994281cf322ded5755520c" dependencies = [ - "fixedbitset", + "fixedbitset 0.5.7", "futures-buffered", "futures-core", "futures-lite", @@ -2043,7 +2077,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -2062,7 +2096,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -2079,6 +2113,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.5" @@ -2372,6 +2412,19 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper 1.5.0", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.5.0" @@ -2476,6 +2529,16 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + 
"autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" version = "2.6.0" @@ -2624,7 +2687,7 @@ dependencies = [ name = "iris-mpc-cpu" version = "0.1.0" dependencies = [ - "aes-prng 0.2.1 (git+https://github.com/tf-encrypted/aes-prng.git?branch=dragos/display)", + "aes-prng 0.2.1 (git+https://github.com/tf-encrypted/aes-prng.git?branch=dragos%2Fdisplay)", "async-channel", "async-trait", "bincode", @@ -2710,19 +2773,25 @@ dependencies = [ "float_eq", "futures", "futures-concurrency", + "hkdf", "indicatif", "iris-mpc-common", "iris-mpc-store", "itertools 0.13.0", "mpc", + "prost", "rand", "rand_chacha", "rcgen", "serde", "serde-big-array", + "sha2", "sqlx", + "thiserror", "tokio", "tokio-native-tls", + "tonic", + "tonic-build", "tracing", "tracing-subscriber", ] @@ -3028,7 +3097,7 @@ dependencies = [ "hyper 1.5.0", "hyper-rustls 0.27.3", "hyper-util", - "indexmap", + "indexmap 2.6.0", "ipnet", "metrics 0.23.0", "metrics-util", @@ -3204,6 +3273,12 @@ dependencies = [ "url", ] +[[package]] +name = "multimap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" + [[package]] name = "native-tls" version = "0.2.12" @@ -3422,7 +3497,7 @@ dependencies = [ "ahash", "futures-core", "http 1.1.0", - "indexmap", + "indexmap 2.6.0", "itertools 0.11.0", "itoa", "once_cell", @@ -3651,6 +3726,16 @@ dependencies = [ "sha2", ] +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap 2.6.0", +] + [[package]] name = "pin-project" version = "1.1.7" @@ -3806,6 +3891,59 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.13.0", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn 2.0.85", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + [[package]] name = "quanta" version = "0.12.3" @@ -4273,6 +4411,7 @@ dependencies = [ "aws-lc-rs", "log", "once_cell", + "ring", "rustls-pki-types", "rustls-webpki 0.102.8", "subtle", @@ -4504,7 +4643,7 @@ version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ - "indexmap", + "indexmap 2.6.0", "itoa", "memchr", "ryu", @@ -4771,7 +4910,7 @@ dependencies = [ "hashbrown 0.14.5", "hashlink 0.9.1", "hex", - "indexmap", + "indexmap 2.6.0", "log", "memchr", "native-tls", @@ -5320,13 +5459,80 @@ version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap", + "indexmap 2.6.0", "serde", "serde_spanned", "toml_datetime", "winnow", ] +[[package]] +name = "tonic" +version = 
"0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.22.1", + "bytes", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.5.0", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "rustls-native-certs 0.8.0", + "rustls-pemfile 2.2.0", + "socket2 0.5.7", + "tokio", + "tokio-rustls 0.26.0", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-build" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.5.1" diff --git a/Dockerfile b/Dockerfile index 4ea9b9ef3..147996daf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \ devscripts \ debhelper \ ca-certificates \ + protobuf-compiler \ wget RUN curl https://sh.rustup.rs -sSf | sh -s -- -y @@ -27,7 +28,7 @@ RUN cargo install cargo-build-deps \ FROM --platform=linux/amd64 build-image as build-app WORKDIR /src/gpu-iris-mpc COPY . . 
-RUN cargo build --release --target x86_64-unknown-linux-gnu --bin nccl --bin server --bin client --bin key-manager --bin upgrade-server --bin upgrade-client --bin upgrade-checker +RUN cargo build --release --target x86_64-unknown-linux-gnu --bin nccl --bin server --bin client --bin key-manager --bin upgrade-server --bin upgrade-client --bin upgrade-checker --bin reshare-server --bin reshare-client FROM --platform=linux/amd64 ghcr.io/worldcoin/iris-mpc-base:cuda12_2-nccl2_22_3_1 ENV DEBIAN_FRONTEND=noninteractive @@ -40,6 +41,8 @@ COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/ COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-server /bin/upgrade-server COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-client /bin/upgrade-client COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-checker /bin/upgrade-checker +COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/reshare-server /bin/reshare-server +COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/reshare-client /bin/reshare-client USER 65534 ENTRYPOINT ["/bin/server"] diff --git a/Dockerfile.base b/Dockerfile.base index c82f08130..5ddd4c845 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -1,6 +1,6 @@ FROM --platform=linux/amd64 ubuntu:22.04 as build-image -RUN apt-get update && apt-get install -y pkg-config wget libssl-dev ca-certificates \ +RUN apt-get update && apt-get install -y pkg-config wget libssl-dev ca-certificates protobuf-compiler \ && rm -rf /var/lib/apt/lists/* RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \ && dpkg -i cuda-keyring_1.1-1_all.deb \ @@ -41,8 +41,8 @@ RUN cd /tmp \ && git checkout ${AWS_OFI_NCCL_VERSION} \ && ./autogen.sh \ && ./configure --prefix=/opt/aws-ofi-nccl/install \ - --with-libfabric=/opt/amazon/efa/ \ - 
--with-cuda=/usr/local/cuda \ - --with-nccl=/tmp/nccl/build \ - --with-mpi=/opt/amazon/openmpi/ \ + --with-libfabric=/opt/amazon/efa/ \ + --with-cuda=/usr/local/cuda \ + --with-nccl=/tmp/nccl/build \ + --with-mpi=/opt/amazon/openmpi/ \ && make && make install diff --git a/Dockerfile.debug b/Dockerfile.debug index 83758e93c..2e1a1bd76 100644 --- a/Dockerfile.debug +++ b/Dockerfile.debug @@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y \ devscripts \ debhelper \ ca-certificates \ + protobuf-compiler \ wget RUN curl https://sh.rustup.rs -sSf | sh -s -- -y diff --git a/Dockerfile.nccl b/Dockerfile.nccl index edf79ecc3..d6c5a3db7 100644 --- a/Dockerfile.nccl +++ b/Dockerfile.nccl @@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y \ build-essential \ libssl-dev \ ca-certificates \ + protobuf-compiler \ wget RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \ diff --git a/Dockerfile.nocuda b/Dockerfile.nocuda index 00adf1498..22585d42d 100644 --- a/Dockerfile.nocuda +++ b/Dockerfile.nocuda @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \ devscripts \ debhelper \ ca-certificates \ + protobuf-compiler \ wget RUN curl https://sh.rustup.rs -sSf | sh -s -- -y @@ -28,7 +29,7 @@ FROM --platform=linux/amd64 build-image as build-app WORKDIR /src/gpu-iris-mpc COPY . . 
-RUN cargo build --release --target x86_64-unknown-linux-gnu --bin seed-v1-dbs --bin upgrade-server --bin upgrade-client --bin upgrade-checker +RUN cargo build --release --target x86_64-unknown-linux-gnu --bin seed-v1-dbs --bin upgrade-server --bin upgrade-client --bin upgrade-checker --bin reshare-server FROM --platform=linux/amd64 ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive @@ -41,6 +42,7 @@ COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/ COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-server /bin/upgrade-server COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-client /bin/upgrade-client COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-checker /bin/upgrade-checker +COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/reshare-server /bin/reshare-server USER 65534 ENTRYPOINT ["/bin/upgrade-server"] diff --git a/Dockerfile.shares-encoding b/Dockerfile.shares-encoding index 719a5d2e7..ee7a39d4f 100644 --- a/Dockerfile.shares-encoding +++ b/Dockerfile.shares-encoding @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \ devscripts \ debhelper \ ca-certificates \ + protobuf-compiler \ wget RUN curl https://sh.rustup.rs -sSf | sh -s -- -y diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml new file mode 100644 index 000000000..1082b1063 --- /dev/null +++ b/deploy/stage/common-values-reshare-server.yaml @@ -0,0 +1,128 @@ +image: "ghcr.io/worldcoin/iris-mpc:v0.8.25" + +environment: stage +replicaCount: 1 + +strategy: + type: Recreate + +datadog: + enabled: true + +# Nginx exposes the only port required here +ports: + - containerPort: 3000 + name: health + protocol: TCP + +startupProbe: + httpGet: + path: /health + port: health + +livenessProbe: + httpGet: + path: /health + port: health + +readinessProbe: + periodSeconds: 30 + 
failureThreshold: 10 + httpGet: + path: /health + port: health + +resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 1 + memory: 1Gi + +imagePullSecrets: + - name: github-secret + +nodeSelector: + kubernetes.io/arch: amd64 + beta.kubernetes.io/instance-type: t3.2xlarge + +podSecurityContext: + runAsUser: 65534 + runAsGroup: 65534 + +serviceAccount: + create: true + +command: [ "/bin/reshare-server" ] + +env: + - name: SMPC__DATABASE__URL + valueFrom: + secretKeyRef: + key: DATABASE_AURORA_URL + name: application + - name: RUST_LOG + value: info + - name: ENVIRONMENT + value: stage + +service: + enabled: false + +nginxSidecar: + enabled: true + port: 8443 + secrets: + enabled: true + volumeMount: + - name: mounted-secret-name + mountPath: /etc/nginx/cert + volume: + - name: mounted-secret-name + secret: + secretName: application + items: + - key: certificate.crt + path: certificate.crt + - key: key.pem + path: key.pem + optional: false + config: + nginx.conf: | + worker_processes auto; + + error_log /dev/stderr notice; + pid /tmp/nginx.pid; + + events { + worker_connections 1024; + } + + http { + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent"'; + + access_log /dev/stdout main; + + server { + listen 8443 http2 ssl; + + ssl_certificate /etc/nginx/cert/certificate.crt; + ssl_certificate_key /etc/nginx/cert/key.pem; + + ssl_protocols TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + # Enable session resumption to improve performance + ssl_session_cache shared:SSL:10m; + ssl_session_timeout 1h; + + location / { + # Forward gRPC traffic to the gRPC server on port 8000 + grpc_pass grpc://127.0.0.1:8000; + } + } + } +} diff --git a/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml new file mode 100644 index 000000000..c320df97f --- /dev/null +++ b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml @@ -0,0 +1,51 
@@ +args: + - "--bind-addr" + - "0.0.0.0:8000" + - "--db-url" + - "$(SMPC__DATABASE__URL)" + - "--party-id" + - "1" + - "--environment" + - "$(ENVIRONMENT)" + +initContainer: + enabled: true + image: "amazon/aws-cli:2.17.62" + name: "reshare-proto-dns-records-updater" + env: + - name: PARTY_ID + value: "2" + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + configMap: + init.sh: | + #!/usr/bin/env bash + + # Set up environment variables + HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name "$PARTY_ID".stage.smpcv2.worldcoin.dev --query "HostedZones[].Id" --output text) + + # Generate the JSON content in memory + BATCH_JSON=$(cat < Self { - GaloisRingElement { coefs: rng.gen() } - } - - pub fn inverse(&self) -> Self { - // hard-coded inverses for some elements we need - // too lazy to implement the general case in rust - // and we do not need the general case, since this is only used for the lagrange - // polys, which can be pre-computed anyway - - if *self == GaloisRingElement::ZERO { - panic!("Division by zero"); - } - - if *self == GaloisRingElement::ONE { - return GaloisRingElement::ONE; - } - - if *self == -GaloisRingElement::ONE { - return -GaloisRingElement::ONE; - } - if *self == (GaloisRingElement { coefs: [0, 1] }) { - return GaloisRingElement { coefs: [65535, 1] }; - } - if *self == (GaloisRingElement { coefs: [0, 65535] }) { - return GaloisRingElement { coefs: [1, 65535] }; - } - if *self == (GaloisRingElement { coefs: [1, 1] }) { - return GaloisRingElement { coefs: [2, 65535] }; - } - if *self == (GaloisRingElement { coefs: [1, 65535] }) { - return GaloisRingElement { coefs: [0, 65535] }; - } - if *self == (GaloisRingElement { coefs: [65535, 1] }) { - return GaloisRingElement { coefs: [0, 1] }; - } - - panic!("No inverse for {:?} in LUT", self); - } - } - - impl std::ops::Add for GaloisRingElement { - type Output = Self; - fn add(self, rhs: Self) -> Self::Output { - self.add(&rhs) - } - } - impl 
std::ops::Add<&GaloisRingElement> for GaloisRingElement { - type Output = Self; - fn add(mut self, rhs: &Self) -> Self::Output { - for i in 0..2 { - self.coefs[i] = self.coefs[i].wrapping_add(rhs.coefs[i]); - } - self - } - } - - impl std::ops::Sub for GaloisRingElement { - type Output = Self; - fn sub(self, rhs: Self) -> Self::Output { - self.sub(&rhs) - } - } - impl std::ops::Sub<&GaloisRingElement> for GaloisRingElement { - type Output = Self; - fn sub(mut self, rhs: &Self) -> Self::Output { - for i in 0..2 { - self.coefs[i] = self.coefs[i].wrapping_sub(rhs.coefs[i]); - } - self - } - } - - impl std::ops::Neg for GaloisRingElement { - type Output = Self; - - fn neg(self) -> Self::Output { - GaloisRingElement { - coefs: [self.coefs[0].wrapping_neg(), self.coefs[1].wrapping_neg()], - } - } - } - - impl std::ops::Mul for GaloisRingElement { - type Output = Self; - fn mul(self, rhs: Self) -> Self::Output { - self.mul(&rhs) - } - } - impl std::ops::Mul<&GaloisRingElement> for GaloisRingElement { - type Output = Self; - fn mul(self, rhs: &Self) -> Self::Output { - GaloisRingElement { - coefs: [ - (self.coefs[0].wrapping_mul(rhs.coefs[0])) - .wrapping_add(self.coefs[1].wrapping_mul(rhs.coefs[1])), - (self.coefs[0].wrapping_mul(rhs.coefs[1])) - .wrapping_add(self.coefs[1].wrapping_mul(rhs.coefs[0])) - .wrapping_add(self.coefs[1].wrapping_mul(rhs.coefs[1])), - ], - } - } - } - - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - pub struct ShamirGaloisRingShare { - pub id: usize, - pub y: GaloisRingElement, - } - impl std::ops::Add for ShamirGaloisRingShare { - type Output = Self; - fn add(self, rhs: Self) -> Self::Output { - assert_eq!(self.id, rhs.id, "ids must be euqal"); - ShamirGaloisRingShare { - id: self.id, - y: self.y + rhs.y, - } - } - } - impl std::ops::Mul for ShamirGaloisRingShare { - type Output = Self; - fn mul(self, rhs: Self) -> Self::Output { - assert_eq!(self.id, rhs.id, "ids must be euqal"); - ShamirGaloisRingShare { - id: self.id, - y: self.y * rhs.y, - 
} - } - } - impl std::ops::Sub for ShamirGaloisRingShare { - type Output = Self; - fn sub(self, rhs: Self) -> Self::Output { - assert_eq!(self.id, rhs.id, "ids must be euqal"); - ShamirGaloisRingShare { - id: self.id, - y: self.y - rhs.y, - } - } - } - - impl ShamirGaloisRingShare { - pub fn encode_3( - input: &GaloisRingElement, - rng: &mut R, - ) -> [ShamirGaloisRingShare; 3] { - let coefs = [*input, GaloisRingElement::random(rng)]; - (1..=3) - .map(|i| { - let element = GaloisRingElement::EXCEPTIONAL_SEQUENCE[i]; - let share = coefs[0] + coefs[1] * element; - ShamirGaloisRingShare { id: i, y: share } - }) - .collect::>() - .as_slice() - .try_into() - .unwrap() - } - - pub fn encode_3_mat( - input: &[u16; 2], - rng: &mut R, - ) -> [ShamirGaloisRingShare; 3] { - let invec = [input[0], input[1], rng.gen(), rng.gen()]; - let share1 = ShamirGaloisRingShare { - id: 1, - y: GaloisRingElement { - coefs: [ - invec[0].wrapping_add(invec[2]), - invec[1].wrapping_add(invec[3]), - ], - }, - }; - let share2 = ShamirGaloisRingShare { - id: 2, - y: GaloisRingElement { - coefs: [ - invec[0].wrapping_add(invec[3]), - invec[1].wrapping_add(invec[2]).wrapping_add(invec[3]), - ], - }, - }; - let share3 = ShamirGaloisRingShare { - id: 3, - y: GaloisRingElement { - coefs: [ - share2.y.coefs[0].wrapping_add(invec[2]), - share2.y.coefs[1].wrapping_add(invec[3]), - ], - }, - }; - [share1, share2, share3] - } - - pub fn deg_1_lagrange_polys_at_zero( - my_id: PartyID, - other_id: PartyID, - ) -> GaloisRingElement { - let mut res = GaloisRingElement::ONE; - let i = usize::from(my_id) + 1; - let j = usize::from(other_id) + 1; - res = res * (-GaloisRingElement::EXCEPTIONAL_SEQUENCE[j]); - res = res - * (GaloisRingElement::EXCEPTIONAL_SEQUENCE[i] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[j]) - .inverse(); - res - } - - pub fn deg_2_lagrange_polys_at_zero() -> [GaloisRingElement; 3] { - let mut res = [GaloisRingElement::ONE; 3]; - for i in 1..=3 { - for j in 1..=3 { - if j != i { - res[i - 1] 
= res[i - 1] * (-GaloisRingElement::EXCEPTIONAL_SEQUENCE[j]); - res[i - 1] = res[i - 1] - * (GaloisRingElement::EXCEPTIONAL_SEQUENCE[i] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[j]) - .inverse(); - } - } - } - res - } - - pub fn reconstruct_deg_2_shares(shares: &[ShamirGaloisRingShare; 3]) -> GaloisRingElement { - let lagrange_polys_at_zero = Self::deg_2_lagrange_polys_at_zero(); - shares - .iter() - .map(|s| s.y * lagrange_polys_at_zero[s.id - 1]) - .reduce(|a, b| a + b) - .unwrap() - } - } - - #[cfg(test)] - mod tests { - use super::{GaloisRingElement, ShamirGaloisRingShare}; - use rand::thread_rng; - - #[test] - fn inverses() { - for g_e in [ - GaloisRingElement::ONE, - -GaloisRingElement::ONE, - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2], - ] { - assert_eq!(g_e.inverse() * g_e, GaloisRingElement::ONE); - } - } - #[test] - fn sharing() { - let input1 = GaloisRingElement::random(&mut rand::thread_rng()); - let input2 = GaloisRingElement::random(&mut rand::thread_rng()); - - let shares1 = ShamirGaloisRingShare::encode_3(&input1, &mut thread_rng()); - let shares2 = ShamirGaloisRingShare::encode_3(&input2, &mut thread_rng()); - let shares_mul = [ - shares1[0] * shares2[0], - shares1[1] * shares2[1], - shares1[2] * shares2[2], - ]; - - let reconstructed = ShamirGaloisRingShare::reconstruct_deg_2_shares(&shares_mul); - let expected = input1 * input2; - - 
assert_eq!(reconstructed, expected); - } - #[test] - fn sharing_mat() { - let input1 = GaloisRingElement::random(&mut rand::thread_rng()); - let input2 = GaloisRingElement::random(&mut rand::thread_rng()); - - let shares1 = ShamirGaloisRingShare::encode_3_mat(&input1.coefs, &mut thread_rng()); - let shares2 = ShamirGaloisRingShare::encode_3_mat(&input2.coefs, &mut thread_rng()); - let shares_mul = [ - shares1[0] * shares2[0], - shares1[1] * shares2[1], - shares1[2] * shares2[2], - ]; - - let reconstructed = ShamirGaloisRingShare::reconstruct_deg_2_shares(&shares_mul); - let expected = input1 * input2; - - assert_eq!(reconstructed, expected); - } - } -} - pub mod degree4 { use crate::id::PartyID; use basis::{Basis, Monomial}; @@ -354,12 +34,23 @@ pub mod degree4 { coefs: [1, 0, 0, 0], basis: PhantomData, }; - pub const EXCEPTIONAL_SEQUENCE: [GaloisRingElement; 5] = [ - GaloisRingElement::ZERO, - GaloisRingElement::ONE, + pub const EXCEPTIONAL_SEQUENCE: [GaloisRingElement; 16] = [ + GaloisRingElement::from_coefs([0, 0, 0, 0]), + GaloisRingElement::from_coefs([1, 0, 0, 0]), GaloisRingElement::from_coefs([0, 1, 0, 0]), GaloisRingElement::from_coefs([1, 1, 0, 0]), GaloisRingElement::from_coefs([0, 0, 1, 0]), + GaloisRingElement::from_coefs([1, 0, 1, 0]), + GaloisRingElement::from_coefs([0, 1, 1, 0]), + GaloisRingElement::from_coefs([1, 1, 1, 0]), + GaloisRingElement::from_coefs([0, 0, 0, 1]), + GaloisRingElement::from_coefs([1, 0, 0, 1]), + GaloisRingElement::from_coefs([0, 1, 0, 1]), + GaloisRingElement::from_coefs([1, 1, 0, 1]), + GaloisRingElement::from_coefs([0, 0, 1, 1]), + GaloisRingElement::from_coefs([1, 0, 1, 1]), + GaloisRingElement::from_coefs([0, 1, 1, 1]), + GaloisRingElement::from_coefs([1, 1, 1, 1]), ]; pub fn encode1(x: &[u16]) -> Option> { if x.len() % 4 != 0 { @@ -388,40 +79,47 @@ pub mod degree4 { ) } + /// Inverse of the element, if it exists + /// + /// # Panics + /// + /// This function panics if the element has no inverse pub fn inverse(&self) -> 
Self { // hard-coded inverses for some elements we need // too lazy to implement the general case in rust // and we do not need the general case, since this is only used for the lagrange // polys, which can be pre-computed anyway - if *self == GaloisRingElement::ZERO { - panic!("Division by zero"); + if self.coefs.iter().all(|x| x % 2 == 0) { + panic!("Element has no inverse"); } - if *self == GaloisRingElement::ONE { - return GaloisRingElement::ONE; - } + // inversion by exponentition by (p^r -1) * p^(m-1) - 1, with p = 2, r = 4, m = + // 16 + const P: u32 = 2; + const R: u32 = 4; + const M: u32 = 16; + const EXP: u32 = (P.pow(R) - 1) * P.pow(M - 1) - 1; - if *self == -GaloisRingElement::ONE { - return -GaloisRingElement::ONE; - } - if *self == GaloisRingElement::from_coefs([0, 1, 0, 0]) { - return GaloisRingElement::from_coefs([65535, 0, 0, 1]); - } - if *self == GaloisRingElement::from_coefs([0, 65535, 0, 0]) { - return GaloisRingElement::from_coefs([1, 0, 0, 65535]); - } - if *self == GaloisRingElement::from_coefs([1, 1, 0, 0]) { - return GaloisRingElement::from_coefs([2, 65535, 1, 65535]); - } - if *self == GaloisRingElement::from_coefs([1, 65535, 0, 0]) { - return GaloisRingElement::from_coefs([0, 65535, 65535, 65535]); + self.pow(EXP) + } + + /// Basic exponentiation by squaring, not constant time + pub fn pow(&self, mut exp: u32) -> Self { + if exp == 0 { + return Self::ONE; } - if *self == GaloisRingElement::from_coefs([65535, 1, 0, 0]) { - return GaloisRingElement::from_coefs([0, 1, 1, 1]); + let mut x = *self; + let mut y = Self::ONE; + while exp > 1 { + if exp % 2 == 1 { + y = x * y; + exp -= 1; + } + x = x * x; + exp /= 2; } - - panic!("No inverse for {:?} in LUT", self); + x * y } #[allow(non_snake_case)] @@ -719,6 +417,28 @@ pub mod degree4 { res } + // zero-indexed party ids here, party i will map to i+1 in the exceptional + // sequence + pub fn deg_1_lagrange_poly_at_v( + my_id: usize, + other_id: usize, + v: usize, + ) -> GaloisRingElement { + 
assert!(my_id < 15); + assert!(other_id < 15); + assert!(v < 15); + let i = my_id + 1; + let j = other_id + 1; + let v = v + 1; + let mut res = GaloisRingElement::EXCEPTIONAL_SEQUENCE[v] + - GaloisRingElement::EXCEPTIONAL_SEQUENCE[j]; + res = res + * (GaloisRingElement::EXCEPTIONAL_SEQUENCE[i] + - GaloisRingElement::EXCEPTIONAL_SEQUENCE[j]) + .inverse(); + res + } + pub fn deg_2_lagrange_polys_at_zero() -> [GaloisRingElement; 3] { let mut res = [GaloisRingElement::ONE; 3]; for i in 1..=3 { @@ -753,25 +473,25 @@ pub mod degree4 { use crate::galois::degree4::basis; #[test] - fn inverses() { - for g_e in [ - GaloisRingElement::ONE, - -GaloisRingElement::ONE, - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[1], - GaloisRingElement::EXCEPTIONAL_SEQUENCE[3] - - GaloisRingElement::EXCEPTIONAL_SEQUENCE[2], - ] { + fn exceptional_sequence_is_pairwise_diff_invertible() { + for i in 0..GaloisRingElement::EXCEPTIONAL_SEQUENCE.len() { + for j in 0..GaloisRingElement::EXCEPTIONAL_SEQUENCE.len() { + if i != j { + let diff = GaloisRingElement::EXCEPTIONAL_SEQUENCE[i] + - GaloisRingElement::EXCEPTIONAL_SEQUENCE[j]; + assert_eq!(diff.inverse() * diff, GaloisRingElement::ONE); + } + } + } + } + + #[test] + fn random_inverses() { + for _ in 0..100 { + let mut g_e = GaloisRingElement::random(&mut rand::thread_rng()); + // make it have an inverse + g_e.coefs.iter_mut().for_each(|x| *x |= 1); + assert_eq!(g_e.inverse() * g_e, GaloisRingElement::ONE); } } diff --git 
a/iris-mpc-common/src/galois_engine.rs b/iris-mpc-common/src/galois_engine.rs index c50927192..dce573ed8 100644 --- a/iris-mpc-common/src/galois_engine.rs +++ b/iris-mpc-common/src/galois_engine.rs @@ -3,7 +3,7 @@ pub type CompactGaloisRingShares = Vec>; pub mod degree4 { use crate::{ galois::degree4::{basis, GaloisRingElement, ShamirGaloisRingShare}, - iris_db::iris::IrisCodeArray, + iris_db::iris::{IrisCode, IrisCodeArray}, IRIS_CODE_LENGTH, MASK_CODE_LENGTH, }; use base64::{prelude::BASE64_STANDARD, Engine}; @@ -316,6 +316,36 @@ pub mod degree4 { } } + pub struct FullGaloisRingIrisCodeShare { + pub code: GaloisRingIrisCodeShare, + pub mask: GaloisRingTrimmedMaskCodeShare, + } + + impl FullGaloisRingIrisCodeShare { + pub fn encode_iris_code( + iris: &IrisCode, + rng: &mut (impl Rng + CryptoRng), + ) -> [FullGaloisRingIrisCodeShare; 3] { + let [code0, code1, code2] = + GaloisRingIrisCodeShare::encode_iris_code(&iris.code, &iris.mask, rng); + let [mask0, mask1, mask2] = GaloisRingIrisCodeShare::encode_mask_code(&iris.mask, rng); + [ + FullGaloisRingIrisCodeShare { + code: code0, + mask: mask0.into(), + }, + FullGaloisRingIrisCodeShare { + code: code1, + mask: mask1.into(), + }, + FullGaloisRingIrisCodeShare { + code: code2, + mask: mask2.into(), + }, + ] + } + } + #[cfg(test)] mod tests { use crate::{ diff --git a/iris-mpc-store/src/lib.rs b/iris-mpc-store/src/lib.rs index 8e5904b4c..15cca4345 100644 --- a/iris-mpc-store/src/lib.rs +++ b/iris-mpc-store/src/lib.rs @@ -214,6 +214,36 @@ impl Store { Ok(ids) } + pub async fn insert_irises_overriding( + &self, + tx: &mut Transaction<'_, Postgres>, + codes_and_masks: &[StoredIrisRef<'_>], + ) -> Result<()> { + if codes_and_masks.is_empty() { + return Ok(()); + } + let mut query = sqlx::QueryBuilder::new( + "INSERT INTO irises (id, left_code, left_mask, right_code, right_mask)", + ); + query.push_values(codes_and_masks, |mut query, iris| { + query.push_bind(iris.id); + query.push_bind(cast_slice::(iris.left_code)); + 
query.push_bind(cast_slice::(iris.left_mask)); + query.push_bind(cast_slice::(iris.right_code)); + query.push_bind(cast_slice::(iris.right_mask)); + }); + query.push( + r#" +ON CONFLICT (id) +DO UPDATE SET left_code = EXCLUDED.left_code, left_mask = EXCLUDED.left_mask, right_code = EXCLUDED.right_code, right_mask = EXCLUDED.right_mask; +"#, + ); + + query.build().execute(tx.deref_mut()).await?; + + Ok(()) + } + /// Update existing iris with given shares. pub async fn update_iris( &self, diff --git a/iris-mpc-upgrade/Cargo.toml b/iris-mpc-upgrade/Cargo.toml index cdc5b2a12..fe1c040f8 100644 --- a/iris-mpc-upgrade/Cargo.toml +++ b/iris-mpc-upgrade/Cargo.toml @@ -11,7 +11,7 @@ repository.workspace = true axum.workspace = true iris-mpc-common = { path = "../iris-mpc-common" } iris-mpc-store = { path = "../iris-mpc-store" } -clap.workspace = true +clap = { workspace = true, features = ["env"] } eyre.workspace = true bytemuck.workspace = true sqlx.workspace = true @@ -30,10 +30,23 @@ mpc-uniqueness-check = { package = "mpc", git = "https://github.com/worldcoin/mp indicatif = "0.17.8" rcgen = "0.13.1" tokio-native-tls = "0.3.1" +tonic = { version = "0.12.3", features = [ + "tls", + "tls-native-roots", + "transport", +] } +prost = "0.13.3" +sha2 = "0.10.8" +thiserror.workspace = true +hkdf = "0.12.4" [dev-dependencies] float_eq = "1" + +[build-dependencies] +tonic-build = "0.12.3" + [[bin]] name = "upgrade-checker" path = "src/bin/checker.rs" @@ -49,3 +62,15 @@ path = "src/bin/tcp_ssl_upgrade_client.rs" [[bin]] name = "seed-v1-dbs" path = "src/bin/seed_v1_dbs.rs" + +[[bin]] +name = "seed-v2-dbs" +path = "src/bin/seed_v2_dbs.rs" + +[[bin]] +name = "reshare-server" +path = "src/bin/reshare-server.rs" + +[[bin]] +name = "reshare-client" +path = "src/bin/reshare-client.rs" diff --git a/iris-mpc-upgrade/build.rs b/iris-mpc-upgrade/build.rs new file mode 100644 index 000000000..8a43d83c9 --- /dev/null +++ b/iris-mpc-upgrade/build.rs @@ -0,0 +1,11 @@ +fn main() { + 
println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=protos/reshare.proto"); + tonic_build::configure() + .out_dir("src/proto/") + .compile_protos( + &["reshare.proto"], // Files in the path + &["protos"], // The include path to search + ) + .unwrap(); +} diff --git a/iris-mpc-upgrade/protos/reshare.proto b/iris-mpc-upgrade/protos/reshare.proto new file mode 100644 index 000000000..3ceef37ab --- /dev/null +++ b/iris-mpc-upgrade/protos/reshare.proto @@ -0,0 +1,35 @@ + +syntax = "proto3"; +package iris_mpc_reshare; + +message IrisCodeReShare { + bytes LeftIrisCodeShare = 1; + bytes LeftMaskShare = 2; + bytes RightIrisCodeShare = 3; + bytes RightMaskShare = 4; +} + +message IrisCodeReShareRequest { + uint64 SenderId = 1; + uint64 OtherId = 2; + uint64 ReceiverId = 3; + int64 IdRangeStartInclusive = 4; + int64 IdRangeEndNonInclusive = 5; + repeated IrisCodeReShare IrisCodeReShares = 6; + bytes ClientCorrelationSanityCheck = 7; +} + +message IrisCodeReShareResponse { + IrisCodeReShareStatus Status = 1; + string Message = 2; +} + +enum IrisCodeReShareStatus { + IRIS_CODE_RE_SHARE_STATUS_OK = 0; + IRIS_CODE_RE_SHARE_STATUS_FULL_QUEUE = 1; + IRIS_CODE_RE_SHARE_STATUS_ERROR = 2; +} + +service IrisCodeReShareService { + rpc ReShare(IrisCodeReShareRequest) returns (IrisCodeReShareResponse); +} diff --git a/iris-mpc-upgrade/src/bin/.gitignore b/iris-mpc-upgrade/src/bin/.gitignore index c3d47bb9d..cc0468a13 100644 --- a/iris-mpc-upgrade/src/bin/.gitignore +++ b/iris-mpc-upgrade/src/bin/.gitignore @@ -1,3 +1,4 @@ out0/ out1/ out2/ +*.log diff --git a/iris-mpc-upgrade/src/bin/README.md b/iris-mpc-upgrade/src/bin/README.md index caa02666e..53e25c6c8 100644 --- a/iris-mpc-upgrade/src/bin/README.md +++ b/iris-mpc-upgrade/src/bin/README.md @@ -19,7 +19,7 @@ cargo run --release --bin seed-v1-dbs -- --side left --shares-db-urls postgres:/ ## Upgrade for left eye -### Run the 3 upgrade servers +### Run the 3 upgrade servers Concurrently run: @@ -71,12 +71,86 
@@ Concurrently run:
 
 ```bash
 cargo run --release --bin upgrade-client -- --server1 127.0.0.1:8000 --server2 127.0.0.1:8001 --server3 127.0.0.1:8002 --db-start 0 --db-end 10000 --party-id 0 --eye right --shares-db-url postgres://postgres:postgres@localhost:6100 --masks-db-url postgres://postgres:postgres@localhost:6111
 ```
+
 ```bash
 cargo run --release --bin upgrade-client -- --server1 127.0.0.1:8000 --server2 127.0.0.1:8001 --server3 127.0.0.1:8002 --db-start 0 --db-end 10000 --party-id 1 --eye right --shares-db-url postgres://postgres:postgres@localhost:6101 --masks-db-url postgres://postgres:postgres@localhost:6111
 ```
+
 ## Check the upgrade was successful
 
 ```bash
 cargo run --release --bin upgrade-checker -- --environment dev --num-elements 10000 --db-urls postgres://postgres:postgres@localhost:6100 --db-urls postgres://postgres:postgres@localhost:6101 --db-urls postgres://postgres:postgres@localhost:6111 --db-urls postgres://postgres:postgres@localhost:6200 --db-urls postgres://postgres:postgres@localhost:6201 --db-urls postgres://postgres:postgres@localhost:6202
 ```
+
+# Reshare Protocol
+
+The aim of the reshare protocol is to allow 2 existing parties in SMPCv2 to work together to recover the share of another party using a simple MPC functionality.
+
+## Internal Server structure
+
+The current internal structure of this service works as follows:
+
+* The receiving party hosts a gRPC server to receive reshare batches from the two sending parties.
+* The two sending parties send reshare batches via gRPC.
+* The gRPC server collects reshare request batches from the two clients and stores them internally.
+* Once matching requests from both parties are collected, the server processes the requests and stores them to the DB.
+
+Currently, the matching is not very robust and requires that both clients send batches for the exact same ranges (i.e., clients 1 and 2 send batches for ids 1-100; it cannot handle client 1 sending 1-100 and client 2 sending 1-50 and 51-100).
+
+## Example Protocol run
+
+In this example we start a reshare process where parties 0 and 1 are the senders (i.e., clients) and party 2 is the receiver (i.e., server).
+
+### Bring up some DBs and seed them
+
+Here, the seed-v2-dbs binary just creates fully replicated DBs for 3 parties, in DBs with ports 6200,6201,6202. Additionally, there is also another DB at 6203, which we will use as a target for the reshare protocol to fill into.
+
+```bash
+docker-compose up -d
+cargo run --release --bin seed-v2-dbs -- --db-url-party1 postgres://postgres:postgres@localhost:6200 --db-url-party2 postgres://postgres:postgres@localhost:6201 --db-url-party3 postgres://postgres:postgres@localhost:6202 --schema-name-party1 SMPC_testing_0 --schema-name-party2 SMPC_testing_1 --schema-name-party3 SMPC_testing_2 --fill-to 10000 --batch-size 100
+```
+
+### Start a server for the receiving party
+
+```bash
+cargo run --release --bin reshare-server -- --party-id 2 --sender1-party-id 0 --sender2-party-id 1 --bind-addr 0.0.0.0:7000 --environment testing --db-url postgres://postgres:postgres@localhost:6203 --db-start 1 --db-end 10001 --batch-size 100
+```
+
+Short rundown of the parameters:
+
+* `party-id`: the 0-indexed party id of the receiving party. This corresponds to the (i+1)-th point on the exceptional sequence for Shamir poly evaluation
+* `sender1-party-id`: The party id of the first sender, just for sanity checks against received packets. (Order between sender1 and sender2 does not matter here)
+* `sender2-party-id`: The party id of the second sender, just for sanity checks against received packets.
+* `bind-addr`: Socket addr to bind to for the gRPC server.
+* `environment`: Which environment are we running in, used for DB schema name
+* `db-url`: Postgres connection string. We save the results in this DB
+* `db-start`: Expected range of DB entries to receive, just used for sanity checks. Start is inclusive.
+* `db-end`: Expected range of DB entries to receive, just used for sanity checks. End is exclusive.
+* `batch-size`: maximum size of received reshare batches
+
+### Start clients for the sending parties
+
+```bash
+cargo run --release --bin reshare-client -- --party-id 0 --other-party-id 1 --target-party-id 2 --server-url http://localhost:7000 --environment testing --db-url postgres://postgres:postgres@localhost:6200 --db-start 1 --db-end 10001 --batch-size 100
+```
+
+```bash
+cargo run --release --bin reshare-client -- --party-id 1 --other-party-id 0 --target-party-id 2 --server-url http://localhost:7000 --environment testing --db-url postgres://postgres:postgres@localhost:6201 --db-start 1 --db-end 10001 --batch-size 100
+```
+
+Short rundown of the parameters:
+
+* `party-id`: the 0-indexed party id of our own client party. This corresponds to the (i+1)-th point on the exceptional sequence for Shamir poly evaluation
+* `other-party-id`: the 0-indexed party id of the other client party. This needs to be passed for the correct calculation of Lagrange interpolation polynomials.
+* `target-party-id`: the 0-indexed party id of the receiving party. This needs to be passed for the correct calculation of Lagrange interpolation polynomials.
+* `server-url`: URL where to reach the gRPC server (can also be https, client supports both).
+* `environment`: Which environment are we running in, used for DB schema name
+* `db-url`: Postgres connection string. We load our shares from this DB
+* `db-start`: Range of DB entries to send. Start is inclusive.
+* `db-end`: Range of DB entries to send. End is exclusive.
+* `batch-size`: maximum size of sent reshare batches
+
+### Checking results
+
+Since the shares on a given Shamir poly are deterministic given the party ids, the above upgrade process can be checked by comparing the databases at ports 6202 and 6203 for equality.
diff --git a/iris-mpc-upgrade/src/bin/docker-compose.yaml b/iris-mpc-upgrade/src/bin/docker-compose.yaml index cab988519..40198e699 100644 --- a/iris-mpc-upgrade/src/bin/docker-compose.yaml +++ b/iris-mpc-upgrade/src/bin/docker-compose.yaml @@ -1,5 +1,4 @@ services: - old-db-shares-1: image: postgres:16 ports: @@ -42,3 +41,18 @@ services: environment: POSTGRES_USER: "postgres" POSTGRES_PASSWORD: "postgres" + new-db-4: + image: postgres:16 + ports: + - "6203:5432" + environment: + POSTGRES_USER: "postgres" + POSTGRES_PASSWORD: "postgres" + localstack: + image: localstack/localstack + ports: + - "127.0.0.1:4566:4566" + - "127.0.0.1:4571:4571" + environment: + - SERVICES=kms + - DEFAULT_REGION=us-east-1 diff --git a/iris-mpc-upgrade/src/bin/reshare-client.rs b/iris-mpc-upgrade/src/bin/reshare-client.rs new file mode 100644 index 000000000..8cd751fee --- /dev/null +++ b/iris-mpc-upgrade/src/bin/reshare-client.rs @@ -0,0 +1,154 @@ +use clap::Parser; +use futures::StreamExt; +use hkdf::Hkdf; +use iris_mpc_common::{ + galois_engine::degree4::{GaloisRingIrisCodeShare, GaloisRingTrimmedMaskCodeShare}, + helpers::kms_dh::derive_shared_secret, +}; +use iris_mpc_store::Store; +use iris_mpc_upgrade::{ + config::ReShareClientConfig, + proto::{ + self, + iris_mpc_reshare::{ + iris_code_re_share_service_client::IrisCodeReShareServiceClient, IrisCodeReShareStatus, + }, + }, + reshare::IrisCodeReshareSenderHelper, + utils::install_tracing, +}; +use sha2::Sha256; + +const APP_NAME: &str = "SMPC"; + +async fn derive_common_seed(config: &ReShareClientConfig) -> eyre::Result<[u8; 32]> { + let shared_secret = if config.environment == "testing" { + // TODO: remove once localstack fixes KMS bug that returns different shared + // secrets + [0u8; 32] + } else { + derive_shared_secret(&config.my_kms_key_arn, &config.other_kms_key_arn).await? 
+ }; + + let hk = Hkdf::::new( + // sesstion id is used as salt + Some(config.reshare_run_session_id.as_bytes()), + &shared_secret, + ); + let mut common_seed = [0u8; 32]; + // expand the common seed bound to the context "ReShare-Protocol-Client" + hk.expand(b"ReShare-Protocol-Client", &mut common_seed) + .map_err(|e| eyre::eyre!("error during HKDF expansion: {}", e))?; + Ok(common_seed) +} + +#[tokio::main] +async fn main() -> eyre::Result<()> { + install_tracing(); + let config = ReShareClientConfig::parse(); + + let common_seed = derive_common_seed(&config).await?; + + let schema_name = format!("{}_{}_{}", APP_NAME, config.environment, config.party_id); + let store = Store::new(&config.db_url, &schema_name).await?; + + let iris_stream = store.stream_irises_in_range(config.db_start..config.db_end); + let mut iris_stream_chunks = iris_stream.chunks(config.batch_size as usize); + + let mut iris_reshare_helper = IrisCodeReshareSenderHelper::new( + config.party_id as usize, + config.other_party_id as usize, + config.target_party_id as usize, + common_seed, + ); + + let encoded_message_size = + proto::get_size_of_reshare_iris_code_share_batch(config.batch_size as usize); + if encoded_message_size > 100 * 1024 * 1024 { + tracing::warn!( + "encoded batch message size is large: {}MB", + encoded_message_size as f64 / 1024.0 / 1024.0 + ); + } + let encoded_message_size_with_buf = (encoded_message_size as f64 * 1.1) as usize; + + let mut grpc_client = IrisCodeReShareServiceClient::connect(config.server_url) + .await? 
+ .max_decoding_message_size(encoded_message_size_with_buf) + .max_encoding_message_size(encoded_message_size_with_buf); + + while let Some(chunk) = iris_stream_chunks.next().await { + let iris_codes = chunk.into_iter().collect::, sqlx::Error>>()?; + if iris_codes.is_empty() { + continue; + } + let db_chunk_start = iris_codes.first().unwrap().id(); + let db_chunk_end = iris_codes.last().unwrap().id(); + + // sanity check + for window in iris_codes.as_slice().windows(2) { + assert_eq!( + window[0].id() + 1, + window[1].id(), + "expect consecutive iris codes" + ); + } + + iris_reshare_helper.start_reshare_batch(db_chunk_start, db_chunk_end + 1); + + for iris_code in iris_codes { + iris_reshare_helper.add_reshare_iris_to_batch( + iris_code.id(), + GaloisRingIrisCodeShare { + id: config.party_id as usize + 1, + coefs: iris_code.left_code().try_into().unwrap(), + }, + GaloisRingTrimmedMaskCodeShare { + id: config.party_id as usize + 1, + coefs: iris_code.left_mask().try_into().unwrap(), + }, + GaloisRingIrisCodeShare { + id: config.party_id as usize + 1, + coefs: iris_code.right_code().try_into().unwrap(), + }, + GaloisRingTrimmedMaskCodeShare { + id: config.party_id as usize + 1, + coefs: iris_code.right_mask().try_into().unwrap(), + }, + ); + } + tracing::info!( + "Submitting reshare request for iris codes {} to {}", + db_chunk_start, + db_chunk_end + ); + + let request = iris_reshare_helper.finalize_reshare_batch(); + let mut timeout = tokio::time::Duration::from_millis(config.retry_backoff_millis); + loop { + let resp = grpc_client.re_share(request.clone()).await?; + let resp = resp.into_inner(); + match resp.status { + x if x == IrisCodeReShareStatus::Ok as i32 => { + break; + } + x if x == IrisCodeReShareStatus::FullQueue as i32 => { + tokio::time::sleep(timeout).await; + timeout += tokio::time::Duration::from_millis(config.retry_backoff_millis); + continue; + } + x if x == IrisCodeReShareStatus::Error as i32 => { + return Err(eyre::eyre!( + "error during reshare 
request submission: {}", + resp.message + )); + } + _ => { + return Err(eyre::eyre!("unexpected reshare status: {}", resp.status)); + } + } + } + } + + Ok(()) +} diff --git a/iris-mpc-upgrade/src/bin/reshare-protocol-local.sh b/iris-mpc-upgrade/src/bin/reshare-protocol-local.sh new file mode 100755 index 000000000..f41b52e7d --- /dev/null +++ b/iris-mpc-upgrade/src/bin/reshare-protocol-local.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +rm -rf "*.log" + +docker-compose down --remove-orphans +docker-compose up -d + +sleep 1 + +aws_local() { + AWS_ACCESS_KEY_ID=test AWS_SECRET_ACCESS_KEY=test AWS_DEFAULT_REGION=us-east-1 aws --endpoint-url=http://${LOCALSTACK_HOST:-localhost}:4566 "$@" +} + +key1_metadata=$(aws_local kms create-key --region us-east-1 --description "Key for Party1" --key-spec ECC_NIST_P256 --key-usage KEY_AGREEMENT) +echo "Created key1: $key1_metadata" +key1_arn=$(echo "$key1_metadata" | jq ".KeyMetadata.Arn" -r) +echo "Key1 ARN: $key1_arn" +key2_metadata=$(aws_local kms create-key --region us-east-1 --description "Key for Party2" --key-spec ECC_NIST_P256 --key-usage KEY_AGREEMENT) +echo "Created key2: $key2_metadata" +key2_arn=$(echo "$key2_metadata" | jq ".KeyMetadata.Arn" -r) +echo "Key2 ARN: $key2_arn" + +sleep 1 + +cargo build --release --bin seed-v2-dbs --bin reshare-server --bin reshare-client + + + +TARGET_DIR=$(cargo metadata --format-version 1 | jq ".target_directory" -r) + +$TARGET_DIR/release/seed-v2-dbs --db-url-party1 postgres://postgres:postgres@localhost:6200 --db-url-party2 postgres://postgres:postgres@localhost:6201 --db-url-party3 postgres://postgres:postgres@localhost:6202 --schema-name-party1 SMPC_testing_0 --schema-name-party2 SMPC_testing_1 --schema-name-party3 SMPC_testing_2 --fill-to 10000 --batch-size 100 + +$TARGET_DIR/release/reshare-server --party-id 2 --sender1-party-id 0 --sender2-party-id 1 --bind-addr 0.0.0.0:7000 --environment testing --db-url postgres://postgres:postgres@localhost:6203 --batch-size 100 & > 
reshare-server.log + +sleep 5 + +AWS_ACCESS_KEY_ID=test AWS_SECRET_ACCESS_KEY=test AWS_DEFAULT_REGION=us-east-1 AWS_ENDPOINT_URL=http://${LOCALSTACK_HOST:-localhost}:4566 $TARGET_DIR/release/reshare-client --party-id 0 --other-party-id 1 --target-party-id 2 --server-url http://localhost:7000 --environment testing --db-url postgres://postgres:postgres@localhost:6200 --db-start 1 --db-end 10001 --batch-size 100 --my-kms-key-arn $key1_arn --other-kms-key-arn $key2_arn --reshare-run-session-id testrun1 & > reshare-client-0.log + +AWS_ACCESS_KEY_ID=test AWS_SECRET_ACCESS_KEY=test AWS_DEFAULT_REGION=us-east-1 AWS_ENDPOINT_URL=http://${LOCALSTACK_HOST:-localhost}:4566 $TARGET_DIR/release/reshare-client --party-id 1 --other-party-id 0 --target-party-id 2 --server-url http://localhost:7000 --environment testing --db-url postgres://postgres:postgres@localhost:6201 --db-start 1 --db-end 10001 --batch-size 100 --my-kms-key-arn $key2_arn --other-kms-key-arn $key1_arn --reshare-run-session-id testrun1 > reshare-client-1.log + +sleep 5 +killall reshare-server + diff --git a/iris-mpc-upgrade/src/bin/reshare-server.rs b/iris-mpc-upgrade/src/bin/reshare-server.rs new file mode 100644 index 000000000..7b948ef7c --- /dev/null +++ b/iris-mpc-upgrade/src/bin/reshare-server.rs @@ -0,0 +1,56 @@ +use clap::Parser; +use iris_mpc_store::Store; +use iris_mpc_upgrade::{ + config::ReShareServerConfig, + proto::{ + self, iris_mpc_reshare::iris_code_re_share_service_server::IrisCodeReShareServiceServer, + }, + reshare::{GrpcReshareServer, IrisCodeReshareReceiverHelper}, + utils::install_tracing, +}; +use tonic::transport::Server; + +const APP_NAME: &str = "SMPC"; + +#[tokio::main] +async fn main() -> eyre::Result<()> { + install_tracing(); + let config = ReShareServerConfig::parse(); + + let schema_name = format!("{}_{}_{}", APP_NAME, config.environment, config.party_id); + let store = Store::new(&config.db_url, &schema_name).await?; + + let receiver_helper = IrisCodeReshareReceiverHelper::new( + 
config.party_id as usize, + config.sender1_party_id as usize, + config.sender2_party_id as usize, + config.max_buffer_size, + ); + + let encoded_message_size = + proto::get_size_of_reshare_iris_code_share_batch(config.batch_size as usize); + if encoded_message_size > 100 * 1024 * 1024 { + tracing::warn!( + "encoded batch message size is large: {}MB", + encoded_message_size as f64 / 1024.0 / 1024.0 + ); + } + let encoded_message_size_with_buf = (encoded_message_size as f64 * 1.1) as usize; + let grpc_server = + IrisCodeReShareServiceServer::new(GrpcReshareServer::new(store, receiver_helper)) + .max_decoding_message_size(encoded_message_size_with_buf) + .max_encoding_message_size(encoded_message_size_with_buf); + + Server::builder() + .add_service(grpc_server) + .serve_with_shutdown(config.bind_addr, shutdown_signal()) + .await?; + + Ok(()) +} + +async fn shutdown_signal() { + tokio::signal::ctrl_c() + .await + .expect("failed to install CTRL+C signal handler"); +} diff --git a/iris-mpc-upgrade/src/bin/seed_v2_dbs.rs b/iris-mpc-upgrade/src/bin/seed_v2_dbs.rs new file mode 100644 index 000000000..c737c82c0 --- /dev/null +++ b/iris-mpc-upgrade/src/bin/seed_v2_dbs.rs @@ -0,0 +1,145 @@ +use clap::Parser; +use iris_mpc_common::{ + galois_engine::degree4::FullGaloisRingIrisCodeShare, iris_db::iris::IrisCode, +}; +use iris_mpc_store::{Store, StoredIrisRef}; +use itertools::Itertools; +use rand::thread_rng; +use std::cmp::min; + +#[derive(Debug, Clone, Parser)] +struct Args { + #[clap(long)] + db_url_party1: String, + + #[clap(long)] + db_url_party2: String, + + #[clap(long)] + db_url_party3: String, + + #[clap(long)] + fill_to: u64, + + #[clap(long)] + batch_size: usize, + + #[clap(long)] + schema_name_party1: String, + + #[clap(long)] + schema_name_party2: String, + + #[clap(long)] + schema_name_party3: String, + + #[clap(long, value_delimiter = ',', num_args = 1..)] + deleted_identities: Option>, +} + +#[tokio::main] +async fn main() -> eyre::Result<()> { + let args = 
Args::parse(); + + let store1 = Store::new(&args.db_url_party1, &args.schema_name_party1).await?; + let store2 = Store::new(&args.db_url_party2, &args.schema_name_party2).await?; + let store3 = Store::new(&args.db_url_party3, &args.schema_name_party3).await?; + + let mut rng = rand::thread_rng(); + + let latest_serial_id1 = store1.count_irises().await?; + let latest_serial_id2 = store2.count_irises().await?; + let latest_serial_id3 = store3.count_irises().await?; + let mut latest_serial_id = + min(min(latest_serial_id1, latest_serial_id2), latest_serial_id3) as u64; + + if latest_serial_id == args.fill_to { + return Ok(()); + } + // TODO: Does this make sense? + if latest_serial_id == 0 { + latest_serial_id += 1 + } + + let deleted_serial_ids = args.deleted_identities.unwrap_or_default(); + + for range_chunk in &(latest_serial_id..args.fill_to).chunks(args.batch_size) { + let range_chunk = range_chunk.collect_vec(); + let (party1, party2, party3): (Vec<_>, Vec<_>, Vec<_>) = range_chunk + .iter() + .map(|serial_id| { + let (iris_code_left, iris_code_right) = + if deleted_serial_ids.contains(&(*serial_id as i32)) { + ( + // TODO: set them to the deleted values + IrisCode::random_rng(&mut thread_rng()), + IrisCode::random_rng(&mut thread_rng()), + ) + } else { + ( + IrisCode::random_rng(&mut rng), + IrisCode::random_rng(&mut rng), + ) + }; + let [left1, left2, left3] = + FullGaloisRingIrisCodeShare::encode_iris_code(&iris_code_left, &mut rng); + let [right1, right2, right3] = + FullGaloisRingIrisCodeShare::encode_iris_code(&iris_code_right, &mut rng); + ((left1, right1), (left2, right2), (left3, right3)) + }) + .multiunzip(); + let party1_insert = party1 + .iter() + .zip(range_chunk.iter()) + .map(|((left, right), id)| StoredIrisRef { + id: *id as i64, + left_code: &left.code.coefs, + left_mask: &left.mask.coefs, + right_code: &right.code.coefs, + right_mask: &right.mask.coefs, + }) + .collect_vec(); + + let mut tx = store1.tx().await?; + store1 + 
.insert_irises_overriding(&mut tx, &party1_insert) + .await?; + tx.commit().await?; + + let party2_insert = party2 + .iter() + .zip(range_chunk.iter()) + .map(|((left, right), id)| StoredIrisRef { + id: *id as i64, + left_code: &left.code.coefs, + left_mask: &left.mask.coefs, + right_code: &right.code.coefs, + right_mask: &right.mask.coefs, + }) + .collect_vec(); + let mut tx = store2.tx().await?; + store2 + .insert_irises_overriding(&mut tx, &party2_insert) + .await?; + tx.commit().await?; + + let party3_insert = party3 + .iter() + .zip(range_chunk.iter()) + .map(|((left, right), id)| StoredIrisRef { + id: *id as i64, + left_code: &left.code.coefs, + left_mask: &left.mask.coefs, + right_code: &right.code.coefs, + right_mask: &right.mask.coefs, + }) + .collect_vec(); + let mut tx = store3.tx().await?; + store3 + .insert_irises_overriding(&mut tx, &party3_insert) + .await?; + tx.commit().await?; + } + + Ok(()) +} diff --git a/iris-mpc-upgrade/src/config.rs b/iris-mpc-upgrade/src/config.rs index 92034a08d..cde17fac1 100644 --- a/iris-mpc-upgrade/src/config.rs +++ b/iris-mpc-upgrade/src/config.rs @@ -118,3 +118,103 @@ impl fmt::Debug for UpgradeClientConfig { .finish() } } + +#[derive(Parser)] +pub struct ReShareClientConfig { + /// The URL of the server to send reshare messages to + #[clap(long, default_value = "http://localhost:8000", env("SERVER_URL"))] + pub server_url: String, + + /// The DB index where we start to send Iris codes from (inclusive) + #[clap(long)] + pub db_start: u64, + + /// The DB index where we stop to send Iris codes (exclusive) + #[clap(long)] + pub db_end: u64, + + /// the 0-indexed party ID of the client party + #[clap(long)] + pub party_id: u8, + + /// the 0-indexed party ID of the other client party + #[clap(long)] + pub other_party_id: u8, + + /// the 0-indexed party ID of the receiving party + #[clap(long)] + pub target_party_id: u8, + + /// The batch size to use when sending reshare messages (i.e., how many iris + /// code DB entries 
per message) + #[clap(long)] + pub batch_size: u64, + + /// DB connection URL for the reshare client + #[clap(long)] + pub db_url: String, + + /// The amount of time to wait before retrying a batch if the server queue + /// was full, in milliseconds. Does a simple linear backoff strategy + #[clap(long, default_value = "100")] + pub retry_backoff_millis: u64, + + /// The environment in which the reshare protocol is being run (mostly used + /// for the DB schema name) + #[clap(long)] + pub environment: String, + + /// The ARN of the KMS key that will be used to derive the common secret + #[clap(long)] + pub my_kms_key_arn: String, + + /// The ARN of the KMS key of the other client party that will be used to + /// derive the common secret + #[clap(long)] + pub other_kms_key_arn: String, + + /// The session ID of the reshare protocol run, this will be used to salt + /// the common secret derived between the two parties + #[clap(long)] + pub reshare_run_session_id: String, +} + +#[derive(Parser)] +pub struct ReShareServerConfig { + /// The socket to bind the reshare server to + #[clap(long, default_value = "0.0.0.0:8000", env("BIND_ADDR"))] + pub bind_addr: SocketAddr, + + /// The 0-indexed party ID of the server party + #[clap(long)] + pub party_id: u8, + + /// The 0-indexed party ID of the first client party (order of the two + /// client parties does not matter) + #[clap(long)] + pub sender1_party_id: u8, + + /// The 0-indexed party ID of the second client party (order of the two + /// client parties does not matter) + #[clap(long)] + pub sender2_party_id: u8, + + /// The maximum allowed batch size for reshare messages + #[clap(long)] + pub batch_size: u64, + + /// The DB connection URL to store reshared iris codes to + #[clap(long)] + pub db_url: String, + + /// The environment in which the reshare protocol is being run (mostly used + /// for the DB schema name) + #[clap(long)] + pub environment: String, + + /// The maximum buffer size for the reshare server (i.e., 
how many messages + /// are accepted from one client without receving corresponding messages + /// from the other client) + #[clap(long, default_value = "10")] + pub max_buffer_size: usize, +} diff --git a/iris-mpc-upgrade/src/lib.rs b/iris-mpc-upgrade/src/lib.rs index b979cdbf4..ba1e9afb7 100644 --- a/iris-mpc-upgrade/src/lib.rs +++ b/iris-mpc-upgrade/src/lib.rs @@ -13,6 +13,8 @@ use std::{ pub mod config; pub mod db; pub mod packets; +pub mod proto; +pub mod reshare; pub mod utils; pub trait OldIrisShareSource { diff --git a/iris-mpc-upgrade/src/proto/iris_mpc_reshare.rs b/iris-mpc-upgrade/src/proto/iris_mpc_reshare.rs new file mode 100644 index 000000000..3c26ac978 --- /dev/null +++ b/iris-mpc-upgrade/src/proto/iris_mpc_reshare.rs @@ -0,0 +1,368 @@ +// This file is @generated by prost-build. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IrisCodeReShare { + #[prost(bytes = "vec", tag = "1")] + pub left_iris_code_share: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "2")] + pub left_mask_share: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "3")] + pub right_iris_code_share: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "4")] + pub right_mask_share: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IrisCodeReShareRequest { + #[prost(uint64, tag = "1")] + pub sender_id: u64, + #[prost(uint64, tag = "2")] + pub other_id: u64, + #[prost(uint64, tag = "3")] + pub receiver_id: u64, + #[prost(int64, tag = "4")] + pub id_range_start_inclusive: i64, + #[prost(int64, tag = "5")] + pub id_range_end_non_inclusive: i64, + #[prost(message, repeated, tag = "6")] + pub iris_code_re_shares: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "7")] + pub client_correlation_sanity_check: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IrisCodeReShareResponse { + #[prost(enumeration = "IrisCodeReShareStatus", tag = "1")] + pub status: i32, + #[prost(string, 
tag = "2")] + pub message: ::prost::alloc::string::String, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum IrisCodeReShareStatus { + Ok = 0, + FullQueue = 1, + Error = 2, +} +impl IrisCodeReShareStatus { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Ok => "IRIS_CODE_RE_SHARE_STATUS_OK", + Self::FullQueue => "IRIS_CODE_RE_SHARE_STATUS_FULL_QUEUE", + Self::Error => "IRIS_CODE_RE_SHARE_STATUS_ERROR", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "IRIS_CODE_RE_SHARE_STATUS_OK" => Some(Self::Ok), + "IRIS_CODE_RE_SHARE_STATUS_FULL_QUEUE" => Some(Self::FullQueue), + "IRIS_CODE_RE_SHARE_STATUS_ERROR" => Some(Self::Error), + _ => None, + } + } +} +/// Generated client implementations. +pub mod iris_code_re_share_service_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct IrisCodeReShareServiceClient { + inner: tonic::client::Grpc, + } + impl IrisCodeReShareServiceClient { + /// Attempt to create a new client by connecting to a given endpoint. 
+ pub async fn connect(dst: D) -> Result + where + D: TryInto, + D::Error: Into, + { + let conn = tonic::transport::Endpoint::new(dst)?.connect().await?; + Ok(Self::new(conn)) + } + } + impl IrisCodeReShareServiceClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> IrisCodeReShareServiceClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + IrisCodeReShareServiceClient::new( + InterceptedService::new(inner, interceptor), + ) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn re_share( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/iris_mpc_reshare.IrisCodeReShareService/ReShare", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert( + GrpcMethod::new("iris_mpc_reshare.IrisCodeReShareService", "ReShare"), + ); + self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod iris_code_re_share_service_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with IrisCodeReShareServiceServer. 
+ #[async_trait] + pub trait IrisCodeReShareService: std::marker::Send + std::marker::Sync + 'static { + async fn re_share( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; + } + #[derive(Debug)] + pub struct IrisCodeReShareServiceServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl IrisCodeReShareServiceServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> + for IrisCodeReShareServiceServer + where + T: IrisCodeReShareService, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/iris_mpc_reshare.IrisCodeReShareService/ReShare" => { + #[allow(non_camel_case_types)] + struct ReShareSvc(pub Arc); + impl< + T: IrisCodeReShareService, + > tonic::server::UnaryService + for ReShareSvc { + type Response = super::IrisCodeReShareResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::re_share(&inner, request) + .await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = ReShareSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = 
http::Response::new(empty_body()); + let headers = response.headers_mut(); + headers + .insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for IrisCodeReShareServiceServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "iris_mpc_reshare.IrisCodeReShareService"; + impl tonic::server::NamedService for IrisCodeReShareServiceServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git a/iris-mpc-upgrade/src/proto/mod.rs b/iris-mpc-upgrade/src/proto/mod.rs new file mode 100644 index 000000000..f230282aa --- /dev/null +++ b/iris-mpc-upgrade/src/proto/mod.rs @@ -0,0 +1,30 @@ +use iris_mpc_common::{IRIS_CODE_LENGTH, MASK_CODE_LENGTH}; +use iris_mpc_reshare::IrisCodeReShare; +use prost::Message; + +// this is generated code so we skip linting it +#[rustfmt::skip] +#[allow(clippy::all)] +pub mod iris_mpc_reshare; + +pub fn get_size_of_reshare_iris_code_share_batch(batch_size: usize) -> usize { + let dummy = iris_mpc_reshare::IrisCodeReShareRequest { + sender_id: 0, + other_id: 1, + receiver_id: 2, + id_range_start_inclusive: 0, + id_range_end_non_inclusive: batch_size as i64, + iris_code_re_shares: vec![ + IrisCodeReShare { + left_iris_code_share: vec![1u8; IRIS_CODE_LENGTH * size_of::()], + left_mask_share: vec![2u8; MASK_CODE_LENGTH * size_of::()], + right_iris_code_share: vec![3u8; IRIS_CODE_LENGTH * size_of::()], + right_mask_share: vec![4u8; MASK_CODE_LENGTH * size_of::()], + }; + batch_size + ], + 
client_correlation_sanity_check: vec![7u8; 32], + }; + + dummy.encoded_len() +} diff --git a/iris-mpc-upgrade/src/reshare.rs b/iris-mpc-upgrade/src/reshare.rs new file mode 100644 index 000000000..c96131b6a --- /dev/null +++ b/iris-mpc-upgrade/src/reshare.rs @@ -0,0 +1,773 @@ +//! # Iris Code Resharing +//! +//! This module has functionality for resharing a secret shared iris code to a +//! new party, producing a valid share for the new party, without leaking +//! information about the individual shares of the sending parties. + +use crate::proto::{ + self, + iris_mpc_reshare::{ + iris_code_re_share_service_server, IrisCodeReShare, IrisCodeReShareRequest, + IrisCodeReShareStatus, + }, +}; +use iris_mpc_common::{ + galois::degree4::{basis::Monomial, GaloisRingElement, ShamirGaloisRingShare}, + galois_engine::degree4::{GaloisRingIrisCodeShare, GaloisRingTrimmedMaskCodeShare}, + IRIS_CODE_LENGTH, MASK_CODE_LENGTH, +}; +use iris_mpc_store::{Store, StoredIrisRef}; +use itertools::{izip, Itertools}; +use rand::{CryptoRng, Rng, SeedableRng}; +use sha2::{Digest, Sha256}; +use std::{collections::VecDeque, sync::Mutex}; +use tonic::Response; + +pub struct IrisCodeReshareSenderHelper { + my_party_id: usize, + other_party_id: usize, + target_party_id: usize, + lagrange_helper: GaloisRingElement, + common_seed: [u8; 32], + current_packet: Option, +} + +impl IrisCodeReshareSenderHelper { + pub fn new( + my_party_id: usize, + other_party_id: usize, + target_party_id: usize, + common_seed: [u8; 32], + ) -> Self { + let lagrange_helper = ShamirGaloisRingShare::deg_1_lagrange_poly_at_v( + my_party_id, + other_party_id, + target_party_id, + ); + Self { + my_party_id, + other_party_id, + target_party_id, + lagrange_helper, + common_seed, + current_packet: None, + } + } + fn reshare_with_random_additive_zero( + &self, + share: GaloisRingElement, + rng: &mut (impl CryptoRng + Rng), + ) -> GaloisRingElement { + let random_mask = GaloisRingElement::::random(rng); + if self.my_party_id < 
self.other_party_id { + share + random_mask + } else { + share - random_mask + } + } + + fn reshare_code( + &self, + mut code_share: GaloisRingIrisCodeShare, + rng: &mut (impl CryptoRng + Rng), + ) -> Vec { + for i in (0..IRIS_CODE_LENGTH).step_by(4) { + let mut share = GaloisRingElement::::from_coefs([ + code_share.coefs[i], + code_share.coefs[i + 1], + code_share.coefs[i + 2], + code_share.coefs[i + 3], + ]); + share = share * self.lagrange_helper; + share = self.reshare_with_random_additive_zero(share, rng); + code_share.coefs[i] = share.coefs[0]; + code_share.coefs[i + 1] = share.coefs[1]; + code_share.coefs[i + 2] = share.coefs[2]; + code_share.coefs[i + 3] = share.coefs[3]; + } + code_share + .coefs + .into_iter() + .flat_map(|x| x.to_le_bytes()) + .collect() + } + fn reshare_mask( + &self, + mut mask_share: GaloisRingTrimmedMaskCodeShare, + rng: &mut (impl CryptoRng + Rng), + ) -> Vec { + for i in (0..MASK_CODE_LENGTH).step_by(4) { + let mut share = GaloisRingElement::::from_coefs([ + mask_share.coefs[i], + mask_share.coefs[i + 1], + mask_share.coefs[i + 2], + mask_share.coefs[i + 3], + ]); + share = share * self.lagrange_helper; + share = self.reshare_with_random_additive_zero(share, rng); + mask_share.coefs[i] = share.coefs[0]; + mask_share.coefs[i + 1] = share.coefs[1]; + mask_share.coefs[i + 2] = share.coefs[2]; + mask_share.coefs[i + 3] = share.coefs[3]; + } + mask_share + .coefs + .into_iter() + .flat_map(|x| x.to_le_bytes()) + .collect() + } + + /// Start the production of a new reshare batch request. + /// The batch will contain reshared iris codes for the given range of + /// database indices. The start range is inclusive, the end range is + /// exclusive. + /// + /// # Panics + /// + /// Panics if this is called while a batch is already being built. + pub fn start_reshare_batch(&mut self, start_db_index: i64, end_db_index: i64) { + assert!( + self.current_packet.is_none(), + "We expected no batch to be currently being built, but it is..." 
+ ); + let mut digest = Sha256::new(); + digest.update(self.common_seed); + digest.update(start_db_index.to_le_bytes()); + digest.update(end_db_index.to_le_bytes()); + digest.update(b"ReShareSanityCheck"); + + self.current_packet = Some(IrisCodeReShareRequest { + sender_id: self.my_party_id as u64, + other_id: self.other_party_id as u64, + receiver_id: self.target_party_id as u64, + id_range_start_inclusive: start_db_index, + id_range_end_non_inclusive: end_db_index, + iris_code_re_shares: Vec::new(), + client_correlation_sanity_check: digest.finalize().as_slice().to_vec(), + }); + } + + /// Adds a new iris code to the current reshare batch. + /// + /// # Panics + /// + /// Panics if this is called without [Self::start_reshare_batch] being + /// called beforehand. + /// Panics if this is called with an iris code id that is out of the range + /// of the current batch. + pub fn add_reshare_iris_to_batch( + &mut self, + iris_code_id: i64, + left_code_share: GaloisRingIrisCodeShare, + left_mask_share: GaloisRingTrimmedMaskCodeShare, + right_code_share: GaloisRingIrisCodeShare, + right_mask_share: GaloisRingTrimmedMaskCodeShare, + ) { + assert!( + self.current_packet.is_some(), + "We expect a batch to be currently being built" + ); + assert!( + self.current_packet + .as_ref() + .unwrap() + .id_range_start_inclusive + <= iris_code_id + && self + .current_packet + .as_ref() + .unwrap() + .id_range_end_non_inclusive + > iris_code_id, + "The iris code id is out of the range of the current batch" + ); + let mut digest = Sha256::new(); + digest.update(self.common_seed); + digest.update(iris_code_id.to_le_bytes()); + let mut rng = rand_chacha::ChaChaRng::from_seed(digest.finalize().into()); + let left_reshared_code = self.reshare_code(left_code_share, &mut rng); + let left_reshared_mask = self.reshare_mask(left_mask_share, &mut rng); + let right_reshared_code = self.reshare_code(right_code_share, &mut rng); + let right_reshared_mask = self.reshare_mask(right_mask_share, &mut 
rng); + + let reshare = IrisCodeReShare { + left_iris_code_share: left_reshared_code, + left_mask_share: left_reshared_mask, + right_iris_code_share: right_reshared_code, + right_mask_share: right_reshared_mask, + }; + self.current_packet + .as_mut() + .expect("There is currently a batch being built") + .iris_code_re_shares + .push(reshare); + } + + /// Finalizes the current reshare batch and returns the reshare request. + /// + /// # Panics + /// + /// Panics if this is called without [Self::start_reshare_batch] being + /// called beforehand. Also panics if this is called without the correct + /// number of iris codes being added to the batch. + pub fn finalize_reshare_batch(&mut self) -> IrisCodeReShareRequest { + assert!(self.current_packet.is_some(), "No batch to finalize"); + let packet = self.current_packet.take().unwrap(); + assert_eq!( + packet.iris_code_re_shares.len(), + (packet.id_range_end_non_inclusive - packet.id_range_start_inclusive) as usize, + "Expected the correct number of iris codes to be added to the batch" + ); + packet + } +} + +#[derive(Debug, thiserror::Error)] +pub enum IrisCodeReShareError { + #[error("Invalid reshare request received: {reason}")] + InvalidRequest { reason: String }, + #[error( + "Too many requests received from this party ({party_id}) without matching request from \ + the other party ({other_party_id}" + )] + TooManyRequests { + party_id: usize, + other_party_id: usize, + }, +} + +#[derive(Debug)] +pub struct IrisCodeReshareReceiverHelper { + my_party_id: usize, + sender1_party_id: usize, + sender2_party_id: usize, + max_buffer_size: usize, + sender_1_buffer: Mutex>, + sender_2_buffer: Mutex>, +} + +impl IrisCodeReshareReceiverHelper { + pub fn new( + my_party_id: usize, + sender1_party_id: usize, + sender2_party_id: usize, + max_buffer_size: usize, + ) -> Self { + Self { + my_party_id, + sender1_party_id, + sender2_party_id, + max_buffer_size, + sender_1_buffer: Mutex::new(VecDeque::new()), + sender_2_buffer: 
Mutex::new(VecDeque::new()), + } + } + + fn check_valid(&self, request: &IrisCodeReShareRequest) -> Result<(), IrisCodeReShareError> { + if request.sender_id as usize == self.sender1_party_id { + if request.other_id as usize != self.sender2_party_id { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Received a request from unexpected set of parties".to_string(), + }); + } + } else if request.sender_id as usize == self.sender2_party_id { + if request.other_id as usize != self.sender1_party_id { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Received a request from unexpected set of parties".to_string(), + }); + } + } else { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Received a request from unexpected set of parties".to_string(), + }); + } + if request.receiver_id != self.my_party_id as u64 { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Received a request intended for a different party".to_string(), + }); + } + if request.id_range_start_inclusive >= request.id_range_end_non_inclusive { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Invalid range of iris codes in received request".to_string(), + }); + } + if request.iris_code_re_shares.len() + != (request.id_range_end_non_inclusive - request.id_range_start_inclusive) as usize + { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Invalid number of iris codes in received request".to_string(), + }); + } + + // Check that the iris code shares are of the correct length + if !request.iris_code_re_shares.iter().all(|reshare| { + reshare.left_iris_code_share.len() == IRIS_CODE_LENGTH * std::mem::size_of::() + && reshare.left_mask_share.len() == MASK_CODE_LENGTH * std::mem::size_of::() + && reshare.right_iris_code_share.len() + == IRIS_CODE_LENGTH * std::mem::size_of::() + && reshare.right_mask_share.len() == MASK_CODE_LENGTH * std::mem::size_of::() + }) { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Invalid iris 
code/mask share length".to_string(), + }); + } + Ok(()) + } + + pub fn add_request_batch( + &self, + request: IrisCodeReShareRequest, + ) -> Result<(), IrisCodeReShareError> { + self.check_valid(&request)?; + if request.sender_id as usize == self.sender1_party_id { + let mut sender_1_buffer = self.sender_1_buffer.lock().unwrap(); + if sender_1_buffer.len() + 1 >= self.max_buffer_size { + return Err(IrisCodeReShareError::TooManyRequests { + party_id: self.sender1_party_id, + other_party_id: self.sender2_party_id, + }); + } + sender_1_buffer.push_back(request); + } else if request.sender_id as usize == self.sender2_party_id { + let mut sender_2_buffer = self.sender_2_buffer.lock().unwrap(); + if sender_2_buffer.len() + 1 >= self.max_buffer_size { + return Err(IrisCodeReShareError::TooManyRequests { + party_id: self.sender2_party_id, + other_party_id: self.sender1_party_id, + }); + } + sender_2_buffer.push_back(request); + } else { + // check valid should have caught this + unreachable!() + } + + Ok(()) + } + + fn check_requests_matching( + &self, + request1: &IrisCodeReShareRequest, + request2: &IrisCodeReShareRequest, + ) -> Result<(), IrisCodeReShareError> { + if request1.id_range_start_inclusive != request2.id_range_start_inclusive + || request1.id_range_end_non_inclusive != request2.id_range_end_non_inclusive + { + return Err(IrisCodeReShareError::InvalidRequest { + reason: format!( + "Received requests with different iris code ranges: {}-{} from {} and {}-{} \ + from {}", + request1.id_range_start_inclusive, + request1.id_range_end_non_inclusive, + request1.sender_id, + request2.id_range_start_inclusive, + request2.id_range_end_non_inclusive, + request2.sender_id, + ), + }); + } + + if request1.client_correlation_sanity_check != request2.client_correlation_sanity_check { + return Err(IrisCodeReShareError::InvalidRequest { + reason: "Received requests with different correlation sanity checks, recheck the \ + used Keys for common secret derivation" + .to_string(), 
+ }); + } + Ok(()) + } + + fn reshare_code_batch( + &self, + request1: IrisCodeReShareRequest, + request2: IrisCodeReShareRequest, + ) -> Result { + let len = request1.iris_code_re_shares.len(); + let mut left_code = Vec::with_capacity(len); + let mut left_mask = Vec::with_capacity(len); + let mut right_code = Vec::with_capacity(len); + let mut right_mask = Vec::with_capacity(len); + + for (reshare1, reshare2) in + izip!(request1.iris_code_re_shares, request2.iris_code_re_shares) + { + // build galois shares from the u8 Vecs + let mut left_code_share1 = GaloisRingIrisCodeShare { + id: self.my_party_id + 1, + coefs: reshare1 + .left_iris_code_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + .collect_vec() + .try_into() + // we checked this beforehand in check_valid + .expect("Invalid iris code share length"), + }; + let mut left_mask_share1 = GaloisRingTrimmedMaskCodeShare { + id: self.my_party_id + 1, + coefs: reshare1 + .left_mask_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + // we checked this beforehand in check_valid + .collect_vec() + .try_into() + .expect("Invalid mask share length"), + }; + let left_code_share2 = GaloisRingIrisCodeShare { + id: self.my_party_id + 1, + coefs: reshare2 + .left_iris_code_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + .collect_vec() + .try_into() + // we checked this beforehand in check_valid + .expect("Invalid iris code share length"), + }; + let left_mask_share2 = GaloisRingTrimmedMaskCodeShare { + id: self.my_party_id + 1, + coefs: reshare2 + .left_mask_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + // we checked this beforehand in check_valid + .collect_vec() + .try_into() + .expect("Invalid mask share length"), + }; + + // add them together + left_code_share1 + .coefs + .iter_mut() + 
.zip(left_code_share2.coefs.iter()) + .for_each(|(x, y)| { + *x = x.wrapping_add(*y); + }); + left_mask_share1 + .coefs + .iter_mut() + .zip(left_mask_share2.coefs.iter()) + .for_each(|(x, y)| { + *x = x.wrapping_add(*y); + }); + + left_code.push(left_code_share1); + left_mask.push(left_mask_share1); + + // now the right eye + // build galois shares from the u8 Vecs + let mut right_code_share1 = GaloisRingIrisCodeShare { + id: self.my_party_id + 1, + coefs: reshare1 + .right_iris_code_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + .collect_vec() + .try_into() + // we checked this beforehand in check_valid + .expect("Invalid iris code share length"), + }; + let mut right_mask_share1 = GaloisRingTrimmedMaskCodeShare { + id: self.my_party_id + 1, + coefs: reshare1 + .right_mask_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + // we checked this beforehand in check_valid + .collect_vec() + .try_into() + .expect("Invalid mask share length"), + }; + let right_code_share2 = GaloisRingIrisCodeShare { + id: self.my_party_id + 1, + coefs: reshare2 + .right_iris_code_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + .collect_vec() + .try_into() + // we checked this beforehand in check_valid + .expect("Invalid iris code share length"), + }; + let right_mask_share2 = GaloisRingTrimmedMaskCodeShare { + id: self.my_party_id + 1, + coefs: reshare2 + .right_mask_share + .chunks_exact(std::mem::size_of::()) + .map(|x| u16::from_le_bytes(x.try_into().unwrap())) + // we checked this beforehand in check_valid + .collect_vec() + .try_into() + .expect("Invalid mask share length"), + }; + + // add them together + right_code_share1 + .coefs + .iter_mut() + .zip(right_code_share2.coefs.iter()) + .for_each(|(x, y)| { + *x = x.wrapping_add(*y); + }); + right_mask_share1 + .coefs + .iter_mut() + .zip(right_mask_share2.coefs.iter()) + 
.for_each(|(x, y)| { + *x = x.wrapping_add(*y); + }); + + right_code.push(right_code_share1); + right_mask.push(right_mask_share1); + } + + Ok(RecombinedIrisCodeBatch { + range_start_inclusive: request1.id_range_start_inclusive, + range_end_exclusive: request1.id_range_end_non_inclusive, + left_iris_codes: left_code, + left_masks: left_mask, + right_iris_codes: right_code, + right_masks: right_mask, + }) + } + + pub fn try_handle_batch( + &self, + ) -> Result, IrisCodeReShareError> { + let mut sender_1_buffer = self.sender_1_buffer.lock().unwrap(); + let mut sender_2_buffer = self.sender_2_buffer.lock().unwrap(); + if sender_1_buffer.is_empty() || sender_2_buffer.is_empty() { + return Ok(None); + } + + let sender_1_batch = sender_1_buffer.pop_front().unwrap(); + let sender_2_batch = sender_2_buffer.pop_front().unwrap(); + drop(sender_1_buffer); + drop(sender_2_buffer); + + self.check_requests_matching(&sender_1_batch, &sender_2_batch)?; + + let reshare = self.reshare_code_batch(sender_1_batch, sender_2_batch)?; + + Ok(Some(reshare)) + } +} + +/// A batch of recombined iris codes, produced by resharing iris codes from two +/// other parties. This should be inserted into the database. 
+pub struct RecombinedIrisCodeBatch { + range_start_inclusive: i64, + #[expect(unused)] + range_end_exclusive: i64, + left_iris_codes: Vec, + left_masks: Vec, + right_iris_codes: Vec, + right_masks: Vec, +} + +impl RecombinedIrisCodeBatch { + pub async fn insert_into_store(self, store: &Store) -> eyre::Result<()> { + let to_be_inserted = izip!( + &self.left_iris_codes, + &self.left_masks, + &self.right_iris_codes, + &self.right_masks + ) + .enumerate() + .map(|(idx, (left_iris, left_mask, right_iris, right_mask))| { + let id = self.range_start_inclusive + idx as i64; + StoredIrisRef { + id, + left_code: &left_iris.coefs, + left_mask: &left_mask.coefs, + right_code: &right_iris.coefs, + right_mask: &right_mask.coefs, + } + }) + .collect::>(); + let mut tx = store.tx().await?; + store + .insert_irises_overriding(&mut tx, &to_be_inserted) + .await?; + tx.commit().await?; + Ok(()) + } +} + +pub struct GrpcReshareServer { + store: Store, + receiver_helper: IrisCodeReshareReceiverHelper, +} + +impl GrpcReshareServer { + pub fn new(store: Store, receiver_helper: IrisCodeReshareReceiverHelper) -> Self { + Self { + store, + receiver_helper, + } + } +} + +#[tonic::async_trait] +impl iris_code_re_share_service_server::IrisCodeReShareService for GrpcReshareServer { + async fn re_share( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + match self.receiver_helper.add_request_batch(request.into_inner()) { + Ok(()) => (), + Err(err) => { + tracing::warn!(error = err.to_string(), "Error handling reshare request"); + match err { + IrisCodeReShareError::InvalidRequest { reason } => { + return Ok(Response::new( + proto::iris_mpc_reshare::IrisCodeReShareResponse { + status: IrisCodeReShareStatus::Error as i32, + message: reason, + }, + )); + } + IrisCodeReShareError::TooManyRequests { .. 
} => { + return Ok(Response::new( + proto::iris_mpc_reshare::IrisCodeReShareResponse { + status: IrisCodeReShareStatus::FullQueue as i32, + message: err.to_string(), + }, + )) + } + } + } + } + // we received a batch, try to handle it + match self.receiver_helper.try_handle_batch() { + Ok(Some(batch)) => { + // write the reshared iris codes to the database + match batch.insert_into_store(&self.store).await { + Ok(()) => (), + Err(err) => { + tracing::error!( + error = err.to_string(), + "Error inserting reshared iris codes into DB" + ); + } + } + } + Ok(None) => (), + Err(err) => { + tracing::warn!(error = err.to_string(), "Error handling reshare request"); + return Ok(Response::new( + proto::iris_mpc_reshare::IrisCodeReShareResponse { + status: IrisCodeReShareStatus::Error as i32, + message: err.to_string(), + }, + )); + } + } + + Ok(Response::new( + proto::iris_mpc_reshare::IrisCodeReShareResponse { + status: IrisCodeReShareStatus::Ok as i32, + message: Default::default(), + }, + )) + } +} + +#[cfg(test)] +mod tests { + use super::IrisCodeReshareSenderHelper; + use crate::reshare::IrisCodeReshareReceiverHelper; + use iris_mpc_common::{ + galois_engine::degree4::FullGaloisRingIrisCodeShare, iris_db::db::IrisDB, + }; + use itertools::Itertools; + use rand::thread_rng; + + #[test] + fn test_basic_resharing() { + const DB_SIZE: usize = 100; + + let left_db = IrisDB::new_random_rng(DB_SIZE, &mut thread_rng()); + let right_db = IrisDB::new_random_rng(DB_SIZE, &mut thread_rng()); + + let (party0_db_left, party1_db_left, party2_db_left): (Vec<_>, Vec<_>, Vec<_>) = left_db + .db + .iter() + .map(|x| { + let [a, b, c] = FullGaloisRingIrisCodeShare::encode_iris_code(x, &mut thread_rng()); + (a, b, c) + }) + .multiunzip(); + let (party0_db_right, party1_db_right, party2_db_right): (Vec<_>, Vec<_>, Vec<_>) = + right_db + .db + .iter() + .map(|x| { + let [a, b, c] = + FullGaloisRingIrisCodeShare::encode_iris_code(x, &mut thread_rng()); + (a, b, c) + }) + .multiunzip(); + + let 
mut reshare_helper_0_1_2 = IrisCodeReshareSenderHelper::new(0, 1, 2, [0; 32]); + let mut reshare_helper_1_0_2 = IrisCodeReshareSenderHelper::new(1, 0, 2, [0; 32]); + let reshare_helper_2 = IrisCodeReshareReceiverHelper::new(2, 0, 1, 100); + + reshare_helper_0_1_2.start_reshare_batch(0, DB_SIZE as i64); + for (idx, (left, right)) in party0_db_left + .iter() + .zip(party0_db_right.iter()) + .enumerate() + { + reshare_helper_0_1_2.add_reshare_iris_to_batch( + idx as i64, + left.code.clone(), + left.mask.clone(), + right.code.clone(), + right.mask.clone(), + ); + } + let reshare_request_0_1_2 = reshare_helper_0_1_2.finalize_reshare_batch(); + + reshare_helper_1_0_2.start_reshare_batch(0, DB_SIZE as i64); + for (idx, (left, right)) in party1_db_left + .iter() + .zip(party1_db_right.iter()) + .enumerate() + { + reshare_helper_1_0_2.add_reshare_iris_to_batch( + idx as i64, + left.code.clone(), + left.mask.clone(), + right.code.clone(), + right.mask.clone(), + ); + } + let reshare_request_1_0_2 = reshare_helper_1_0_2.finalize_reshare_batch(); + + reshare_helper_2 + .add_request_batch(reshare_request_0_1_2) + .unwrap(); + reshare_helper_2 + .add_request_batch(reshare_request_1_0_2) + .unwrap(); + + let reshare_batch = reshare_helper_2.try_handle_batch().unwrap().unwrap(); + + for (idx, (left, right)) in party2_db_left + .iter() + .zip(party2_db_right.iter()) + .enumerate() + { + assert_eq!(&left.code, &reshare_batch.left_iris_codes[idx]); + assert_eq!(&left.mask, &reshare_batch.left_masks[idx]); + assert_eq!(&right.code, &reshare_batch.right_iris_codes[idx]); + assert_eq!(&right.mask, &reshare_batch.right_masks[idx]); + } + } +} From 62eee477de71bfeb2e7e708cac937ccea953e3e4 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 22 Nov 2024 14:23:33 +0100 Subject: [PATCH 015/170] fix nginx configuration (#706) --- deploy/stage/common-values-reshare-server.yaml | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git 
a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 1082b1063..2c6c21613 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -100,14 +100,14 @@ nginxSidecar: } http { - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent"'; + log_format basic '$remote_addr [$time_local] ' + '$status $bytes_sent'; - access_log /dev/stdout main; + access_log /dev/stdout basic; server { - listen 8443 http2 ssl; + listen 8443 ssl; + http2 on; ssl_certificate /etc/nginx/cert/certificate.crt; ssl_certificate_key /etc/nginx/cert/key.pem; @@ -122,6 +122,14 @@ nginxSidecar: location / { # Forward gRPC traffic to the gRPC server on port 8000 grpc_pass grpc://127.0.0.1:8000; + error_page 502 = /error502grpc; # Custom error page for GRPC backend issues + } + + # Custom error page + location = /error502grpc { + internal; + default_type text/plain; + return 502 "Bad Gateway: gRPC server unreachable."; } } } From fc79e84a5b71cabc423d063577ac0d8bf48560df Mon Sep 17 00:00:00 2001 From: iliailia Date: Fri, 22 Nov 2024 17:14:51 +0100 Subject: [PATCH 016/170] Basic gRPC networking for SMPC (#698) --- Cargo.lock | 22 + deny.toml | 1 + iris-mpc-cpu/Cargo.toml | 10 + iris-mpc-cpu/benches/hnsw.rs | 16 +- iris-mpc-cpu/build.rs | 6 + iris-mpc-cpu/src/execution/local.rs | 120 +++- iris-mpc-cpu/src/execution/session.rs | 6 +- iris-mpc-cpu/src/hawkers/galois_store.rs | 248 +++++---- iris-mpc-cpu/src/hawkers/plaintext_store.rs | 56 +- iris-mpc-cpu/src/lib.rs | 2 + iris-mpc-cpu/src/network/grpc.rs | 512 ++++++++++++++++++ iris-mpc-cpu/src/network/local.rs | 4 +- iris-mpc-cpu/src/network/mod.rs | 7 + iris-mpc-cpu/src/proto/party_node.proto | 13 + iris-mpc-cpu/src/proto_generated/mod.rs | 1 + .../src/proto_generated/party_node.rs | 297 ++++++++++ iris-mpc-cpu/src/protocol/ops.rs | 2 +- 17 files changed, 1173 
insertions(+), 150 deletions(-) create mode 100644 iris-mpc-cpu/build.rs create mode 100644 iris-mpc-cpu/src/network/grpc.rs create mode 100644 iris-mpc-cpu/src/proto/party_node.proto create mode 100644 iris-mpc-cpu/src/proto_generated/mod.rs create mode 100644 iris-mpc-cpu/src/proto_generated/party_node.rs diff --git a/Cargo.lock b/Cargo.lock index 811329012..34bc732cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -781,6 +781,20 @@ dependencies = [ "tracing", ] +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "futures-core", + "getrandom", + "instant", + "pin-project-lite", + "rand", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.71" @@ -2689,7 +2703,9 @@ version = "0.1.0" dependencies = [ "aes-prng 0.2.1 (git+https://github.com/tf-encrypted/aes-prng.git?branch=dragos%2Fdisplay)", "async-channel", + "async-stream 0.2.1", "async-trait", + "backoff", "bincode", "bytemuck", "bytes", @@ -2701,12 +2717,18 @@ dependencies = [ "iris-mpc-common", "itertools 0.13.0", "num-traits", + "prost", "rand", "rstest", "serde", + "serde_json", "static_assertions", "tokio", + "tokio-stream", + "tonic", + "tonic-build", "tracing", + "tracing-subscriber", "tracing-test", ] diff --git a/deny.toml b/deny.toml index 26dd67235..4eb13b5fa 100644 --- a/deny.toml +++ b/deny.toml @@ -35,6 +35,7 @@ allow = [ "MIT", "MPL-2.0", # Although this is copyleft, it is scoped to modifying the original files "OpenSSL", + "Unicode-3.0", "Unicode-DFS-2016", "Unlicense", "Zlib", diff --git a/iris-mpc-cpu/Cargo.toml b/iris-mpc-cpu/Cargo.toml index 726d3298c..99bfe2a4b 100644 --- a/iris-mpc-cpu/Cargo.toml +++ b/iris-mpc-cpu/Cargo.toml @@ -10,7 +10,9 @@ repository.workspace = true [dependencies] aes-prng = { git = "https://github.com/tf-encrypted/aes-prng.git", branch = "dragos/display"} async-channel = "2.3.1" +async-stream = "0.2" 
async-trait = "~0.1" +backoff = {version="0.4.0", features = ["tokio"]} bincode = "1.3.3" bytes = "1.7" bytemuck.workspace = true @@ -21,17 +23,25 @@ hawk-pack = { git = "https://github.com/Inversed-Tech/hawk-pack.git", rev = "4e6 iris-mpc-common = { path = "../iris-mpc-common" } itertools.workspace = true num-traits.workspace = true +prost = "0.13" rand.workspace = true rstest = "0.23.0" serde.workspace = true +serde_json.workspace = true static_assertions.workspace = true tokio.workspace = true +tokio-stream = "0.1" +tonic = "0.12.3" tracing.workspace = true +tracing-subscriber.workspace = true tracing-test = "0.2.5" [dev-dependencies] criterion = { version = "0.5.1", features = ["async_tokio"] } +[build-dependencies] +tonic-build = "0.12.3" + [[bench]] name = "hnsw" harness = false diff --git a/iris-mpc-cpu/benches/hnsw.rs b/iris-mpc-cpu/benches/hnsw.rs index 15013dad6..6eaece7b8 100644 --- a/iris-mpc-cpu/benches/hnsw.rs +++ b/iris-mpc-cpu/benches/hnsw.rs @@ -5,7 +5,7 @@ use iris_mpc_common::iris_db::{db::IrisDB, iris::IrisCode}; use iris_mpc_cpu::{ database_generators::{create_random_sharing, generate_galois_iris_shares}, execution::local::LocalRuntime, - hawkers::{galois_store::gr_create_ready_made_hawk_searcher, plaintext_store::PlaintextStore}, + hawkers::{galois_store::LocalNetAby3NgStoreProtocol, plaintext_store::PlaintextStore}, protocol::ops::{cross_compare, galois_ring_pairwise_distance, galois_ring_to_rep3}, }; use rand::SeedableRng; @@ -89,7 +89,7 @@ fn bench_hnsw_primitives(c: &mut Criterion) { let t1 = create_random_sharing(&mut rng, 10_u16); let t2 = create_random_sharing(&mut rng, 10_u16); - let runtime = LocalRuntime::replicated_test_config().await.unwrap(); + let runtime = LocalRuntime::mock_setup_with_channel().await.unwrap(); let mut jobs = JoinSet::new(); for (index, player) in runtime.identities.iter().enumerate() { @@ -116,7 +116,7 @@ fn bench_gr_primitives(c: &mut Criterion) { .build() .unwrap(); b.to_async(&rt).iter(|| async move { - let 
runtime = LocalRuntime::replicated_test_config().await.unwrap(); + let runtime = LocalRuntime::mock_setup_with_channel().await.unwrap(); let mut rng = AesRng::seed_from_u64(0); let iris_db = IrisDB::new_random_rng(4, &mut rng).db; @@ -174,9 +174,12 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { let (_, secret_searcher) = rt.block_on(async move { let mut rng = AesRng::seed_from_u64(0_u64); - gr_create_ready_made_hawk_searcher(&mut rng, database_size) - .await - .unwrap() + LocalNetAby3NgStoreProtocol::lazy_random_setup_with_local_channel( + &mut rng, + database_size, + ) + .await + .unwrap() }); group.bench_function( @@ -215,7 +218,6 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { .await; }); } - jobs.join_all().await; }, criterion::BatchSize::SmallInput, diff --git a/iris-mpc-cpu/build.rs b/iris-mpc-cpu/build.rs new file mode 100644 index 000000000..cf3860392 --- /dev/null +++ b/iris-mpc-cpu/build.rs @@ -0,0 +1,6 @@ +fn main() { + tonic_build::configure() + .out_dir("src/proto_generated") + .compile_protos(&["src/proto/party_node.proto"], &["src/proto"]) + .unwrap_or_else(|e| panic!("Failed to compile protos {:?}", e)); +} diff --git a/iris-mpc-cpu/src/execution/local.rs b/iris-mpc-cpu/src/execution/local.rs index 12e9f8a99..5dba6bf91 100644 --- a/iris-mpc-cpu/src/execution/local.rs +++ b/iris-mpc-cpu/src/execution/local.rs @@ -3,11 +3,14 @@ use crate::{ player::*, session::{BootSession, Session, SessionHandles, SessionId}, }, - network::local::LocalNetworkingStore, + network::{grpc::setup_local_grpc_networking, local::LocalNetworkingStore, NetworkType}, protocol::{ops::setup_replicated_prf, prf::PrfSeed}, }; -use std::{collections::HashMap, sync::Arc}; -use tokio::task::JoinSet; +use std::{ + collections::{HashMap, HashSet}, + sync::{Arc, LazyLock}, +}; +use tokio::{sync::Mutex, task::JoinSet}; pub fn generate_local_identities() -> Vec { vec![ @@ -17,6 +20,25 @@ pub fn generate_local_identities() -> Vec { ] } +static USED_PORTS: LazyLock>> = 
LazyLock::new(|| Mutex::new(HashSet::new())); + +pub async fn get_free_local_addresses(num_ports: usize) -> eyre::Result> { + let mut addresses = vec![]; + let mut listeners = vec![]; + while addresses.len() < num_ports { + let listener = std::net::TcpListener::bind("127.0.0.1:0")?; + let port = listener.local_addr()?.port(); + if USED_PORTS.lock().await.insert(port) { + addresses.push(format!("127.0.0.1:{port}")); + listeners.push(listener); + } else { + tracing::warn!("Port {port} already in use, retrying"); + } + } + tracing::info!("Found free addresses: {addresses:?}"); + Ok(addresses) +} + #[derive(Debug, Clone)] pub struct LocalRuntime { pub identities: Vec, @@ -27,7 +49,7 @@ pub struct LocalRuntime { } impl LocalRuntime { - pub async fn replicated_test_config() -> eyre::Result { + pub async fn mock_setup(network_t: NetworkType) -> eyre::Result { let num_parties = 3; let identities = generate_local_identities(); let mut seeds = Vec::new(); @@ -36,28 +58,64 @@ impl LocalRuntime { seed[0] = i; seeds.push(seed); } - LocalRuntime::new(identities, seeds).await + LocalRuntime::new_with_network_type(identities, seeds, network_t).await } - pub async fn new(identities: Vec, seeds: Vec) -> eyre::Result { + pub async fn mock_setup_with_channel() -> eyre::Result { + Self::mock_setup(NetworkType::LocalChannel).await + } + + pub async fn new_with_network_type( + identities: Vec, + seeds: Vec, + network_type: NetworkType, + ) -> eyre::Result { let role_assignments: RoleAssignment = identities .iter() .enumerate() .map(|(index, id)| (Role::new(index), id.clone())) .collect(); - let network = LocalNetworkingStore::from_host_ids(&identities); - let sess_id = SessionId::from(0_u128); - let boot_sessions: Vec = (0..seeds.len()) - .map(|i| { - let identity = identities[i].clone(); - BootSession { - session_id: sess_id, - role_assignments: Arc::new(role_assignments.clone()), - networking: Arc::new(network.get_local_network(identity.clone())), - own_identity: identity, + let 
sess_id = SessionId::from(0_u64); + let boot_sessions = match network_type { + NetworkType::LocalChannel => { + let network = LocalNetworkingStore::from_host_ids(&identities); + let boot_sessions: Vec = (0..seeds.len()) + .map(|i| { + let identity = identities[i].clone(); + BootSession { + session_id: sess_id, + role_assignments: Arc::new(role_assignments.clone()), + networking: Arc::new(network.get_local_network(identity.clone())), + own_identity: identity, + } + }) + .collect(); + boot_sessions + } + NetworkType::GrpcChannel => { + let networks = setup_local_grpc_networking(identities.clone()).await?; + let mut jobs = JoinSet::new(); + for player in networks.iter() { + let player = player.clone(); + jobs.spawn(async move { + player.create_session(sess_id).await.unwrap(); + }); } - }) - .collect(); + jobs.join_all().await; + let boot_sessions: Vec = (0..seeds.len()) + .map(|i| { + let identity = identities[i].clone(); + BootSession { + session_id: sess_id, + role_assignments: Arc::new(role_assignments.clone()), + networking: Arc::new(networks[i].clone()), + own_identity: identity, + } + }) + .collect(); + boot_sessions + } + }; let mut jobs = JoinSet::new(); for (player_id, boot_session) in boot_sessions.iter().enumerate() { @@ -83,4 +141,30 @@ impl LocalRuntime { sessions, }) } + + pub async fn new(identities: Vec, seeds: Vec) -> eyre::Result { + Self::new_with_network_type(identities, seeds, NetworkType::LocalChannel).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_get_free_local_addresses() { + let mut jobs = JoinSet::new(); + let num_ports = 3; + + for _ in 0..100 { + jobs.spawn(async move { + let mut addresses = get_free_local_addresses(num_ports).await.unwrap(); + assert_eq!(addresses.len(), num_ports); + addresses.sort(); + addresses.dedup(); + assert_eq!(addresses.len(), num_ports); + }); + } + jobs.join_all().await; + } } diff --git a/iris-mpc-cpu/src/execution/session.rs b/iris-mpc-cpu/src/execution/session.rs 
index 2b857d9f3..fba4403ea 100644 --- a/iris-mpc-cpu/src/execution/session.rs +++ b/iris-mpc-cpu/src/execution/session.rs @@ -8,10 +8,10 @@ use serde::{Deserialize, Serialize}; use std::{collections::HashMap, fmt::Debug, sync::Arc}; #[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct SessionId(pub u128); +pub struct SessionId(pub u64); -impl From for SessionId { - fn from(id: u128) -> Self { +impl From for SessionId { + fn from(id: u64) -> Self { SessionId(id) } } diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs index 8e975a377..2c68dd83f 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -7,6 +7,7 @@ use crate::{ session::Session, }, hawkers::plaintext_store::PointId, + network::NetworkType, protocol::ops::{ compare_threshold_and_open, cross_compare, galois_ring_pairwise_distance, galois_ring_to_rep3, @@ -75,6 +76,7 @@ pub fn setup_local_player_preloaded_db( pub async fn setup_local_aby3_players_with_preloaded_db( rng: &mut R, database: Vec, + network_t: NetworkType, ) -> eyre::Result> { let identities = generate_local_identities(); @@ -91,7 +93,7 @@ pub async fn setup_local_aby3_players_with_preloaded_db( .into_iter() .map(|player_irises| setup_local_player_preloaded_db(player_irises).unwrap()) .collect(); - let runtime = LocalRuntime::replicated_test_config().await?; + let runtime = LocalRuntime::mock_setup(network_t).await?; let local_stores = identities .into_iter() @@ -134,8 +136,10 @@ impl LocalNetAby3NgStoreProtocol { } } -pub async fn setup_local_store_aby3_players() -> eyre::Result> { - let runtime = LocalRuntime::replicated_test_config().await?; +pub async fn setup_local_store_aby3_players( + network_t: NetworkType, +) -> eyre::Result> { + let runtime = LocalRuntime::mock_setup(network_t).await?; let players = generate_local_identities(); let local_stores = players .into_iter() @@ -279,113 +283,116 @@ 
impl LocalNetAby3NgStoreProtocol { } } -pub async fn gr_create_ready_made_hawk_searcher( - rng: &mut R, - database_size: usize, -) -> eyre::Result<( - (PlaintextStore, GraphMem), - Vec<( - LocalNetAby3NgStoreProtocol, - GraphMem, - )>, -)> { - // makes sure the searcher produces same graph structure by having the same rng - let mut rng_searcher1 = AesRng::from_rng(rng.clone())?; - let cleartext_database = IrisDB::new_random_rng(database_size, rng).db; - - let mut plaintext_vector_store = PlaintextStore::default(); - let mut plaintext_graph_store = GraphMem::new(); - let searcher = HawkSearcher::default(); - - for raw_query in cleartext_database.iter() { - let query = plaintext_vector_store.prepare_query(raw_query.clone()); - let neighbors = searcher - .search_to_insert( - &mut plaintext_vector_store, - &mut plaintext_graph_store, - &query, - ) - .await; - let inserted = plaintext_vector_store.insert(&query).await; - searcher - .insert_from_search_results( - &mut plaintext_vector_store, - &mut plaintext_graph_store, - &mut rng_searcher1, - inserted, - neighbors, - ) - .await; - } +impl LocalNetAby3NgStoreProtocol { + /// Generates 3 pairs of vector stores and graphs from a random plaintext + /// vector store and graph, which are returned as well. 
+ pub async fn lazy_random_setup( + rng: &mut R, + database_size: usize, + network_t: NetworkType, + ) -> eyre::Result<( + (PlaintextStore, GraphMem), + Vec<(Self, GraphMem)>, + )> { + let (cleartext_database, plaintext_vector_store, plaintext_graph_store) = + PlaintextStore::create_random(rng, database_size).await?; + + let protocol_stores = + setup_local_aby3_players_with_preloaded_db(rng, cleartext_database, network_t).await?; - let protocol_stores = - setup_local_aby3_players_with_preloaded_db(rng, cleartext_database).await?; + let mut jobs = JoinSet::new(); + for store in protocol_stores.iter() { + let mut store = store.clone(); + let plaintext_graph_store = plaintext_graph_store.clone(); + jobs.spawn(async move { + ( + store.clone(), + store.graph_from_plain(&plaintext_graph_store).await, + ) + }); + } + let mut secret_shared_stores = jobs.join_all().await; + secret_shared_stores.sort_by_key(|(store, _)| store.get_owner_index()); + let plaintext = (plaintext_vector_store, plaintext_graph_store); + Ok((plaintext, secret_shared_stores)) + } - let mut jobs = JoinSet::new(); - for store in protocol_stores.into_iter() { - let mut store = store; - let plaintext_graph_store = plaintext_graph_store.clone(); - jobs.spawn(async move { - let graph = store.graph_from_plain(&plaintext_graph_store).await; - (store, graph) - }); + /// Generates 3 pairs of vector stores and graphs from a random plaintext + /// vector store and graph, which are returned as well. Networking is + /// based on local async_channel. 
+ pub async fn lazy_random_setup_with_local_channel( + rng: &mut R, + database_size: usize, + ) -> eyre::Result<( + (PlaintextStore, GraphMem), + Vec<( + LocalNetAby3NgStoreProtocol, + GraphMem, + )>, + )> { + Self::lazy_random_setup(rng, database_size, NetworkType::LocalChannel).await } - let mut secret_shared_stores = jobs.join_all().await; - secret_shared_stores.sort_by_key(|(store, _)| store.get_owner_index()); - let plaintext = (plaintext_vector_store, plaintext_graph_store); - Ok((plaintext, secret_shared_stores)) -} -pub async fn ng_create_from_scratch_hawk_searcher( - rng: &mut R, - database_size: usize, -) -> eyre::Result< - Vec<( - LocalNetAby3NgStoreProtocol, - GraphMem, - )>, -> { - let rng_searcher = AesRng::from_rng(rng.clone())?; - let cleartext_database = IrisDB::new_random_rng(database_size, rng).db; - let shared_irises: Vec<_> = (0..database_size) - .map(|id| generate_galois_iris_shares(rng, cleartext_database[id].clone())) - .collect(); + /// Generates 3 pairs of vector stores and graphs corresponding to each + /// local player. 
+ pub async fn shared_random_setup( + rng: &mut R, + database_size: usize, + network_t: NetworkType, + ) -> eyre::Result)>> { + let rng_searcher = AesRng::from_rng(rng.clone())?; + let cleartext_database = IrisDB::new_random_rng(database_size, rng).db; + let shared_irises: Vec<_> = (0..database_size) + .map(|id| generate_galois_iris_shares(rng, cleartext_database[id].clone())) + .collect(); - let local_stores = setup_local_store_aby3_players().await?; + let mut local_stores = setup_local_store_aby3_players(network_t).await?; - let mut jobs = JoinSet::new(); - for store in local_stores.into_iter() { - let mut store = store; - let role = store.get_owner_index(); - let mut rng_searcher = rng_searcher.clone(); - let queries = (0..database_size) - .map(|id| store.prepare_query(shared_irises[id][role].clone())) - .collect::>(); - jobs.spawn(async move { - let mut graph_store = GraphMem::new(); - let searcher = HawkSearcher::default(); - // insert queries - for query in queries.iter() { - let neighbors = searcher - .search_to_insert(&mut store, &mut graph_store, query) - .await; - searcher - .insert_from_search_results( - &mut store, - &mut graph_store, - &mut rng_searcher, - *query, - neighbors, - ) - .await; - } - (store, graph_store) + let mut jobs = JoinSet::new(); + for store in local_stores.iter_mut() { + let mut store = store.clone(); + let role = store.get_owner_index(); + let mut rng_searcher = rng_searcher.clone(); + let queries = (0..database_size) + .map(|id| store.prepare_query(shared_irises[id][role].clone())) + .collect::>(); + jobs.spawn(async move { + let mut graph_store = GraphMem::new(); + let searcher = HawkSearcher::default(); + // insert queries + for query in queries.iter() { + let neighbors = searcher + .search_to_insert(&mut store, &mut graph_store, query) + .await; + searcher + .insert_from_search_results( + &mut store, + &mut graph_store, + &mut rng_searcher, + *query, + neighbors, + ) + .await; + } + (store, graph_store) + }); + } + let mut 
result = jobs.join_all().await; + // preserve order of players + result.sort_by(|(store1, _), (store2, _)| { + store1.get_owner_index().cmp(&store2.get_owner_index()) }); + Ok(result) + } + + /// Generates 3 pairs of vector stores and graphs corresponding to each + /// local player. Networking is based on local async_channel. + pub async fn shared_random_setup_with_local_channel( + rng: &mut R, + database_size: usize, + ) -> eyre::Result)>> { + Self::shared_random_setup(rng, database_size, NetworkType::LocalChannel).await } - let mut result = jobs.join_all().await; - // preserve order of players - result.sort_by_key(|(store, _)| store.get_owner_index()); - Ok(result) } #[cfg(test)] @@ -408,7 +415,9 @@ mod tests { .map(|iris| generate_galois_iris_shares(&mut rng, iris.clone())) .collect(); - let mut stores = setup_local_store_aby3_players().await.unwrap(); + let mut stores = setup_local_store_aby3_players(NetworkType::LocalChannel) + .await + .unwrap(); let mut jobs = JoinSet::new(); for store in stores.iter_mut() { @@ -466,15 +475,20 @@ mod tests { async fn test_gr_premade_hnsw() { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 10; - let (mut cleartext_data, secret_data) = - gr_create_ready_made_hawk_searcher(&mut rng, database_size) - .await - .unwrap(); + let network_t = NetworkType::LocalChannel; + let (mut cleartext_data, secret_data) = LocalNetAby3NgStoreProtocol::lazy_random_setup( + &mut rng, + database_size, + network_t.clone(), + ) + .await + .unwrap(); let mut rng = AesRng::seed_from_u64(0_u64); - let vector_graph_stores = ng_create_from_scratch_hawk_searcher(&mut rng, database_size) - .await - .unwrap(); + let vector_graph_stores = + LocalNetAby3NgStoreProtocol::shared_random_setup(&mut rng, database_size, network_t) + .await + .unwrap(); for ((v_from_scratch, _), (premade_v, _)) in vector_graph_stores.iter().zip(secret_data.iter()) @@ -539,7 +553,9 @@ mod tests { .iter() .map(|iris| generate_galois_iris_shares(&mut rng, iris.clone())) 
.collect(); - let mut local_stores = setup_local_store_aby3_players().await.unwrap(); + let mut local_stores = setup_local_store_aby3_players(NetworkType::LocalChannel) + .await + .unwrap(); // Now do the work for the plaintext store let mut plaintext_store = PlaintextStore::default(); let plaintext_preps: Vec<_> = (0..db_dim) @@ -623,9 +639,13 @@ mod tests { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 2; let searcher = HawkSearcher::default(); - let mut vectors_and_graphs = ng_create_from_scratch_hawk_searcher(&mut rng, database_size) - .await - .unwrap(); + let mut vectors_and_graphs = LocalNetAby3NgStoreProtocol::shared_random_setup( + &mut rng, + database_size, + NetworkType::LocalChannel, + ) + .await + .unwrap(); for i in 0..database_size { let mut jobs = JoinSet::new(); diff --git a/iris-mpc-cpu/src/hawkers/plaintext_store.rs b/iris-mpc-cpu/src/hawkers/plaintext_store.rs index 7111425b5..6c69e6355 100644 --- a/iris-mpc-cpu/src/hawkers/plaintext_store.rs +++ b/iris-mpc-cpu/src/hawkers/plaintext_store.rs @@ -1,5 +1,10 @@ -use hawk_pack::VectorStore; -use iris_mpc_common::iris_db::iris::{IrisCode, MATCH_THRESHOLD_RATIO}; +use aes_prng::AesRng; +use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher, VectorStore}; +use iris_mpc_common::iris_db::{ + db::IrisDB, + iris::{IrisCode, MATCH_THRESHOLD_RATIO}, +}; +use rand::{CryptoRng, RngCore, SeedableRng}; use std::ops::{Index, IndexMut}; #[derive(Default, Debug, Clone)] @@ -129,10 +134,51 @@ impl VectorStore for PlaintextStore { } } +impl PlaintextStore { + pub async fn create_random( + rng: &mut R, + database_size: usize, + ) -> eyre::Result<(Vec, Self, GraphMem)> { + // makes sure the searcher produces same graph structure by having the same rng + let mut rng_searcher1 = AesRng::from_rng(rng.clone())?; + let cleartext_database = IrisDB::new_random_rng(database_size, rng).db; + + let mut plaintext_vector_store = PlaintextStore::default(); + let mut plaintext_graph_store = GraphMem::new(); 
+ let searcher = HawkSearcher::default(); + + for raw_query in cleartext_database.iter() { + let query = plaintext_vector_store.prepare_query(raw_query.clone()); + let neighbors = searcher + .search_to_insert( + &mut plaintext_vector_store, + &mut plaintext_graph_store, + &query, + ) + .await; + let inserted = plaintext_vector_store.insert(&query).await; + searcher + .insert_from_search_results( + &mut plaintext_vector_store, + &mut plaintext_graph_store, + &mut rng_searcher1, + inserted, + neighbors, + ) + .await; + } + + Ok(( + cleartext_database, + plaintext_vector_store, + plaintext_graph_store, + )) + } +} + #[cfg(test)] mod tests { use super::*; - use crate::hawkers::galois_store::gr_create_ready_made_hawk_searcher; use aes_prng::AesRng; use hawk_pack::hnsw_db::HawkSearcher; use iris_mpc_common::iris_db::db::IrisDB; @@ -217,8 +263,8 @@ mod tests { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 1; let searcher = HawkSearcher::default(); - let ((mut ptxt_vector, mut ptxt_graph), _) = - gr_create_ready_made_hawk_searcher(&mut rng, database_size) + let (_, mut ptxt_vector, mut ptxt_graph) = + PlaintextStore::create_random(&mut rng, database_size) .await .unwrap(); for i in 0..database_size { diff --git a/iris-mpc-cpu/src/lib.rs b/iris-mpc-cpu/src/lib.rs index fb378ddd0..1a74801f0 100644 --- a/iris-mpc-cpu/src/lib.rs +++ b/iris-mpc-cpu/src/lib.rs @@ -2,5 +2,7 @@ pub mod database_generators; pub mod execution; pub mod hawkers; pub(crate) mod network; +#[rustfmt::skip] +pub(crate) mod proto_generated; pub mod protocol; pub(crate) mod shares; diff --git a/iris-mpc-cpu/src/network/grpc.rs b/iris-mpc-cpu/src/network/grpc.rs new file mode 100644 index 000000000..81bbe5bd0 --- /dev/null +++ b/iris-mpc-cpu/src/network/grpc.rs @@ -0,0 +1,512 @@ +use super::Networking; +use crate::{ + execution::{local::get_free_local_addresses, player::Identity}, + network::SessionId, + proto_generated::party_node::{ + party_node_client::PartyNodeClient, + 
party_node_server::{PartyNode, PartyNodeServer}, + SendRequest, SendResponse, + }, +}; +use backoff::{future::retry, ExponentialBackoff}; +use dashmap::DashMap; +use eyre::{eyre, OptionExt}; +use std::{str::FromStr, sync::Arc}; +use tonic::{ + async_trait, + metadata::AsciiMetadataValue, + transport::{Channel, Server}, + Request, Response, Status, +}; + +type TonicResult = Result; + +fn err_to_status(e: eyre::Error) -> Status { + Status::internal(e.to_string()) +} + +#[derive(Clone)] +struct QueueChannel { + pub sender: Arc>>, + pub receiver: Arc>>, +} + +#[derive(Clone)] +struct MessageQueueStore { + queues: DashMap, +} + +impl MessageQueueStore { + fn new() -> Self { + MessageQueueStore { + queues: DashMap::new(), + } + } + + pub fn add_channel(&self, party_id: &Identity) -> QueueChannel { + // check that the party_id is not already in the queues + if self.queues.contains_key(party_id) { + return self.queues.get(party_id).unwrap().clone(); + } + let (sender, receiver) = async_channel::unbounded(); + let channel = QueueChannel { + sender: Arc::new(sender), + receiver: Arc::new(receiver), + }; + self.queues.insert(party_id.clone(), channel.clone()); + channel + } + + fn get_channel(&self, party_id: &Identity) -> eyre::Result { + let channel = self.queues.get(party_id).ok_or_eyre(format!( + "Channel not found for party {:?}, existing channels: alice {}, bob {}, charlie {}", + party_id, + self.queues.contains_key(&Identity("alice".into())), + self.queues.contains_key(&Identity("bob".into())), + self.queues.contains_key(&Identity("charlie".into())) + ))?; + Ok((*channel).clone()) + } + + pub async fn push_back(&self, party_id: &Identity, value: Vec) -> eyre::Result<()> { + let channel = self.get_channel(party_id)?; + // sends the value via the channel sender; if failed, returns an error + channel.sender.send(value).await.map_err(|e| e.into()) + } + + pub async fn pop_front(&self, party_id: &Identity) -> eyre::Result> { + let channel = self.get_channel(party_id)?; + 
channel.receiver.recv().await.map_err(|e| e.into()) + } +} + +#[derive(Clone)] +pub struct GrpcNetworking { + party_id: Identity, + // other party id -> client to call that party + clients: Arc>>, + message_queues: Arc>, +} + +impl GrpcNetworking { + pub fn new(party_id: Identity) -> Self { + GrpcNetworking { + party_id, + clients: Arc::new(DashMap::new()), + message_queues: Arc::new(DashMap::new()), + } + } + + pub async fn connect_to_party(&self, party_id: Identity, address: &str) -> eyre::Result<()> { + let client = PartyNodeClient::connect(address.to_string()).await?; + self.clients.insert(party_id, client); + Ok(()) + } + + pub async fn create_session(&self, session_id: SessionId) -> eyre::Result<()> { + if self.message_queues.contains_key(&session_id) { + return Err(eyre!("Session already exists")); + } + + let queue = MessageQueueStore::new(); + for client in self.clients.iter() { + queue.add_channel(client.key()); + } + self.message_queues.insert(session_id, queue); + Ok(()) + } +} + +// Server implementation +#[async_trait] +impl PartyNode for GrpcNetworking { + async fn send_message( + &self, + request: Request, + ) -> TonicResult> { + let sender_id: Identity = request + .metadata() + .get("sender_id") + .ok_or(Status::unauthenticated("Sender ID not found"))? + .to_str() + .map_err(|_| Status::unauthenticated("Sender ID not found"))? + .to_string() + .into(); + if sender_id == self.party_id { + return Err(Status::unauthenticated(format!( + "Sender ID coincides with receiver ID: {:?}", + sender_id + ))); + } + let session_id: u64 = request + .metadata() + .get("session_id") + .ok_or(Status::not_found("Seesion ID no found"))? + .to_str() + .map_err(|_| Status::not_found("Session ID not found"))? 
+ .parse() + .map_err(|_| Status::invalid_argument("Session ID not a u64 number"))?; + let session_id = SessionId::from(session_id); + let message_queue = self + .message_queues + .get(&session_id) + .ok_or(Status::not_found(format!( + "Session {:?} hasn't been created", + session_id + )))?; + message_queue + .push_back(&sender_id, request.into_inner().data) + .await + .map_err(err_to_status)?; + Ok(Response::new(SendResponse {})) + } +} + +// Client implementation +#[async_trait] +impl Networking for GrpcNetworking { + async fn send( + &self, + value: Vec, + receiver: &Identity, + session_id: &SessionId, + ) -> eyre::Result<()> { + let backoff = ExponentialBackoff { + max_elapsed_time: Some(std::time::Duration::from_secs(2)), + max_interval: std::time::Duration::from_secs(1), + multiplier: 1.1, + ..Default::default() + }; + retry(backoff, || async { + // Send message via gRPC client + let mut client = self + .clients + .get(receiver) + .ok_or_eyre(format!("Client not found {:?}", receiver))? + .clone(); + let mut request = Request::new(SendRequest { + data: value.clone(), + }); + request.metadata_mut().append( + "sender_id", + AsciiMetadataValue::from_str(&self.party_id.0).unwrap(), + ); + request.metadata_mut().append( + "session_id", + AsciiMetadataValue::from_str(&session_id.0.to_string()).unwrap(), + ); + tracing::trace!( + "Sending message {:?} from {:?} to {:?}", + value, + self.party_id, + receiver + ); + let _response = client + .send_message(request) + .await + .map_err(|err| eyre!(err.to_string()))?; + tracing::trace!( + "SUCCESS: Sending message {:?} from {:?} to {:?}", + value, + self.party_id, + receiver + ); + Ok(()) + }) + .await + } + + async fn receive(&self, sender: &Identity, session_id: &SessionId) -> eyre::Result> { + // Just retrieve the first message from the corresponding queue + self.message_queues + .get(session_id) + .ok_or_eyre(format!( + "Session {session_id:?} hasn't been added to message queues" + ))? 
+ .pop_front(sender) + .await + } +} + +pub async fn setup_local_grpc_networking( + parties: Vec, +) -> eyre::Result> { + let players = parties + .iter() + .map(|party| GrpcNetworking::new(party.clone())) + .collect::>(); + + let addresses = get_free_local_addresses(players.len()).await?; + + let players_addresses = players + .iter() + .cloned() + .zip(addresses.iter().cloned()) + .collect::>(); + + // Initialize servers + for (player, addr) in &players_addresses { + let player = player.clone(); + let socket = addr.parse().unwrap(); + tokio::spawn(async move { + Server::builder() + .add_service(PartyNodeServer::new(player)) + .serve(socket) + .await + .unwrap(); + }); + } + + tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; + + // Connect to each other + for (player, addr) in &players_addresses { + for (other_player, other_addr) in &players_addresses.clone() { + if addr != other_addr { + let other_addr = format!("http://{}", other_addr); + player + .connect_to_party(other_player.party_id.clone(), &other_addr) + .await + .unwrap(); + } + } + } + + Ok(players) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + execution::{local::generate_local_identities, player::Role}, + hawkers::galois_store::LocalNetAby3NgStoreProtocol, + }; + use aes_prng::AesRng; + use hawk_pack::hnsw_db::HawkSearcher; + use rand::SeedableRng; + use std::time::Duration; + use tokio::task::JoinSet; + use tracing_test::traced_test; + + #[tokio::test(flavor = "multi_thread")] + #[traced_test] + async fn test_grpc_comms_correct() -> eyre::Result<()> { + let identities = generate_local_identities(); + let players = setup_local_grpc_networking(identities.clone()).await?; + + let mut jobs = JoinSet::new(); + + // Simple session with one message sent from one party to another + { + let alice = players[0].clone(); + let bob = players[1].clone(); + + let session_id = SessionId::from(0); + + jobs.spawn(async move { + // Send a message from the first party to the second party + 
let message = b"Hey, Bob. I'm Alice. Do you copy?".to_vec(); + let message_copy = message.clone(); + + let task1 = tokio::spawn(async move { + alice.create_session(session_id).await.unwrap(); + // Add a delay to ensure that the session is created before sending + tokio::time::sleep(Duration::from_millis(100)).await; + alice + .send(message.clone(), &"bob".into(), &session_id) + .await + .unwrap(); + }); + let task2 = tokio::spawn(async move { + bob.create_session(session_id).await.unwrap(); + // Add a delay to ensure that the session is created before receiving + tokio::time::sleep(Duration::from_millis(100)).await; + let received_message = bob.receive(&"alice".into(), &session_id).await.unwrap(); + assert_eq!(message_copy, received_message); + }); + let _ = tokio::try_join!(task1, task2).unwrap(); + }); + } + + // Each party sending and receiving messages to each other + { + jobs.spawn(async move { + let session_id = SessionId::from(1); + + let mut tasks = JoinSet::new(); + // Send messages + for (player_id, player) in players.iter().enumerate() { + let role = Role::new(player_id); + let next = role.next(3).index(); + let prev = role.prev(3).index(); + + let player = player.clone(); + let next_id = identities[next].clone(); + let prev_id = identities[prev].clone(); + + tasks.spawn(async move { + player.create_session(session_id).await.unwrap(); + // Add a delay to ensure that the session is created before + // sending/receiving + tokio::time::sleep(Duration::from_millis(100)).await; + + // Sending + let msg_to_next = + format!("From player {} to player {} with love", player_id, next) + .into_bytes(); + let msg_to_prev = + format!("From player {} to player {} with love", player_id, prev) + .into_bytes(); + player + .send(msg_to_next.clone(), &next_id, &session_id) + .await + .unwrap(); + player + .send(msg_to_prev.clone(), &prev_id, &session_id) + .await + .unwrap(); + + // Receiving + let received_msg_from_prev = + player.receive(&prev_id, 
&session_id).await.unwrap(); + let expected_msg_from_prev = + format!("From player {} to player {} with love", prev, player_id) + .into_bytes(); + assert_eq!(received_msg_from_prev, expected_msg_from_prev); + let received_msg_from_next = + player.receive(&next_id, &session_id).await.unwrap(); + let expected_msg_from_next = + format!("From player {} to player {} with love", next, player_id) + .into_bytes(); + assert_eq!(received_msg_from_next, expected_msg_from_next); + }); + } + tasks.join_all().await; + }); + } + + jobs.join_all().await; + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + #[traced_test] + async fn test_grpc_comms_fail() -> eyre::Result<()> { + let parties = generate_local_identities(); + + let players = setup_local_grpc_networking(parties.clone()).await?; + + let mut jobs = JoinSet::new(); + + // Send to a non-existing party + { + let alice = players[0].clone(); + jobs.spawn(async move { + let session_id = SessionId::from(2); + alice.create_session(session_id).await.unwrap(); + + let message = b"Hey, Eve. I'm Alice. Do you copy?".to_vec(); + let res = alice + .send(message.clone(), &Identity::from("eve"), &session_id) + .await; + assert!(res.is_err()); + }); + } + + // Receive from a wrong party + { + let alice = players[0].clone(); + jobs.spawn(async move { + let session_id = SessionId::from(3); + alice.create_session(session_id).await.unwrap(); + + let res = alice.receive(&Identity::from("eve"), &session_id).await; + assert!(res.is_err()); + }); + } + + // Send to itself + { + let alice = players[0].clone(); + jobs.spawn(async move { + let session_id = SessionId::from(4); + alice.create_session(session_id).await.unwrap(); + + let message = b"Hey, Alice. I'm Alice. 
Do you copy?".to_vec(); + let res = alice + .send(message.clone(), &Identity::from("alice"), &session_id) + .await; + assert!(res.is_err()); + }); + } + + // Add the same session + { + let alice = players[0].clone(); + jobs.spawn(async move { + let session_id = SessionId::from(4); + + // Delay to ensure that the session is created in the previous example + tokio::time::sleep(Duration::from_millis(100)).await; + let res = alice.create_session(session_id).await; + + assert!(res.is_err()); + }); + } + + // Retrieve from a non-existing session + { + let alice = players[0].clone(); + jobs.spawn(async move { + let session_id = SessionId::from(50); + + let message = b"Hey, Bob. I'm Alice. Do you copy?".to_vec(); + let res = alice + .send(message.clone(), &Identity::from("bob"), &session_id) + .await; + assert!(res.is_err()); + let res = alice.receive(&Identity::from("bob"), &session_id).await; + assert!(res.is_err()); + }); + } + + jobs.join_all().await; + + Ok(()) + } + + #[tokio::test] + async fn test_hnsw_local() { + let mut rng = AesRng::seed_from_u64(0_u64); + let database_size = 2; + let searcher = HawkSearcher::default(); + let mut vectors_and_graphs = LocalNetAby3NgStoreProtocol::shared_random_setup( + &mut rng, + database_size, + crate::network::NetworkType::GrpcChannel, + ) + .await + .unwrap(); + + for i in 0..database_size { + let mut jobs = JoinSet::new(); + for (store, graph) in vectors_and_graphs.iter_mut() { + let mut store = store.clone(); + let mut graph = graph.clone(); + let searcher = searcher.clone(); + jobs.spawn(async move { + let secret_neighbors = searcher + .search_to_insert(&mut store, &mut graph, &i.into()) + .await; + searcher.is_match(&mut store, &secret_neighbors).await + }); + } + let res = jobs.join_all().await; + for (party_index, r) in res.iter().enumerate() { + assert!(r, "Failed at index {:?} by party {:?}", i, party_index); + } + } + } +} diff --git a/iris-mpc-cpu/src/network/local.rs b/iris-mpc-cpu/src/network/local.rs index 
dbdd9e36e..91269795f 100644 --- a/iris-mpc-cpu/src/network/local.rs +++ b/iris-mpc-cpu/src/network/local.rs @@ -115,7 +115,7 @@ mod tests { let bob = networking_store.get_local_network("bob".into()); let task1 = tokio::spawn(async move { - let recv = bob.receive(&"alice".into(), &1_u128.into()).await; + let recv = bob.receive(&"alice".into(), &1_u64.into()).await; assert_eq!( NetworkValue::from_network(recv).unwrap(), NetworkValue::Ring16(Wrapping::(777)) @@ -124,7 +124,7 @@ mod tests { let task2 = tokio::spawn(async move { let value = NetworkValue::Ring16(Wrapping::(777)); alice - .send(value.to_network(), &"bob".into(), &1_u128.into()) + .send(value.to_network(), &"bob".into(), &1_u64.into()) .await }); diff --git a/iris-mpc-cpu/src/network/mod.rs b/iris-mpc-cpu/src/network/mod.rs index d0261b0ec..edd362833 100644 --- a/iris-mpc-cpu/src/network/mod.rs +++ b/iris-mpc-cpu/src/network/mod.rs @@ -14,5 +14,12 @@ pub trait Networking { async fn receive(&self, sender: &Identity, session_id: &SessionId) -> eyre::Result>; } +#[derive(Clone)] +pub enum NetworkType { + LocalChannel, + GrpcChannel, +} + +pub mod grpc; pub mod local; pub mod value; diff --git a/iris-mpc-cpu/src/proto/party_node.proto b/iris-mpc-cpu/src/proto/party_node.proto new file mode 100644 index 000000000..67a2df623 --- /dev/null +++ b/iris-mpc-cpu/src/proto/party_node.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; + +package party_node; + +service PartyNode { + rpc SendMessage (SendRequest) returns (SendResponse); +} + +message SendRequest { + bytes data = 3; +} + +message SendResponse {} \ No newline at end of file diff --git a/iris-mpc-cpu/src/proto_generated/mod.rs b/iris-mpc-cpu/src/proto_generated/mod.rs new file mode 100644 index 000000000..8c4bf931d --- /dev/null +++ b/iris-mpc-cpu/src/proto_generated/mod.rs @@ -0,0 +1 @@ +pub mod party_node; diff --git a/iris-mpc-cpu/src/proto_generated/party_node.rs b/iris-mpc-cpu/src/proto_generated/party_node.rs new file mode 100644 index 000000000..771569b6a 
--- /dev/null +++ b/iris-mpc-cpu/src/proto_generated/party_node.rs @@ -0,0 +1,297 @@ +// This file is @generated by prost-build. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SendRequest { + #[prost(bytes = "vec", tag = "3")] + pub data: ::prost::alloc::vec::Vec, +} +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct SendResponse {} +/// Generated client implementations. +pub mod party_node_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct PartyNodeClient { + inner: tonic::client::Grpc, + } + impl PartyNodeClient { + /// Attempt to create a new client by connecting to a given endpoint. + pub async fn connect(dst: D) -> Result + where + D: TryInto, + D::Error: Into, + { + let conn = tonic::transport::Endpoint::new(dst)?.connect().await?; + Ok(Self::new(conn)) + } + } + impl PartyNodeClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> PartyNodeClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + PartyNodeClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. 
+ #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. + /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn send_message( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/party_node.PartyNode/SendMessage", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("party_node.PartyNode", "SendMessage")); + self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod party_node_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with PartyNodeServer. 
+ #[async_trait] + pub trait PartyNode: std::marker::Send + std::marker::Sync + 'static { + async fn send_message( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + } + #[derive(Debug)] + pub struct PartyNodeServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl PartyNodeServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> for PartyNodeServer + where + T: PartyNode, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/party_node.PartyNode/SendMessage" => { + #[allow(non_camel_case_types)] + struct SendMessageSvc(pub Arc); + impl tonic::server::UnaryService + for SendMessageSvc { + type Response = super::SendResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::send_message(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = SendMessageSvc(inner); + let codec = tonic::codec::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = http::Response::new(empty_body()); + let headers = response.headers_mut(); + headers + .insert( 
+ tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for PartyNodeServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "party_node.PartyNode"; + impl tonic::server::NamedService for PartyNodeServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git a/iris-mpc-cpu/src/protocol/ops.rs b/iris-mpc-cpu/src/protocol/ops.rs index c52e227ee..94dd7897b 100644 --- a/iris-mpc-cpu/src/protocol/ops.rs +++ b/iris-mpc-cpu/src/protocol/ops.rs @@ -552,7 +552,7 @@ mod tests { #[case(1)] #[case(2)] async fn test_galois_ring_to_rep3(#[case] seed: u64) { - let runtime = LocalRuntime::replicated_test_config().await.unwrap(); + let runtime = LocalRuntime::mock_setup_with_channel().await.unwrap(); let mut rng = AesRng::seed_from_u64(seed); let iris_db = IrisDB::new_random_rng(2, &mut rng).db; From e04710cc4e3398aa3bc92adfa37d3e027e01f1f3 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Sat, 23 Nov 2024 10:55:30 -0800 Subject: [PATCH 017/170] fix batch matches (#708) * fix batch matches * fix * fix --- iris-mpc-gpu/src/dot/distance_comparator.rs | 10 +++++++++- iris-mpc-gpu/src/dot/kernel.cu | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/iris-mpc-gpu/src/dot/distance_comparator.rs b/iris-mpc-gpu/src/dot/distance_comparator.rs index c70fd4a8e..6c459c717 100644 --- a/iris-mpc-gpu/src/dot/distance_comparator.rs +++ b/iris-mpc-gpu/src/dot/distance_comparator.rs @@ -289,6 +289,7 @@ impl DistanceComparator { ); } + let 
batch_match_idx: u32 = u32::MAX - (self.query_length / ROTATIONS) as u32; // batch matches have an index of u32::MAX - index let mut matches_per_query = vec![vec![]; match_counters[0].len()]; let n_devices = self.device_manager.device_count(); for i in 0..self.device_manager.device_count() { @@ -297,7 +298,14 @@ impl DistanceComparator { let len = match_counters[i][j] as usize; let ids = results[i][offset..offset + min(len, ALL_MATCHES_LEN)] .iter() - .map(|idx| idx * n_devices as u32 + i as u32) + .map(|&idx| { + if idx > batch_match_idx { + idx + } else { + idx * n_devices as u32 + i as u32 + } + }) + .filter(|&idx| idx < batch_match_idx || i == 0) // take all normal matches, but only batch matches from device 0 .collect::>(); matches_per_query[j].extend_from_slice(&ids); offset += ALL_MATCHES_LEN; diff --git a/iris-mpc-gpu/src/dot/kernel.cu b/iris-mpc-gpu/src/dot/kernel.cu index ff9c13633..b523b02be 100644 --- a/iris-mpc-gpu/src/dot/kernel.cu +++ b/iris-mpc-gpu/src/dot/kernel.cu @@ -108,7 +108,7 @@ extern "C" __global__ void mergeBatchResults(unsigned long long *matchResultsSel continue; // Query is already considering rotations, ignore rotated db entries - if ((dbIdx - ROTATIONS) % ALL_ROTATIONS != 0) + if ((dbIdx < ROTATIONS) || ((dbIdx - ROTATIONS) % ALL_ROTATIONS != 0)) continue; // Only consider results above the diagonal From 263b6dc1126b69501277f6bd99029d6263756a6f Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Sat, 23 Nov 2024 11:13:46 -0800 Subject: [PATCH 018/170] release 0.10.1 to prod (#709) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index aac347afc..951c803cc 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.0" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.1" environment: prod replicaCount: 1 From 
42524060e69c4741e329195c1a530875fb71e4cb Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Sat, 23 Nov 2024 12:30:18 -0800 Subject: [PATCH 019/170] smaller chunk size (#710) --- iris-mpc-gpu/src/server/actor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 6452f28e7..f332372e6 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -66,7 +66,7 @@ impl ServerActorHandle { } } -const DB_CHUNK_SIZE: usize = 1 << 15; +const DB_CHUNK_SIZE: usize = 1 << 14; const KDF_SALT: &str = "111a1a93518f670e9bb0c2c68888e2beb9406d4c4ed571dc77b801e676ae3091"; // Random 32 byte salt pub struct ServerActor { From 47165a7dbfdd27073fb0a16e06d13428e6ce0ab7 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Sat, 23 Nov 2024 14:13:13 -0800 Subject: [PATCH 020/170] release 0.10.2 to prod (#711) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 951c803cc..fb2208f95 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.1" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: prod replicaCount: 1 From a877a985bd543043f0c4935e9e6c665bd3234c3f Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Sun, 24 Nov 2024 23:37:46 -0800 Subject: [PATCH 021/170] deploy upgrade servers new image (#713) --- deploy/prod/common-values-upgrade-server-left.yaml | 2 +- deploy/prod/common-values-upgrade-server-right.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/prod/common-values-upgrade-server-left.yaml b/deploy/prod/common-values-upgrade-server-left.yaml index a15c07562..c15cb3eca 100644 --- a/deploy/prod/common-values-upgrade-server-left.yaml +++ b/deploy/prod/common-values-upgrade-server-left.yaml @@ 
-1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.8.25" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: prod replicaCount: 1 diff --git a/deploy/prod/common-values-upgrade-server-right.yaml b/deploy/prod/common-values-upgrade-server-right.yaml index 5f8f28507..d56879676 100644 --- a/deploy/prod/common-values-upgrade-server-right.yaml +++ b/deploy/prod/common-values-upgrade-server-right.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.8.25" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: prod replicaCount: 1 From ca4fe21751a956c80aa60da9b30a835835464f21 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Sun, 24 Nov 2024 23:38:19 -0800 Subject: [PATCH 022/170] scale pods to 0 (#712) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 0501d8f6c..3612aeb88 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.0" environment: stage -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From 1441dbf8224b923812e9fb9510df886b03e12238 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 25 Nov 2024 01:06:07 -0800 Subject: [PATCH 023/170] scale pods (#714) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index fb2208f95..cfacdaafb 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 3612aeb88..0501d8f6c 100644 
--- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.0" environment: stage -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From e0b8a40bb0f7abe2f04ffd2f2aa9a5b436bf733a Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 25 Nov 2024 01:11:57 -0800 Subject: [PATCH 024/170] scale pods (#715) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index cfacdaafb..fb2208f95 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From 9e33af582b7f3db19c0064d39ffd3ba9cd7d6e28 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Mon, 25 Nov 2024 10:54:08 +0100 Subject: [PATCH 025/170] chore: Scale down (#716) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index fb2208f95..cfacdaafb 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From dec46b2e30b88ed342678d8aa07e0c37d862f15c Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:08:55 +0100 Subject: [PATCH 026/170] chore: Scale up prod (#717) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 
cfacdaafb..fb2208f95 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From afea5a63831ec7ee9075327672287489dc1c174e Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 25 Nov 2024 03:17:46 -0800 Subject: [PATCH 027/170] fix typo nginx config (#718) --- deploy/stage/common-values-reshare-server.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 2c6c21613..f9045c440 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -133,4 +133,3 @@ nginxSidecar: } } } -} From 6090f1c369f84a531eea7fa7a2f40a1c9f937264 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Mon, 25 Nov 2024 06:25:41 -0800 Subject: [PATCH 028/170] separate live and health check (#719) * separate live and health check * adapt heartbeat values * cargo update --- Cargo.lock | 35 ++- deploy/prod/common-values-iris-mpc.yaml | 8 +- .../prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- .../prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- .../prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 11 +- iris-mpc-common/src/config/mod.rs | 2 +- iris-mpc/src/bin/server.rs | 210 ++++++++++++------ 8 files changed, 186 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34bc732cc..dbb040d3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,17 +181,38 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "async-stream" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22068c0c19514942eefcfd4daf8976ef1aad84e61539f95cd200c35202f80af5" +dependencies = [ + "async-stream-impl 0.2.1", + "futures-core", +] + [[package]] name = "async-stream" version = "0.3.6" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" dependencies = [ - "async-stream-impl", + "async-stream-impl 0.3.6", "futures-core", "pin-project-lite", ] +[[package]] +name = "async-stream-impl" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25f9db3b38af870bf7e5cc649167533b493928e50744e2c30ae350230b414670" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "async-stream-impl" version = "0.3.6" @@ -868,7 +889,7 @@ dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.10.5", "lazy_static", "lazycell", "log", @@ -2418,7 +2439,7 @@ dependencies = [ "hyper 1.5.0", "hyper-util", "log", - "rustls 0.23.16", + "rustls 0.23.18", "rustls-native-certs 0.8.0", "rustls-pki-types", "tokio", @@ -4426,9 +4447,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.16" +version = "0.23.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" +checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f" dependencies = [ "aws-lc-rs", "log", @@ -5424,7 +5445,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.16", + "rustls 0.23.18", "rustls-pki-types", "tokio", ] @@ -5494,7 +5515,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ - "async-stream", + "async-stream 0.3.6", "async-trait", "axum", "base64 0.22.1", diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index fb2208f95..bed0d3965 100644 --- a/deploy/prod/common-values-iris-mpc.yaml 
+++ b/deploy/prod/common-values-iris-mpc.yaml @@ -18,17 +18,13 @@ ports: # protocol: TCP livenessProbe: - initialDelaySeconds: 300 httpGet: path: /health port: health readinessProbe: - initialDelaySeconds: 300 - periodSeconds: 30 - failureThreshold: 10 httpGet: - path: /health + path: /ready port: health startupProbe: @@ -36,7 +32,7 @@ startupProbe: failureThreshold: 40 periodSeconds: 30 httpGet: - path: /health + path: /ready port: health resources: diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 963bd3944..900b2eaaa 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -63,7 +63,7 @@ env: value: "2" - name: SMPC__HEARTBEAT_INITIAL_RETRIES - value: "1000" + value: "30" - name: SMPC__PATH value: "/data/" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 6968b3c99..a4ba8f006 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -63,7 +63,7 @@ env: value: "2" - name: SMPC__HEARTBEAT_INITIAL_RETRIES - value: "1000" + value: "30" - name: SMPC__PATH value: "/data/" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 1620d41d1..28487ae7d 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -63,7 +63,7 @@ env: value: "2" - name: SMPC__HEARTBEAT_INITIAL_RETRIES - value: "1000" + value: "30" - name: SMPC__PATH value: "/data/" diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 0501d8f6c..ebb7876bc 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -15,23 +15,22 @@ ports: protocol: TCP livenessProbe: - initialDelaySeconds: 300 httpGet: path: /health port: health readinessProbe: - 
initialDelaySeconds: 300 periodSeconds: 30 - failureThreshold: 10 httpGet: - path: /health + path: /ready port: health startupProbe: - initialDelaySeconds: 300 + initialDelaySeconds: 60 + failureThreshold: 40 + periodSeconds: 30 httpGet: - path: /health + path: /ready port: health resources: diff --git a/iris-mpc-common/src/config/mod.rs b/iris-mpc-common/src/config/mod.rs index d98a3ec1f..f2825c976 100644 --- a/iris-mpc-common/src/config/mod.rs +++ b/iris-mpc-common/src/config/mod.rs @@ -92,7 +92,7 @@ fn default_max_batch_size() -> usize { } fn default_heartbeat_interval_secs() -> u64 { - 30 + 2 } fn default_heartbeat_initial_retries() -> u64 { diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 774669f5a..a847542cf 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -2,7 +2,7 @@ use aws_sdk_sns::{types::MessageAttributeValue, Client as SNSClient}; use aws_sdk_sqs::{config::Region, Client}; -use axum::{routing::get, Router}; +use axum::{response::IntoResponse, routing::get, Router}; use clap::Parser; use eyre::{eyre, Context}; use futures::TryStreamExt; @@ -36,11 +36,15 @@ use iris_mpc_gpu::{ }; use iris_mpc_store::{Store, StoredIrisRef}; use metrics_exporter_statsd::StatsdBuilder; +use reqwest::StatusCode; use std::{ backtrace::Backtrace, collections::HashMap, mem, panic, - sync::{Arc, LazyLock, Mutex}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, LazyLock, Mutex, + }, time::{Duration, Instant}, }; use telemetry_batteries::tracing::{datadog::DatadogBattery, TracingShutdownHandle}; @@ -680,14 +684,116 @@ async fn server_main(config: Config) -> eyre::Result<()> { eyre::bail!("Database size exceeds maximum allowed size: {}", store_len); } + tracing::info!("Preparing task monitor"); + let mut background_tasks = TaskMonitor::new(); + + // -------------------------------------------------------------------------- + // ANCHOR: Starting Healthcheck and Readiness server + // 
-------------------------------------------------------------------------- + tracing::info!("⚓️ ANCHOR: Starting Healthcheck and Readiness server"); + + let is_ready_flag = Arc::new(AtomicBool::new(false)); + let is_ready_flag_cloned = Arc::clone(&is_ready_flag); + + let _health_check_abort = background_tasks.spawn({ + let uuid = uuid::Uuid::new_v4().to_string(); + async move { + // Generate a random UUID for each run. + let app = Router::new() + .route("/health", get(move || async move { uuid.to_string() })) + .route( + "/ready", + get({ + // We are only ready once this flag is set to true. + let is_ready_flag = Arc::clone(&is_ready_flag); + move || async move { + if is_ready_flag.load(Ordering::SeqCst) { + "ready".into_response() + } else { + StatusCode::SERVICE_UNAVAILABLE.into_response() + } + } + }), + ); + let listener = tokio::net::TcpListener::bind("0.0.0.0:3000") + .await + .wrap_err("healthcheck listener bind error")?; + axum::serve(listener, app) + .await + .wrap_err("healthcheck listener server launch error")?; + + Ok::<(), eyre::Error>(()) + } + }); + + background_tasks.check_tasks(); + tracing::info!("Healthcheck and Readiness server running on port 3000."); + + let (heartbeat_tx, heartbeat_rx) = oneshot::channel(); + let mut heartbeat_tx = Some(heartbeat_tx); + let all_nodes = config.node_hostnames.clone(); + let _heartbeat = background_tasks.spawn(async move { + let next_node = &all_nodes[(config.party_id + 1) % 3]; + let prev_node = &all_nodes[(config.party_id + 2) % 3]; + let mut last_response = [String::default(), String::default()]; + let mut connected = [false, false]; + let mut retries = [0, 0]; + + loop { + for (i, host) in [next_node, prev_node].iter().enumerate() { + let res = reqwest::get(format!("http://{}:3000/health", host)).await; + if res.is_err() || !res.as_ref().unwrap().status().is_success() { + // If it's the first time after startup, we allow a few retries to let the other + // nodes start up as well. 
+ if last_response[i] == String::default() + && retries[i] < config.heartbeat_initial_retries + { + retries[i] += 1; + tracing::warn!("Node {} did not respond with success, retrying...", host); + continue; + } + // The other node seems to be down or returned an error. + panic!( + "Node {} did not respond with success, killing server...", + host + ); + } + + let uuid = res.unwrap().text().await?; + if last_response[i] == String::default() { + last_response[i] = uuid; + connected[i] = true; + + // If all nodes are connected, notify the main thread. + if connected.iter().all(|&c| c) { + if let Some(tx) = heartbeat_tx.take() { + tx.send(()).unwrap(); + } + } + } else if uuid != last_response[i] { + // If the UUID response is different, the node has restarted without us + // noticing. Our main NCCL connections cannot recover from + // this, so we panic. + panic!("Node {} seems to have restarted, killing server...", host); + } else { + tracing::info!("Heartbeat: Node {} is healthy", host); + } + } + + tokio::time::sleep(Duration::from_secs(config.heartbeat_interval_secs)).await; + } + }); + + tracing::info!("Heartbeat starting..."); + heartbeat_rx.await?; + tracing::info!("Heartbeat on all nodes started."); + background_tasks.check_tasks(); + let my_state = SyncState { db_len: store_len as u64, deleted_request_ids: store.last_deleted_requests(max_sync_lookback).await?, }; - tracing::info!("Preparing task monitor"); - let mut background_tasks = TaskMonitor::new(); - // Start the actor in separate task. 
// A bit convoluted, but we need to create the actor on the thread already, // since it blocks a lot and is `!Send`, we get back the handle via the oneshot @@ -703,10 +809,17 @@ async fn server_main(config: Config) -> eyre::Result<()> { let device_manager = Arc::new(DeviceManager::init()); let ids = device_manager.get_ids_from_magic(0); - tracing::info!("Starting NCCL"); + // -------------------------------------------------------------------------- + // ANCHOR: Starting NCCL + // -------------------------------------------------------------------------- + tracing::info!("⚓️ ANCHOR: Starting NCCL"); let comms = device_manager.instantiate_network_from_ids(config.party_id, &ids)?; + // FYI: If any of the nodes die after this, all connections are broken. - tracing::info!("NCCL: getting sync results"); + // -------------------------------------------------------------------------- + // ANCHOR: Syncing latest node state + // -------------------------------------------------------------------------- + tracing::info!("⚓️ ANCHOR: Syncing latest node state"); let sync_result = match sync_nccl::sync(&comms[0], &my_state) { Ok(res) => res, Err(e) => { @@ -734,6 +847,11 @@ async fn server_main(config: Config) -> eyre::Result<()> { metrics::counter!("db.sync.rollback").increment(1); } + // -------------------------------------------------------------------------- + // ANCHOR: Load the database + // -------------------------------------------------------------------------- + tracing::info!("⚓️ ANCHOR: Load the database"); + tracing::info!("Starting server actor"); match ServerActor::new_with_device_manager_and_comms( config.party_id, @@ -988,89 +1106,55 @@ async fn server_main(config: Config) -> eyre::Result<()> { }); background_tasks.check_tasks(); - tracing::info!("All systems ready."); - tracing::info!("Starting healthcheck server."); - - let _health_check_abort = background_tasks.spawn(async move { - // Generate a random UUID for each run. 
- let uuid = uuid::Uuid::new_v4().to_string(); - let app = Router::new().route("/health", get(|| async { uuid })); // implicit 200 return - let listener = tokio::net::TcpListener::bind("0.0.0.0:3000") - .await - .wrap_err("healthcheck listener bind error")?; - axum::serve(listener, app) - .await - .wrap_err("healthcheck listener server launch error")?; - - Ok(()) - }); + // -------------------------------------------------------------------------- + // ANCHOR: Enable readiness and check all nodes + // -------------------------------------------------------------------------- + tracing::info!("⚓️ ANCHOR: Enable readiness and check all nodes"); - background_tasks.check_tasks(); - tracing::info!("Healthcheck server running on port 3000."); + // Set the readiness flag to true, which will make the readiness server return a + // 200 status code. + is_ready_flag_cloned.store(true, std::sync::atomic::Ordering::SeqCst); - let (heartbeat_tx, heartbeat_rx) = oneshot::channel(); - let mut heartbeat_tx = Some(heartbeat_tx); + // Check other nodes and wait until all nodes are ready. + let (readiness_tx, readiness_rx) = oneshot::channel(); + let mut readiness_tx = Some(readiness_tx); let all_nodes = config.node_hostnames.clone(); let _heartbeat = background_tasks.spawn(async move { let next_node = &all_nodes[(config.party_id + 1) % 3]; let prev_node = &all_nodes[(config.party_id + 2) % 3]; - let mut last_response = [String::default(), String::default()]; let mut connected = [false, false]; - let mut retries = [0, 0]; loop { for (i, host) in [next_node, prev_node].iter().enumerate() { - let res = reqwest::get(format!("http://{}:3000/health", host)).await; - if res.is_err() || !res.as_ref().unwrap().status().is_success() { - // If it's the first time after startup, we allow a few retries to let the other - // nodes start up as well. 
- if last_response[i] == String::default() - && retries[i] < config.heartbeat_initial_retries - { - retries[i] += 1; - tracing::warn!("Node {} did not respond with success, retrying...", host); - continue; - } - // The other node seems to be down or returned an error. - panic!( - "Node {} did not respond with success, killing server...", - host - ); - } + let res = reqwest::get(format!("http://{}:3000/ready", host)).await; - let uuid = res.unwrap().text().await?; - if last_response[i] == String::default() { - last_response[i] = uuid; + if res.is_ok() && res.as_ref().unwrap().status().is_success() { connected[i] = true; - // If all nodes are connected, notify the main thread. if connected.iter().all(|&c| c) { - if let Some(tx) = heartbeat_tx.take() { + if let Some(tx) = readiness_tx.take() { tx.send(()).unwrap(); } } - } else if uuid != last_response[i] { - // If the UUID response is different, the node has restarted without us - // noticing. Our main NCCL connections cannot recover from - // this, so we panic. 
- panic!("Node {} seems to have restarted, killing server...", host); - } else { - tracing::info!("Heartbeat: Node {} is healthy", host); } } - tokio::time::sleep(Duration::from_secs(config.heartbeat_interval_secs)).await; + tokio::time::sleep(Duration::from_secs(1)).await; } }); - tracing::info!("Heartbeat starting..."); - heartbeat_rx.await?; - tracing::info!("Heartbeat on all nodes started."); + tracing::info!("Waiting for all nodes to be ready..."); + readiness_rx.await?; + tracing::info!("All nodes are ready."); background_tasks.check_tasks(); + // -------------------------------------------------------------------------- + // ANCHOR: Start the main loop + // -------------------------------------------------------------------------- + tracing::info!("⚓️ ANCHOR: Start the main loop"); + let processing_timeout = Duration::from_secs(config.processing_timeout_secs); - // Main loop let res: eyre::Result<()> = async { tracing::info!("Entering main loop"); // **Tensor format of queries** From fba7074dc626d61cf710b8007bd85bbb6f78cff3 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 25 Nov 2024 08:14:52 -0800 Subject: [PATCH 029/170] bump to latest (#720) * bump to latest * fix ports --- deploy/stage/common-values-iris-mpc.yaml | 2 +- deploy/stage/common-values-reshare-server.yaml | 16 ++++++++-------- .../stage/common-values-upgrade-server-left.yaml | 2 +- .../common-values-upgrade-server-right.yaml | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index ebb7876bc..6f99973e8 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.0" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: stage replicaCount: 1 diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index f9045c440..fac17d78e 100644 
--- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.8.25" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: stage replicaCount: 1 @@ -11,7 +11,7 @@ datadog: # Nginx exposes the only port required here ports: - - containerPort: 3000 + - containerPort: 3001 name: health protocol: TCP @@ -34,11 +34,11 @@ readinessProbe: resources: limits: - cpu: 1 - memory: 1Gi + cpu: 4 + memory: 16Gi requests: - cpu: 1 - memory: 1Gi + cpu: 4 + memory: 16Gi imagePullSecrets: - name: github-secret @@ -72,7 +72,7 @@ service: nginxSidecar: enabled: true - port: 8443 + port: 6443 secrets: enabled: true volumeMount: @@ -106,7 +106,7 @@ nginxSidecar: access_log /dev/stdout basic; server { - listen 8443 ssl; + listen 6443 ssl; http2 on; ssl_certificate /etc/nginx/cert/certificate.crt; diff --git a/deploy/stage/common-values-upgrade-server-left.yaml b/deploy/stage/common-values-upgrade-server-left.yaml index e5f577ac5..46ea80cda 100644 --- a/deploy/stage/common-values-upgrade-server-left.yaml +++ b/deploy/stage/common-values-upgrade-server-left.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.8.25" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: stage replicaCount: 1 diff --git a/deploy/stage/common-values-upgrade-server-right.yaml b/deploy/stage/common-values-upgrade-server-right.yaml index e5f577ac5..46ea80cda 100644 --- a/deploy/stage/common-values-upgrade-server-right.yaml +++ b/deploy/stage/common-values-upgrade-server-right.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.8.25" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" environment: stage replicaCount: 1 From aa712dffec2feaff2a86e438f2224a09ef37677b Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 25 Nov 2024 08:28:01 -0800 Subject: [PATCH 030/170] fix entrypoint (#721) --- deploy/stage/smpcv2-1-stage/values-reshare-server.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff 
--git a/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml index c320df97f..f4b72c137 100644 --- a/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml +++ b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml @@ -7,6 +7,14 @@ args: - "1" - "--environment" - "$(ENVIRONMENT)" + - "--sender1-party-id" + - "0" + - "--sender2-party-id" + - "2" + - "--batch-size" + - "100" + - "--max-buffer-size" + - "10" initContainer: enabled: true From 925b179072a4f1a58bc676311b18e7cf1a760886 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 25 Nov 2024 09:02:29 -0800 Subject: [PATCH 031/170] use port 7000 (#722) --- deploy/stage/common-values-reshare-server.yaml | 4 ++-- deploy/stage/smpcv2-1-stage/values-reshare-server.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index fac17d78e..3557dd21d 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -120,8 +120,8 @@ nginxSidecar: ssl_session_timeout 1h; location / { - # Forward gRPC traffic to the gRPC server on port 8000 - grpc_pass grpc://127.0.0.1:8000; + # Forward gRPC traffic to the gRPC server on port 7000 + grpc_pass grpc://127.0.0.1:7000; error_page 502 = /error502grpc; # Custom error page for GRPC backend issues } diff --git a/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml index f4b72c137..dadecabf2 100644 --- a/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml +++ b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml @@ -1,6 +1,6 @@ args: - "--bind-addr" - - "0.0.0.0:8000" + - "0.0.0.0:7000" - "--db-url" - "$(SMPC__DATABASE__URL)" - "--party-id" From a0a27f85a4b05331075097a355a3e71b93fe47b8 Mon Sep 17 00:00:00 2001 From: Bryan Gillespie Date: Mon, 25 Nov 2024 10:16:30 -0700 Subject: [PATCH 032/170] 
Feat/Python HNSW Bindings (#672) Implement Python bindings for HNSW graph search functionality over plaintext iris codes using the PyO3 library and Maturin build layer. Provides basic Python bindings for iris code, vector store, and graph store data structures, data serialization of iris codes using base64 encoding compatible with Open IRIS, and serialization of vector and graph stores to and from file. Serialization of the vector store is implemented using the NDJSON ("Newline Delimited JSON") file format, which allows the use of a single large database file of iris code test data from which entries can be streamed rather than read into memory as a single block. Build and deployment instructions for the new Python bindings can be found in the `README.md` file of the new `iris-mpc-py` crate. Usage details are also found in `README.md`, and an example Python script exercising the functionality is available in `examples-py/test_integration.py`. --------- Co-authored-by: Bryan Gillespie --- Cargo.lock | 97 +++++++++++++- Cargo.toml | 4 + iris-mpc-common/Cargo.toml | 4 +- iris-mpc-common/src/iris_db/iris.rs | 8 +- iris-mpc-cpu/Cargo.toml | 6 +- iris-mpc-cpu/src/hawkers/plaintext_store.rs | 25 ++-- iris-mpc-cpu/src/lib.rs | 1 + iris-mpc-cpu/src/py_bindings/hnsw.rs | 126 ++++++++++++++++++ iris-mpc-cpu/src/py_bindings/io.rs | 36 +++++ iris-mpc-cpu/src/py_bindings/mod.rs | 13 ++ .../src/py_bindings/plaintext_store.rs | 79 +++++++++++ iris-mpc-py/.gitignore | 72 ++++++++++ iris-mpc-py/Cargo.toml | 19 +++ iris-mpc-py/README.md | 93 +++++++++++++ iris-mpc-py/examples-py/test_integration.py | 37 +++++ iris-mpc-py/pyproject.toml | 16 +++ iris-mpc-py/src/lib.rs | 1 + iris-mpc-py/src/py_hnsw/mod.rs | 2 + .../src/py_hnsw/pyclasses/graph_store.rs | 27 ++++ .../src/py_hnsw/pyclasses/hawk_searcher.rs | 98 ++++++++++++++ .../src/py_hnsw/pyclasses/iris_code.rs | 73 ++++++++++ .../src/py_hnsw/pyclasses/iris_code_array.rs | 46 +++++++ iris-mpc-py/src/py_hnsw/pyclasses/mod.rs | 5 + 
.../src/py_hnsw/pyclasses/plaintext_store.rs | 52 ++++++++ iris-mpc-py/src/py_hnsw/pymodule.rs | 15 +++ 25 files changed, 935 insertions(+), 20 deletions(-) create mode 100644 iris-mpc-cpu/src/py_bindings/hnsw.rs create mode 100644 iris-mpc-cpu/src/py_bindings/io.rs create mode 100644 iris-mpc-cpu/src/py_bindings/mod.rs create mode 100644 iris-mpc-cpu/src/py_bindings/plaintext_store.rs create mode 100644 iris-mpc-py/.gitignore create mode 100644 iris-mpc-py/Cargo.toml create mode 100644 iris-mpc-py/README.md create mode 100644 iris-mpc-py/examples-py/test_integration.py create mode 100644 iris-mpc-py/pyproject.toml create mode 100644 iris-mpc-py/src/lib.rs create mode 100644 iris-mpc-py/src/py_hnsw/mod.rs create mode 100644 iris-mpc-py/src/py_hnsw/pyclasses/graph_store.rs create mode 100644 iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs create mode 100644 iris-mpc-py/src/py_hnsw/pyclasses/iris_code.rs create mode 100644 iris-mpc-py/src/py_hnsw/pyclasses/iris_code_array.rs create mode 100644 iris-mpc-py/src/py_hnsw/pyclasses/mod.rs create mode 100644 iris-mpc-py/src/py_hnsw/pyclasses/plaintext_store.rs create mode 100644 iris-mpc-py/src/py_hnsw/pymodule.rs diff --git a/Cargo.lock b/Cargo.lock index dbb040d3f..3e2f11542 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2196,7 +2196,7 @@ dependencies = [ [[package]] name = "hawk-pack" version = "0.1.0" -source = "git+https://github.com/Inversed-Tech/hawk-pack.git?rev=4e6de24#4e6de24f7422923f8cccd8571ef03407e8dbbb99" +source = "git+https://github.com/Inversed-Tech/hawk-pack.git?rev=29e888ed#29e888edfe19cd69e5925fa676ca07d1f64214da" dependencies = [ "aes-prng 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion", @@ -2787,6 +2787,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "iris-mpc-py" +version = "0.1.0" +dependencies = [ + "hawk-pack", + "iris-mpc-common", + "iris-mpc-cpu", + "pyo3", + "rand", +] + [[package]] name = "iris-mpc-store" version = "0.1.0" @@ -3109,6 +3120,15 @@ version = 
"2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "metrics" version = "0.22.3" @@ -3987,6 +4007,69 @@ dependencies = [ "prost", ] +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.85", +] + [[package]] name = 
"quanta" version = "0.12.3" @@ -5243,6 +5326,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "telemetry-batteries" version = "0.1.0" @@ -5859,6 +5948,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 7416fa873..843cb4908 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "iris-mpc-common", "iris-mpc-upgrade", "iris-mpc-store", + "iris-mpc-py", ] resolver = "2" @@ -28,11 +29,14 @@ bytemuck = { version = "1.17", features = ["derive"] } dotenvy = "0.15" eyre = "0.6" futures = "0.3.30" +hawk-pack = { git = "https://github.com/Inversed-Tech/hawk-pack.git", rev = "29e888ed" } hex = "0.4.3" itertools = "0.13" num-traits = "0.2" serde = { version = "1.0", features = ["derive"] } +serde-big-array = "0.5.1" serde_json = "1" +bincode = "1.3.3" sqlx = { version = "0.8", features = ["runtime-tokio-native-tls", "postgres"] } tracing = "0.1.40" tracing-subscriber = { version = "0.3.15", features = ["env-filter"] } diff --git a/iris-mpc-common/Cargo.toml b/iris-mpc-common/Cargo.toml index a658dba54..d9a287689 100644 --- a/iris-mpc-common/Cargo.toml +++ b/iris-mpc-common/Cargo.toml @@ -45,8 +45,8 @@ wiremock = "0.6.1" digest = "0.10.7" ring = "0.17.8" data-encoding = "2.6.0" -bincode = "1.3.3" -serde-big-array = "0.5.1" 
+bincode.workspace = true +serde-big-array.workspace = true [dev-dependencies] float_eq = "1" diff --git a/iris-mpc-common/src/iris_db/iris.rs b/iris-mpc-common/src/iris_db/iris.rs index b8acc9e88..1176d5a0b 100644 --- a/iris-mpc-common/src/iris_db/iris.rs +++ b/iris-mpc-common/src/iris_db/iris.rs @@ -4,12 +4,14 @@ use rand::{ distributions::{Bernoulli, Distribution}, Rng, }; +use serde::{Deserialize, Serialize}; +use serde_big_array::BigArray; pub const MATCH_THRESHOLD_RATIO: f64 = 0.375; #[repr(transparent)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct IrisCodeArray(pub [u64; Self::IRIS_CODE_SIZE_U64]); +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct IrisCodeArray(#[serde(with = "BigArray")] pub [u64; Self::IRIS_CODE_SIZE_U64]); impl Default for IrisCodeArray { fn default() -> Self { Self::ZERO @@ -141,7 +143,7 @@ impl std::ops::BitXor for IrisCodeArray { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct IrisCode { pub code: IrisCodeArray, pub mask: IrisCodeArray, diff --git a/iris-mpc-cpu/Cargo.toml b/iris-mpc-cpu/Cargo.toml index 99bfe2a4b..0b67d1a13 100644 --- a/iris-mpc-cpu/Cargo.toml +++ b/iris-mpc-cpu/Cargo.toml @@ -13,13 +13,13 @@ async-channel = "2.3.1" async-stream = "0.2" async-trait = "~0.1" backoff = {version="0.4.0", features = ["tokio"]} -bincode = "1.3.3" +bincode.workspace = true bytes = "1.7" bytemuck.workspace = true dashmap = "6.1.0" eyre.workspace = true futures.workspace = true -hawk-pack = { git = "https://github.com/Inversed-Tech/hawk-pack.git", rev = "4e6de24" } +hawk-pack.workspace = true iris-mpc-common = { path = "../iris-mpc-common" } itertools.workspace = true num-traits.workspace = true @@ -47,4 +47,4 @@ name = "hnsw" harness = false [[example]] -name = "hnsw-ex" \ No newline at end of file +name = "hnsw-ex" diff --git a/iris-mpc-cpu/src/hawkers/plaintext_store.rs b/iris-mpc-cpu/src/hawkers/plaintext_store.rs index 
6c69e6355..2d0ebd062 100644 --- a/iris-mpc-cpu/src/hawkers/plaintext_store.rs +++ b/iris-mpc-cpu/src/hawkers/plaintext_store.rs @@ -5,14 +5,10 @@ use iris_mpc_common::iris_db::{ iris::{IrisCode, MATCH_THRESHOLD_RATIO}, }; use rand::{CryptoRng, RngCore, SeedableRng}; +use serde::{Deserialize, Serialize}; use std::ops::{Index, IndexMut}; -#[derive(Default, Debug, Clone)] -pub struct PlaintextStore { - pub points: Vec, -} - -#[derive(Default, Debug, Clone)] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct PlaintextIris(pub IrisCode); impl PlaintextIris { @@ -47,17 +43,19 @@ impl PlaintextIris { } } -#[derive(Clone, Default, Debug)] +// TODO refactor away is_persistent flag; should probably be stored in a +// separate buffer instead whenever working with non-persistent iris codes +#[derive(Clone, Default, Debug, Serialize, Deserialize)] pub struct PlaintextPoint { /// Whatever encoding of a vector. - data: PlaintextIris, + pub data: PlaintextIris, /// Distinguish between queries that are pending, and those that were /// ultimately accepted into the vector store. 
- is_persistent: bool, + pub is_persistent: bool, } -#[derive(Copy, Debug, Clone, PartialEq, Eq, Hash, serde::Deserialize, serde::Serialize)] -pub struct PointId(u32); +#[derive(Copy, Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct PointId(pub u32); impl Index for Vec { type Output = T; @@ -85,6 +83,11 @@ impl From for PointId { } } +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct PlaintextStore { + pub points: Vec, +} + impl PlaintextStore { pub fn prepare_query(&mut self, raw_query: IrisCode) -> ::QueryRef { self.points.push(PlaintextPoint { diff --git a/iris-mpc-cpu/src/lib.rs b/iris-mpc-cpu/src/lib.rs index 1a74801f0..bf4a96011 100644 --- a/iris-mpc-cpu/src/lib.rs +++ b/iris-mpc-cpu/src/lib.rs @@ -5,4 +5,5 @@ pub(crate) mod network; #[rustfmt::skip] pub(crate) mod proto_generated; pub mod protocol; +pub mod py_bindings; pub(crate) mod shares; diff --git a/iris-mpc-cpu/src/py_bindings/hnsw.rs b/iris-mpc-cpu/src/py_bindings/hnsw.rs new file mode 100644 index 000000000..e57c85ff0 --- /dev/null +++ b/iris-mpc-cpu/src/py_bindings/hnsw.rs @@ -0,0 +1,126 @@ +use super::plaintext_store::Base64IrisCode; +use crate::hawkers::plaintext_store::{PlaintextStore, PointId}; +use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher, VectorStore}; +use iris_mpc_common::iris_db::iris::IrisCode; +use rand::rngs::ThreadRng; +use serde_json::{self, Deserializer}; +use std::{fs::File, io::BufReader}; + +pub fn search( + query: IrisCode, + searcher: &HawkSearcher, + vector: &mut PlaintextStore, + graph: &mut GraphMem, +) -> (PointId, f64) { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async move { + let query = vector.prepare_query(query); + let neighbors = searcher.search_to_insert(vector, graph, &query).await; + let (nearest, (dist_num, dist_denom)) = neighbors[0].get_nearest().unwrap(); + (*nearest, (*dist_num as f64) / (*dist_denom as f64)) + }) +} + +// TODO 
could instead take iterator of IrisCodes to make more flexible +pub fn insert( + iris: IrisCode, + searcher: &HawkSearcher, + vector: &mut PlaintextStore, + graph: &mut GraphMem, +) -> PointId { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async move { + let mut rng = ThreadRng::default(); + + let query = vector.prepare_query(iris); + let neighbors = searcher.search_to_insert(vector, graph, &query).await; + let inserted = vector.insert(&query).await; + searcher + .insert_from_search_results(vector, graph, &mut rng, inserted, neighbors) + .await; + inserted + }) +} + +pub fn insert_uniform_random( + searcher: &HawkSearcher, + vector: &mut PlaintextStore, + graph: &mut GraphMem, +) -> PointId { + let mut rng = ThreadRng::default(); + let raw_query = IrisCode::random_rng(&mut rng); + + insert(raw_query, searcher, vector, graph) +} + +pub fn fill_uniform_random( + num: usize, + searcher: &HawkSearcher, + vector: &mut PlaintextStore, + graph: &mut GraphMem, +) { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async move { + let mut rng = ThreadRng::default(); + + for idx in 0..num { + let raw_query = IrisCode::random_rng(&mut rng); + let query = vector.prepare_query(raw_query.clone()); + let neighbors = searcher.search_to_insert(vector, graph, &query).await; + let inserted = vector.insert(&query).await; + searcher + .insert_from_search_results(vector, graph, &mut rng, inserted, neighbors) + .await; + if idx % 100 == 99 { + println!("{}", idx + 1); + } + } + }) +} + +pub fn fill_from_ndjson_file( + filename: &str, + limit: Option, + searcher: &HawkSearcher, + vector: &mut PlaintextStore, + graph: &mut GraphMem, +) { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async move { + let mut rng = ThreadRng::default(); + + let file = File::open(filename).unwrap(); + let reader = 
BufReader::new(file); + + // Create an iterator over deserialized objects + let stream = Deserializer::from_reader(reader).into_iter::(); + let stream = super::limited_iterator(stream, limit); + + // Iterate over each deserialized object + for json_pt in stream { + let raw_query = (&json_pt.unwrap()).into(); + let query = vector.prepare_query(raw_query); + let neighbors = searcher.search_to_insert(vector, graph, &query).await; + let inserted = vector.insert(&query).await; + searcher + .insert_from_search_results(vector, graph, &mut rng, inserted, neighbors) + .await; + } + }) +} diff --git a/iris-mpc-cpu/src/py_bindings/io.rs b/iris-mpc-cpu/src/py_bindings/io.rs new file mode 100644 index 000000000..77f2c5b6f --- /dev/null +++ b/iris-mpc-cpu/src/py_bindings/io.rs @@ -0,0 +1,36 @@ +use bincode; +use eyre::Result; +use serde::{de::DeserializeOwned, Serialize}; +use serde_json; +use std::{ + fs::File, + io::{BufReader, BufWriter}, +}; + +pub fn write_bin(data: &T, filename: &str) -> Result<()> { + let file = File::create(filename)?; + let writer = BufWriter::new(file); + bincode::serialize_into(writer, data)?; + Ok(()) +} + +pub fn read_bin(filename: &str) -> Result { + let file = File::open(filename)?; + let reader = BufReader::new(file); + let data: T = bincode::deserialize_from(reader)?; + Ok(data) +} + +pub fn write_json(data: &T, filename: &str) -> Result<()> { + let file = File::create(filename)?; + let writer = BufWriter::new(file); + serde_json::to_writer(writer, &data)?; + Ok(()) +} + +pub fn read_json(filename: &str) -> Result { + let file = File::open(filename)?; + let reader = BufReader::new(file); + let data: T = serde_json::from_reader(reader)?; + Ok(data) +} diff --git a/iris-mpc-cpu/src/py_bindings/mod.rs b/iris-mpc-cpu/src/py_bindings/mod.rs new file mode 100644 index 000000000..b655e05f2 --- /dev/null +++ b/iris-mpc-cpu/src/py_bindings/mod.rs @@ -0,0 +1,13 @@ +pub mod hnsw; +pub mod io; +pub mod plaintext_store; + +pub fn limited_iterator(iter: I, 
limit: Option) -> Box> +where + I: Iterator + 'static, +{ + match limit { + Some(num) => Box::new(iter.take(num)), + None => Box::new(iter), + } +} diff --git a/iris-mpc-cpu/src/py_bindings/plaintext_store.rs b/iris-mpc-cpu/src/py_bindings/plaintext_store.rs new file mode 100644 index 000000000..7340454e8 --- /dev/null +++ b/iris-mpc-cpu/src/py_bindings/plaintext_store.rs @@ -0,0 +1,79 @@ +use crate::hawkers::plaintext_store::{PlaintextIris, PlaintextPoint, PlaintextStore}; +use iris_mpc_common::iris_db::iris::{IrisCode, IrisCodeArray}; +use serde::{Deserialize, Serialize}; +use std::{ + fs::File, + io::{self, BufReader, BufWriter, Write}, +}; + +/// Iris code representation using base64 encoding compatible with Open IRIS +#[derive(Serialize, Deserialize)] +pub struct Base64IrisCode { + iris_codes: String, + mask_codes: String, +} + +impl From<&IrisCode> for Base64IrisCode { + fn from(value: &IrisCode) -> Self { + Self { + iris_codes: value.code.to_base64().unwrap(), + mask_codes: value.mask.to_base64().unwrap(), + } + } +} + +impl From<&Base64IrisCode> for IrisCode { + fn from(value: &Base64IrisCode) -> Self { + Self { + code: IrisCodeArray::from_base64(&value.iris_codes).unwrap(), + mask: IrisCodeArray::from_base64(&value.mask_codes).unwrap(), + } + } +} + +pub fn from_ndjson_file(filename: &str, len: Option) -> io::Result { + let file = File::open(filename)?; + let reader = BufReader::new(file); + + // Create an iterator over deserialized objects + let stream = serde_json::Deserializer::from_reader(reader).into_iter::(); + let stream = super::limited_iterator(stream, len); + + // Iterate over each deserialized object + let mut vector = PlaintextStore::default(); + for json_pt in stream { + let json_pt = json_pt?; + vector.points.push(PlaintextPoint { + data: PlaintextIris((&json_pt).into()), + is_persistent: true, + }); + } + + if let Some(num) = len { + if vector.points.len() != num { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "File 
{} contains too few entries; number read: {}", + filename, + vector.points.len() + ), + )); + } + } + + Ok(vector) +} + +pub fn to_ndjson_file(vector: &PlaintextStore, filename: &str) -> std::io::Result<()> { + // Serialize the objects to the file + let file = File::create(filename)?; + let mut writer = BufWriter::new(file); + for pt in &vector.points { + let json_pt: Base64IrisCode = (&pt.data.0).into(); + serde_json::to_writer(&mut writer, &json_pt)?; + writer.write_all(b"\n")?; // Write a newline after each JSON object + } + writer.flush()?; + Ok(()) +} diff --git a/iris-mpc-py/.gitignore b/iris-mpc-py/.gitignore new file mode 100644 index 000000000..c8f044299 --- /dev/null +++ b/iris-mpc-py/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/iris-mpc-py/Cargo.toml b/iris-mpc-py/Cargo.toml new file mode 100644 index 000000000..d3d325935 --- /dev/null +++ b/iris-mpc-py/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "iris-mpc-py" +version = "0.1.0" +publish = false + +edition.workspace = true +license.workspace = true +repository.workspace = true + +[lib] +name = "iris_mpc_py" +crate-type = ["cdylib"] + +[dependencies] +iris-mpc-common = { path = "../iris-mpc-common" } +iris-mpc-cpu = { path = "../iris-mpc-cpu" } 
+hawk-pack.workspace = true +pyo3 = { version = "0.22.0", features = ["extension-module"] } +rand.workspace = true diff --git a/iris-mpc-py/README.md b/iris-mpc-py/README.md new file mode 100644 index 000000000..e80736956 --- /dev/null +++ b/iris-mpc-py/README.md @@ -0,0 +1,93 @@ +# Python Bindings + +This package provides Python bindings for some functionalities in the `iris-mpc` workspace, currently focused on execution of the HNSW k-nearest neighbors graph search algorithm over plaintext iris codes for testing and data analysis. For compatibility, compilation of this crate is disabled from the workspace root, but enabled from within the crate subdirectory via the Cargo default feature flag `enable`. + +## Installation + +Installation of Python bindings from the PyO3 library code can be accomplished using the Maturin Python package as follows: + +- Install Maturin in the target Python environment, e.g. the venv used for data analysis, using `pip install maturin` + +- Optionally install `patchelf` library with `pip install patchelf` for support for patching wheel files that link other shared libraries + +- Build and install current bindings as a module in the current Python environment by navigating to the `iris-mpc-py` directory and running `maturin develop --release` + +- Build a wheel file suitable for installation using `pip install` by instead running `maturin build --release`; the `.whl` file is specific to the building architecture and Python version, and can be found in `iris_mpc/target/wheels` directory + +See the [Maturin User Guide Tutorial](https://www.maturin.rs/tutorial#build-and-install-the-module-with-maturin-develop) for additional details. + +## Usage + +Once successfully installed, the native rust module `iris_mpc_py` can be imported in your Python environment as usual with `import iris_mpc_py`. 
Example usage: + +```python +from iris_mpc_py import PyHawkSearcher, PyPlaintextStore, PyGraphStore, PyIrisCode + +hnsw = PyHawkSearcher.new_uniform(32, 32) # M, ef +vector = PyPlaintextStore() +graph = PyGraphStore() + +hnsw.fill_uniform_random(1000, vector, graph) + +iris = PyIrisCode.uniform_random() +iris_id = hnsw.insert(iris, vector, graph) +print("Inserted iris id:", iris_id) + +nearest_id, nearest_dist = hnsw.search(iris, vector, graph) +print("Nearest iris id:", nearest_id) # should be iris_id +print("Nearest iris distance:", nearest_dist) # should be 0.0 +``` + +To write the HNSW vector and graph indices to file and read them back: + +```python +hnsw.write_to_json("searcher.json") +vector.write_to_ndjson("vector.ndjson") +graph.write_to_bin("graph.dat") + +hnsw2 = PyHawkSearcher.read_from_json("searcher.json") +vector2 = PyPlaintextStore.read_from_ndjson("vector.ndjson") +graph2 = PyGraphStore.read_from_bin("graph.dat") +``` + +As an efficiency feature, the data from the vector store is read in a streamed fashion. This means that for a large database of iris codes, the first `num` can be read from file without loading the entire database into memory. 
This can be used in two ways; first, a vector store can be initialized from the large database file for use with a previously generated HNSW index: + +```python +# Serialized HNSW graph constructed from the first 10k entries of database file +vector = PyPlaintextStore.read_from_ndjson("large_vector_database.ndjson", 10000) +graph = PyGraphStore.read_from_bin("graph.dat") +``` + +Second, to construct an HNSW index dynamically from streamed database entries: + +```python +hnsw = PyHawkSearcher.new_uniform(32, 32) +vector = PyPlaintextStore() +graph = PyGraphStore() +hnsw.fill_from_ndjson_file("large_vector_database.ndjson", vector, graph, 10000) +``` + +To generate a vector database directly for use in this way: + +```python +# Generate 100k uniform random iris codes +vector_init = PyPlaintextStore() +for i in range(100000): + vector_init.insert(PyIrisCode.uniform_random()) +vector_init.write_to_ndjson("vector.ndjson") +``` + +Basic interoperability with Open IRIS iris templates is provided by way of a common base64 encoding scheme, provided by the `iris.io.dataclasses.IrisTemplate` methods `serialize` and `deserialize`. These methods use a base64 encoding of iris code and mask code arrays represented as a Python `dict` with base64-encoded fields `iris_codes`, `mask_codes`, and a version string `iris_code_version` to check for compatibility. The `PyIrisCode` class interacts with this representation as follows: + +```python +serialized_iris_code = { + "iris_codes": "...", + "mask_codes": "...", + "iris_code_version": "1.0", +} + +iris = PyIrisCode.from_open_iris_template_dict(serialized_iris_code) +reserialized_iris_code = iris.to_open_iris_template_dict("1.0") +``` + +Note that the `to_open_iris_template_dict` method takes an optional argument which fills the `iris_code_version` field of the resulting Python `dict` since the `PyIrisCode` object does not preserve this data. 
diff --git a/iris-mpc-py/examples-py/test_integration.py b/iris-mpc-py/examples-py/test_integration.py new file mode 100644 index 000000000..945069af4 --- /dev/null +++ b/iris-mpc-py/examples-py/test_integration.py @@ -0,0 +1,37 @@ +from iris_mpc_py import PyIrisCode, PyPlaintextStore, PyGraphStore, PyHawkSearcher + +print("Generating 100k uniform random iris codes...") +vector_init = PyPlaintextStore() +iris0 = PyIrisCode.uniform_random() +iris_id = vector_init.insert(iris0) +for i in range(1,100000): + vector_init.insert(PyIrisCode.uniform_random()) + +# write vector store to file +print("Writing vector store to file...") +vector_init.write_to_ndjson("vector.ndjson") + +print("Generating HNSW graphs for 10k imported iris codes...") +hnsw = PyHawkSearcher.new_uniform(32, 32) +vector1 = PyPlaintextStore() +graph1 = PyGraphStore() +hnsw.fill_from_ndjson_file("vector.ndjson", vector1, graph1, 10000) + +print("Imported length:", vector1.len()) + +retrieved_iris = vector1.get(iris_id) +print("Retrieved iris0 base64 == original iris0 base64:", iris0.code.to_base64() == retrieved_iris.code.to_base64() and iris0.mask.to_base64() == retrieved_iris.mask.to_base64()) + +query = PyIrisCode.uniform_random() +print("Search for random query iris code:", hnsw.search(query, vector1, graph1)) + +# write graph store to file +print("Writing graph store to file...") +graph1.write_to_bin("graph1.dat") + +# read HNSW graphs from disk +print("Reading vector and graph stores from file...") +vector2 = PyPlaintextStore.read_from_ndjson("vector.ndjson", 10000) +graph2 = PyGraphStore.read_from_bin("graph1.dat") + +print("Search for random query iris code:", hnsw.search(query, vector2, graph2)) diff --git a/iris-mpc-py/pyproject.toml b/iris-mpc-py/pyproject.toml new file mode 100644 index 000000000..8b731d0c3 --- /dev/null +++ b/iris-mpc-py/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["maturin>=1.7,<2.0"] +build-backend = "maturin" + +[project] +name = "iris-mpc-py" 
+requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] +[tool.maturin] +features = ["pyo3/extension-module"] +module-name = "iris_mpc_py" \ No newline at end of file diff --git a/iris-mpc-py/src/lib.rs b/iris-mpc-py/src/lib.rs new file mode 100644 index 000000000..d8301516c --- /dev/null +++ b/iris-mpc-py/src/lib.rs @@ -0,0 +1 @@ +pub mod py_hnsw; diff --git a/iris-mpc-py/src/py_hnsw/mod.rs b/iris-mpc-py/src/py_hnsw/mod.rs new file mode 100644 index 000000000..d5fe0536c --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/mod.rs @@ -0,0 +1,2 @@ +pub mod pyclasses; +pub mod pymodule; diff --git a/iris-mpc-py/src/py_hnsw/pyclasses/graph_store.rs b/iris-mpc-py/src/py_hnsw/pyclasses/graph_store.rs new file mode 100644 index 000000000..fc6768f3d --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/pyclasses/graph_store.rs @@ -0,0 +1,27 @@ +use hawk_pack::graph_store::GraphMem; +use iris_mpc_cpu::{hawkers::plaintext_store::PlaintextStore, py_bindings}; +use pyo3::{exceptions::PyIOError, prelude::*}; + +#[pyclass] +#[derive(Clone, Default)] +pub struct PyGraphStore(pub GraphMem); + +#[pymethods] +impl PyGraphStore { + #[new] + pub fn new() -> Self { + Self::default() + } + + #[staticmethod] + pub fn read_from_bin(filename: String) -> PyResult { + let result = py_bindings::io::read_bin(&filename) + .map_err(|_| PyIOError::new_err("Unable to read from file"))?; + Ok(Self(result)) + } + + pub fn write_to_bin(&self, filename: String) -> PyResult<()> { + py_bindings::io::write_bin(&self.0, &filename) + .map_err(|_| PyIOError::new_err("Unable to write to file")) + } +} diff --git a/iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs b/iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs new file mode 100644 index 000000000..05fb346ee --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs @@ -0,0 +1,98 @@ +use 
super::{graph_store::PyGraphStore, iris_code::PyIrisCode, plaintext_store::PyPlaintextStore}; +use hawk_pack::hnsw_db::{HawkSearcher, Params}; +use iris_mpc_cpu::py_bindings; +use pyo3::{exceptions::PyIOError, prelude::*}; + +#[pyclass] +#[derive(Clone, Default)] +pub struct PyHawkSearcher(pub HawkSearcher); + +#[pymethods] +#[allow(non_snake_case)] +impl PyHawkSearcher { + #[new] + pub fn new(M: usize, ef_constr: usize, ef_search: usize) -> Self { + Self::new_standard(M, ef_constr, ef_search) + } + + #[staticmethod] + pub fn new_standard(M: usize, ef_constr: usize, ef_search: usize) -> Self { + let params = Params::new_standard(ef_constr, ef_search, M); + Self(HawkSearcher { params }) + } + + #[staticmethod] + pub fn new_uniform(M: usize, ef: usize) -> Self { + let params = Params::new_uniform(ef, M); + Self(HawkSearcher { params }) + } + + pub fn insert( + &self, + iris: PyIrisCode, + vector: &mut PyPlaintextStore, + graph: &mut PyGraphStore, + ) -> u32 { + let id = py_bindings::hnsw::insert(iris.0, &self.0, &mut vector.0, &mut graph.0); + id.0 + } + + pub fn insert_uniform_random( + &self, + vector: &mut PyPlaintextStore, + graph: &mut PyGraphStore, + ) -> u32 { + let id = py_bindings::hnsw::insert_uniform_random(&self.0, &mut vector.0, &mut graph.0); + id.0 + } + + pub fn fill_uniform_random( + &self, + num: usize, + vector: &mut PyPlaintextStore, + graph: &mut PyGraphStore, + ) { + py_bindings::hnsw::fill_uniform_random(num, &self.0, &mut vector.0, &mut graph.0); + } + + #[pyo3(signature = (filename, vector, graph, limit=None))] + pub fn fill_from_ndjson_file( + &self, + filename: String, + vector: &mut PyPlaintextStore, + graph: &mut PyGraphStore, + limit: Option, + ) { + py_bindings::hnsw::fill_from_ndjson_file( + &filename, + limit, + &self.0, + &mut vector.0, + &mut graph.0, + ); + } + + /// Search HNSW index and return nearest ID and its distance from query + pub fn search( + &mut self, + query: &PyIrisCode, + vector: &mut PyPlaintextStore, + graph: &mut
PyGraphStore, + ) -> (u32, f64) { + let (id, dist) = + py_bindings::hnsw::search(query.0.clone(), &self.0, &mut vector.0, &mut graph.0); + (id.0, dist) + } + + #[staticmethod] + pub fn read_from_json(filename: String) -> PyResult { + let result = py_bindings::io::read_json(&filename) + .map_err(|_| PyIOError::new_err("Unable to read from file"))?; + Ok(Self(result)) + } + + pub fn write_to_json(&self, filename: String) -> PyResult<()> { + py_bindings::io::write_json(&self.0, &filename) + .map_err(|_| PyIOError::new_err("Unable to write to file")) + } +} diff --git a/iris-mpc-py/src/py_hnsw/pyclasses/iris_code.rs b/iris-mpc-py/src/py_hnsw/pyclasses/iris_code.rs new file mode 100644 index 000000000..c004344ee --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/pyclasses/iris_code.rs @@ -0,0 +1,73 @@ +use super::iris_code_array::PyIrisCodeArray; +use iris_mpc_common::iris_db::iris::IrisCode; +use pyo3::{prelude::*, types::PyDict}; +use rand::rngs::ThreadRng; + +#[pyclass] +#[derive(Clone, Default)] +pub struct PyIrisCode(pub IrisCode); + +#[pymethods] +impl PyIrisCode { + #[new] + pub fn new(code: &PyIrisCodeArray, mask: &PyIrisCodeArray) -> Self { + Self(IrisCode { + code: code.0, + mask: mask.0, + }) + } + + #[getter] + pub fn code(&self) -> PyIrisCodeArray { + PyIrisCodeArray(self.0.code) + } + + #[getter] + pub fn mask(&self) -> PyIrisCodeArray { + PyIrisCodeArray(self.0.mask) + } + + #[staticmethod] + pub fn uniform_random() -> Self { + let mut rng = ThreadRng::default(); + Self(IrisCode::random_rng(&mut rng)) + } + + #[pyo3(signature = (version=None))] + pub fn to_open_iris_template_dict<'py>( + &self, + py: Python<'py>, + version: Option, + ) -> PyResult> { + let dict = PyDict::new_bound(py); + + dict.set_item("iris_codes", self.0.code.to_base64().unwrap())?; + dict.set_item("mask_codes", self.0.mask.to_base64().unwrap())?; + dict.set_item("iris_code_version", version)?; + + Ok(dict) + } + + #[staticmethod] + pub fn from_open_iris_template_dict(dict_obj: &Bound) -> 
PyResult { + // Extract base64-encoded iris code arrays + let iris_codes_str: String = dict_obj.get_item("iris_codes")?.unwrap().extract()?; + let mask_codes_str: String = dict_obj.get_item("mask_codes")?.unwrap().extract()?; + + // Convert the base64 strings into PyIrisCodeArrays + let code = PyIrisCodeArray::from_base64(iris_codes_str); + let mask = PyIrisCodeArray::from_base64(mask_codes_str); + + // Construct and return PyIrisCode + Ok(Self(IrisCode { + code: code.0, + mask: mask.0, + })) + } +} + +impl From for PyIrisCode { + fn from(value: IrisCode) -> Self { + Self(value) + } +} diff --git a/iris-mpc-py/src/py_hnsw/pyclasses/iris_code_array.rs b/iris-mpc-py/src/py_hnsw/pyclasses/iris_code_array.rs new file mode 100644 index 000000000..7d12fe3e7 --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/pyclasses/iris_code_array.rs @@ -0,0 +1,46 @@ +use iris_mpc_common::iris_db::iris::IrisCodeArray; +use pyo3::prelude::*; +use rand::rngs::ThreadRng; + +#[pyclass] +#[derive(Clone, Default)] +pub struct PyIrisCodeArray(pub IrisCodeArray); + +#[pymethods] +impl PyIrisCodeArray { + #[new] + pub fn new(input: String) -> Self { + Self::from_base64(input) + } + + pub fn to_base64(&self) -> String { + self.0.to_base64().unwrap() + } + + #[staticmethod] + pub fn from_base64(input: String) -> Self { + Self(IrisCodeArray::from_base64(&input).unwrap()) + } + + #[staticmethod] + pub fn zeros() -> Self { + Self(IrisCodeArray::ZERO) + } + + #[staticmethod] + pub fn ones() -> Self { + Self(IrisCodeArray::ONES) + } + + #[staticmethod] + pub fn uniform_random() -> Self { + let mut rng = ThreadRng::default(); + Self(IrisCodeArray::random_rng(&mut rng)) + } +} + +impl From for PyIrisCodeArray { + fn from(value: IrisCodeArray) -> Self { + Self(value) + } +} diff --git a/iris-mpc-py/src/py_hnsw/pyclasses/mod.rs b/iris-mpc-py/src/py_hnsw/pyclasses/mod.rs new file mode 100644 index 000000000..eea66d959 --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/pyclasses/mod.rs @@ -0,0 +1,5 @@ +pub mod 
graph_store; +pub mod hawk_searcher; +pub mod iris_code; +pub mod iris_code_array; +pub mod plaintext_store; diff --git a/iris-mpc-py/src/py_hnsw/pyclasses/plaintext_store.rs b/iris-mpc-py/src/py_hnsw/pyclasses/plaintext_store.rs new file mode 100644 index 000000000..f1d3fed19 --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/pyclasses/plaintext_store.rs @@ -0,0 +1,52 @@ +use super::iris_code::PyIrisCode; +use iris_mpc_cpu::{ + hawkers::plaintext_store::{PlaintextIris, PlaintextPoint, PlaintextStore}, + py_bindings, +}; +use pyo3::{exceptions::PyIOError, prelude::*}; + +#[pyclass] +#[derive(Clone, Default)] +pub struct PyPlaintextStore(pub PlaintextStore); + +#[pymethods] +impl PyPlaintextStore { + #[new] + pub fn new() -> Self { + Self::default() + } + + pub fn get(&self, id: u32) -> PyIrisCode { + self.0.points[id as usize].data.0.clone().into() + } + + pub fn insert(&mut self, iris: PyIrisCode) -> u32 { + let new_id = self.0.points.len() as u32; + self.0.points.push(PlaintextPoint { + data: PlaintextIris(iris.0), + is_persistent: true, + }); + new_id + } + + pub fn len(&self) -> usize { + self.0.points.len() + } + + pub fn is_empty(&self) -> bool { + self.0.points.is_empty() + } + + #[staticmethod] + #[pyo3(signature = (filename, len=None))] + pub fn read_from_ndjson(filename: String, len: Option) -> PyResult { + let result = py_bindings::plaintext_store::from_ndjson_file(&filename, len) + .map_err(|_| PyIOError::new_err("Unable to read from file"))?; + Ok(Self(result)) + } + + pub fn write_to_ndjson(&self, filename: String) -> PyResult<()> { + py_bindings::plaintext_store::to_ndjson_file(&self.0, &filename) + .map_err(|_| PyIOError::new_err("Unable to write to file")) + } +} diff --git a/iris-mpc-py/src/py_hnsw/pymodule.rs b/iris-mpc-py/src/py_hnsw/pymodule.rs new file mode 100644 index 000000000..b0ceae8e3 --- /dev/null +++ b/iris-mpc-py/src/py_hnsw/pymodule.rs @@ -0,0 +1,15 @@ +use super::pyclasses::{ + graph_store::PyGraphStore, hawk_searcher::PyHawkSearcher, 
iris_code::PyIrisCode, + iris_code_array::PyIrisCodeArray, plaintext_store::PyPlaintextStore, +}; +use pyo3::prelude::*; + +#[pymodule] +fn iris_mpc_py(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) +} From dabf71ed7ccfe79ab7132a29acd13efac2e69411 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Tue, 26 Nov 2024 11:36:25 +0100 Subject: [PATCH 033/170] bump stage and prod to v0.10.3 (#726) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index bed0d3965..2c66d9da2 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.3" environment: prod replicaCount: 1 diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 6f99973e8..de9e525d1 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.3" environment: stage replicaCount: 1 From 598812021d45e72d13d898636a3617a7e0bb021b Mon Sep 17 00:00:00 2001 From: iliailia Date: Tue, 26 Nov 2024 15:51:29 +0100 Subject: [PATCH 034/170] Send tuples of values over network (#724) --------- Co-authored-by: Bryan Gillespie --- iris-mpc-cpu/src/network/value.rs | 31 +++++++++++++++++++++++++++-- iris-mpc-cpu/src/protocol/binary.rs | 30 ++++++++++++++++++---------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/iris-mpc-cpu/src/network/value.rs b/iris-mpc-cpu/src/network/value.rs index 93198a691..43be3b76b 100644 --- a/iris-mpc-cpu/src/network/value.rs +++ b/iris-mpc-cpu/src/network/value.rs 
@@ -23,7 +23,15 @@ impl NetworkValue { } pub fn from_network(serialized: eyre::Result>) -> eyre::Result { - bincode::deserialize::(&serialized?).map_err(|_e| eyre!("failed to parse value")) + bincode::deserialize::(&serialized?).map_err(|_e| eyre!("Failed to parse value")) + } + + pub fn vec_to_network(values: &Vec) -> Vec { + bincode::serialize(&values).unwrap() + } + + pub fn vec_from_network(serialized: eyre::Result>) -> eyre::Result> { + bincode::deserialize::>(&serialized?).map_err(|_e| eyre!("Failed to parse value")) } } @@ -39,8 +47,27 @@ impl TryFrom for Vec> { match value { NetworkValue::VecRing16(x) => Ok(x), _ => Err(eyre!( - "could not convert Network Value into Vec>" + "Could not convert Network Value into Vec>" )), } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_vec() -> eyre::Result<()> { + let values = (0..2).map(RingElement).collect::>(); + let network_values = values + .iter() + .map(|v| NetworkValue::RingElement16(*v)) + .collect::>(); + let serialized = NetworkValue::vec_to_network(&network_values); + let result_vec = NetworkValue::vec_from_network(Ok(serialized))?; + assert_eq!(network_values, result_vec); + + Ok(()) + } +} diff --git a/iris-mpc-cpu/src/protocol/binary.rs b/iris-mpc-cpu/src/protocol/binary.rs index 3e55308a2..cb7525525 100644 --- a/iris-mpc-cpu/src/protocol/binary.rs +++ b/iris-mpc-cpu/src/protocol/binary.rs @@ -10,6 +10,7 @@ use crate::{ }, }; use eyre::{eyre, Error}; +use itertools::Itertools; use num_traits::{One, Zero}; use rand::{distributions::Standard, prelude::Distribution, Rng}; use std::ops::SubAssign; @@ -282,15 +283,21 @@ async fn bit_inject_ot_2round_receiver( let sid = session.session_id(); let (m0, m1, wc) = tokio::spawn(async move { - let reply_m0 = network.receive(&next_id, &sid).await; - let m0 = match NetworkValue::from_network(reply_m0) { - Ok(NetworkValue::VecRing16(val)) => Ok(val), + let reply_m0_and_m1 = network.receive(&next_id, &sid).await; + let m0_and_m1 = 
NetworkValue::vec_from_network(reply_m0_and_m1).unwrap(); + assert!( + m0_and_m1.len() == 2, + "Deserialized vec in bit inject is wrong length" + ); + let (m0, m1) = m0_and_m1.into_iter().collect_tuple().unwrap(); + + let m0 = match m0 { + NetworkValue::VecRing16(val) => Ok(val), _ => Err(eyre!("Could not deserialize properly in bit inject")), }; - let reply_m1 = network.receive(&next_id, &sid).await; - let m1 = match NetworkValue::from_network(reply_m1) { - Ok(NetworkValue::VecRing16(val)) => Ok(val), + let m1 = match m1 { + NetworkValue::VecRing16(val) => Ok(val), _ => Err(eyre!("Could not deserialize properly in bit inject")), }; @@ -365,20 +372,21 @@ async fn bit_inject_ot_2round_sender( let prev_id = session.prev_identity()?; let sid = session.session_id(); // TODO(Dragos) Note this can be compressed in a single round. + let m0_and_m1: Vec = [m0, m1] + .into_iter() + .map(NetworkValue::VecRing16) + .collect::>(); // Reshare to Helper tokio::spawn(async move { let _ = network - .send(NetworkValue::VecRing16(m0).to_network(), &prev_id, &sid) - .await; - let _ = network - .send(NetworkValue::VecRing16(m1).to_network(), &prev_id, &sid) + .send(NetworkValue::vec_to_network(&m0_and_m1), &prev_id, &sid) .await; }) .await?; Ok(shares) } -// TODO this is inbalanced, so a real implementation should actually rotate +// TODO this is unbalanced, so a real implementation should actually rotate // parties around pub(crate) async fn bit_inject_ot_2round( session: &mut Session, From 78a1ef4ed330b0fa8f246107b1a24c428fc2423d Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Tue, 26 Nov 2024 08:08:03 -0800 Subject: [PATCH 035/170] remove nccl debug (#727) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 3 --- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 3 --- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 3 --- 3 files changed, 9 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 
900b2eaaa..160a65f39 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -2,9 +2,6 @@ env: - name: RUST_LOG value: "info" - - name: NCCL_DEBUG - value: "INFO" - - name: NCCL_SOCKET_IFNAME value: "eth" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index a4ba8f006..ad61e9ad0 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -2,9 +2,6 @@ env: - name: RUST_LOG value: "info" - - name: NCCL_DEBUG - value: "INFO" - - name: RUST_BACKTRACE value: "1" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 28487ae7d..a7a3073bf 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -2,9 +2,6 @@ env: - name: RUST_LOG value: "info" - - name: NCCL_DEBUG - value: "INFO" - - name: RUST_BACKTRACE value: "1" From e033b694dd14607e600394fe6def6a64405fea44 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Wed, 27 Nov 2024 02:01:23 -0800 Subject: [PATCH 036/170] configurable healtcheck server (#723) * configurable healtcheck server * 3001 hc port * pretty --- .../smpcv2-1-stage/values-reshare-server.yaml | 2 + iris-mpc-upgrade/src/bin/reshare-server.rs | 19 ++++++- .../src/bin/tcp_upgrade_server.rs | 36 +++---------- iris-mpc-upgrade/src/config.rs | 6 +++ iris-mpc-upgrade/src/reshare.rs | 53 ++++++++----------- iris-mpc-upgrade/src/utils.rs | 13 +++++ 6 files changed, 70 insertions(+), 59 deletions(-) diff --git a/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml index dadecabf2..a9bab80bb 100644 --- a/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml +++ b/deploy/stage/smpcv2-1-stage/values-reshare-server.yaml @@ -15,6 +15,8 @@ args: - "100" - "--max-buffer-size" - "10" + - "--healthcheck-port" + - "3001" 
initContainer: enabled: true diff --git a/iris-mpc-upgrade/src/bin/reshare-server.rs b/iris-mpc-upgrade/src/bin/reshare-server.rs index 7b948ef7c..7bd9a9610 100644 --- a/iris-mpc-upgrade/src/bin/reshare-server.rs +++ b/iris-mpc-upgrade/src/bin/reshare-server.rs @@ -1,4 +1,5 @@ use clap::Parser; +use iris_mpc_common::helpers::task_monitor::TaskMonitor; use iris_mpc_store::Store; use iris_mpc_upgrade::{ config::ReShareServerConfig, @@ -6,7 +7,7 @@ use iris_mpc_upgrade::{ self, iris_mpc_reshare::iris_code_re_share_service_server::IrisCodeReShareServiceServer, }, reshare::{GrpcReshareServer, IrisCodeReshareReceiverHelper}, - utils::install_tracing, + utils::{install_tracing, spawn_healthcheck_server}, }; use tonic::transport::Server; @@ -17,6 +18,22 @@ async fn main() -> eyre::Result<()> { install_tracing(); let config = ReShareServerConfig::parse(); + tracing::info!("Starting healthcheck server."); + + let mut background_tasks = TaskMonitor::new(); + let _health_check_abort = background_tasks + .spawn(async move { spawn_healthcheck_server(config.healthcheck_port).await }); + background_tasks.check_tasks(); + tracing::info!( + "Healthcheck server running on port {}.", + config.healthcheck_port.clone() + ); + + tracing::info!( + "Healthcheck server running on port {}.", + config.healthcheck_port + ); + let schema_name = format!("{}_{}_{}", APP_NAME, config.environment, config.party_id); let store = Store::new(&config.db_url, &schema_name).await?; diff --git a/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs b/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs index bc16cfe07..a602cd5e9 100644 --- a/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs +++ b/iris-mpc-upgrade/src/bin/tcp_upgrade_server.rs @@ -1,12 +1,12 @@ -use axum::{routing::get, Router}; use clap::Parser; -use eyre::{bail, Context}; +use eyre::bail; use futures_concurrency::future::Join; use iris_mpc_common::helpers::task_monitor::TaskMonitor; use iris_mpc_store::Store; use iris_mpc_upgrade::{ config::{Eye, 
UpgradeServerConfig, BATCH_SUCCESSFUL_ACK, FINAL_BATCH_SUCCESSFUL_ACK}, packets::{MaskShareMessage, TwoToThreeIrisCodeMessage}, + utils::{install_tracing, spawn_healthcheck_server}, IrisCodeUpgrader, NewIrisShareSink, }; use std::time::Instant; @@ -14,20 +14,6 @@ use tokio::io::{AsyncReadExt, AsyncWriteExt, BufReader}; const APP_NAME: &str = "SMPC"; -fn install_tracing() { - use tracing_subscriber::{fmt, prelude::*, EnvFilter}; - - let fmt_layer = fmt::layer().with_target(true).with_line_number(true); - let filter_layer = EnvFilter::try_from_default_env() - .or_else(|_| EnvFilter::try_new("info")) - .unwrap(); - - tracing_subscriber::registry() - .with(filter_layer) - .with(fmt_layer) - .init(); -} - struct UpgradeTask { msg1: TwoToThreeIrisCodeMessage, msg2: TwoToThreeIrisCodeMessage, @@ -47,19 +33,13 @@ async fn main() -> eyre::Result<()> { tracing::info!("Starting healthcheck server."); let mut background_tasks = TaskMonitor::new(); - let _health_check_abort = background_tasks.spawn(async move { - let app = Router::new().route("/health", get(|| async {})); // implicit 200 return - let listener = tokio::net::TcpListener::bind("0.0.0.0:3000") - .await - .wrap_err("healthcheck listener bind error")?; - axum::serve(listener, app) - .await - .wrap_err("healthcheck listener server launch error")?; - Ok(()) - }); - + let _health_check_abort = background_tasks + .spawn(async move { spawn_healthcheck_server(args.healthcheck_port).await }); background_tasks.check_tasks(); - tracing::info!("Healthcheck server running on port 3000."); + tracing::info!( + "Healthcheck server running on port {}.", + args.healthcheck_port.clone() + ); let upgrader = IrisCodeUpgrader::new(args.party_id, sink.clone()); diff --git a/iris-mpc-upgrade/src/config.rs b/iris-mpc-upgrade/src/config.rs index cde17fac1..696bee343 100644 --- a/iris-mpc-upgrade/src/config.rs +++ b/iris-mpc-upgrade/src/config.rs @@ -54,6 +54,9 @@ pub struct UpgradeServerConfig { #[clap(long)] pub environment: String, + + 
#[clap(long)] + pub healthcheck_port: usize, } impl fmt::Debug for UpgradeServerConfig { @@ -217,4 +220,7 @@ pub struct ReShareServerConfig { /// from the other client) #[clap(long, default_value = "10")] pub max_buffer_size: usize, + + #[clap(long, default_value = "3000")] + pub healthcheck_port: usize, } diff --git a/iris-mpc-upgrade/src/reshare.rs b/iris-mpc-upgrade/src/reshare.rs index c96131b6a..bef4f220e 100644 --- a/iris-mpc-upgrade/src/reshare.rs +++ b/iris-mpc-upgrade/src/reshare.rs @@ -396,7 +396,7 @@ impl IrisCodeReshareReceiverHelper { id: self.my_party_id + 1, coefs: reshare1 .left_iris_code_share - .chunks_exact(std::mem::size_of::()) + .chunks_exact(size_of::()) .map(|x| u16::from_le_bytes(x.try_into().unwrap())) .collect_vec() .try_into() @@ -407,7 +407,7 @@ impl IrisCodeReshareReceiverHelper { id: self.my_party_id + 1, coefs: reshare1 .left_mask_share - .chunks_exact(std::mem::size_of::()) + .chunks_exact(size_of::()) .map(|x| u16::from_le_bytes(x.try_into().unwrap())) // we checked this beforehand in check_valid .collect_vec() @@ -418,7 +418,7 @@ impl IrisCodeReshareReceiverHelper { id: self.my_party_id + 1, coefs: reshare2 .left_iris_code_share - .chunks_exact(std::mem::size_of::()) + .chunks_exact(size_of::()) .map(|x| u16::from_le_bytes(x.try_into().unwrap())) .collect_vec() .try_into() @@ -429,7 +429,7 @@ impl IrisCodeReshareReceiverHelper { id: self.my_party_id + 1, coefs: reshare2 .left_mask_share - .chunks_exact(std::mem::size_of::()) + .chunks_exact(size_of::()) .map(|x| u16::from_le_bytes(x.try_into().unwrap())) // we checked this beforehand in check_valid .collect_vec() @@ -462,7 +462,7 @@ impl IrisCodeReshareReceiverHelper { id: self.my_party_id + 1, coefs: reshare1 .right_iris_code_share - .chunks_exact(std::mem::size_of::()) + .chunks_exact(size_of::()) .map(|x| u16::from_le_bytes(x.try_into().unwrap())) .collect_vec() .try_into() @@ -473,7 +473,7 @@ impl IrisCodeReshareReceiverHelper { id: self.my_party_id + 1, coefs: reshare1 
.right_mask_share - .chunks_exact(std::mem::size_of::()) + .chunks_exact(size_of::()) .map(|x| u16::from_le_bytes(x.try_into().unwrap())) // we checked this beforehand in check_valid .collect_vec() @@ -484,7 +484,7 @@ impl IrisCodeReshareReceiverHelper { id: self.my_party_id + 1, coefs: reshare2 .right_iris_code_share - .chunks_exact(std::mem::size_of::()) + .chunks_exact(size_of::()) .map(|x| u16::from_le_bytes(x.try_into().unwrap())) .collect_vec() .try_into() @@ -614,33 +614,26 @@ impl GrpcReshareServer { impl iris_code_re_share_service_server::IrisCodeReShareService for GrpcReshareServer { async fn re_share( &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - > { + request: tonic::Request, + ) -> Result, tonic::Status> { match self.receiver_helper.add_request_batch(request.into_inner()) { Ok(()) => (), Err(err) => { tracing::warn!(error = err.to_string(), "Error handling reshare request"); - match err { - IrisCodeReShareError::InvalidRequest { reason } => { - return Ok(Response::new( - proto::iris_mpc_reshare::IrisCodeReShareResponse { - status: IrisCodeReShareStatus::Error as i32, - message: reason, - }, - )); - } - IrisCodeReShareError::TooManyRequests { .. } => { - return Ok(Response::new( - proto::iris_mpc_reshare::IrisCodeReShareResponse { - status: IrisCodeReShareStatus::FullQueue as i32, - message: err.to_string(), - }, - )) - } - } + return match err { + IrisCodeReShareError::InvalidRequest { reason } => Ok(Response::new( + proto::iris_mpc_reshare::IrisCodeReShareResponse { + status: IrisCodeReShareStatus::Error as i32, + message: reason, + }, + )), + IrisCodeReShareError::TooManyRequests { .. 
} => Ok(Response::new( + proto::iris_mpc_reshare::IrisCodeReShareResponse { + status: IrisCodeReShareStatus::FullQueue as i32, + message: err.to_string(), + }, + )), + }; } } // we received a batch, try to handle it diff --git a/iris-mpc-upgrade/src/utils.rs b/iris-mpc-upgrade/src/utils.rs index 34b5a70d3..a6c9ca385 100644 --- a/iris-mpc-upgrade/src/utils.rs +++ b/iris-mpc-upgrade/src/utils.rs @@ -3,6 +3,8 @@ use crate::{ packets::{MaskShareMessage, TwoToThreeIrisCodeMessage}, OldIrisShareSource, }; +use axum::{routing::get, Router}; +use eyre::Context; use futures::{Stream, StreamExt}; use iris_mpc_common::galois_engine::degree4::{ GaloisRingIrisCodeShare, GaloisRingTrimmedMaskCodeShare, @@ -132,3 +134,14 @@ impl OldIrisShareSource for V1Database { })) } } + +pub async fn spawn_healthcheck_server(healthcheck_port: usize) -> eyre::Result<()> { + let app = Router::new().route("/health", get(|| async {})); // Implicit 200 response + let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{}", healthcheck_port)) + .await + .wrap_err("Healthcheck listener bind error")?; + axum::serve(listener, app) + .await + .wrap_err("healthcheck listener server launch error")?; + Ok(()) +} From 81d4f0ac56b735b32692e85a1a128822a93cba26 Mon Sep 17 00:00:00 2001 From: iliailia Date: Wed, 27 Nov 2024 14:59:32 +0100 Subject: [PATCH 037/170] gRPC networking with streaming (#707) --- Cargo.lock | 27 +- iris-mpc-cpu/Cargo.toml | 2 +- iris-mpc-cpu/src/network/grpc.rs | 331 +++++++++++------- iris-mpc-cpu/src/proto/party_node.proto | 4 +- .../src/proto_generated/party_node.rs | 18 +- 5 files changed, 227 insertions(+), 155 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3e2f11542..1a8291c92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,38 +181,17 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-stream" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"22068c0c19514942eefcfd4daf8976ef1aad84e61539f95cd200c35202f80af5" -dependencies = [ - "async-stream-impl 0.2.1", - "futures-core", -] - [[package]] name = "async-stream" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" dependencies = [ - "async-stream-impl 0.3.6", + "async-stream-impl", "futures-core", "pin-project-lite", ] -[[package]] -name = "async-stream-impl" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25f9db3b38af870bf7e5cc649167533b493928e50744e2c30ae350230b414670" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "async-stream-impl" version = "0.3.6" @@ -2724,7 +2703,7 @@ version = "0.1.0" dependencies = [ "aes-prng 0.2.1 (git+https://github.com/tf-encrypted/aes-prng.git?branch=dragos%2Fdisplay)", "async-channel", - "async-stream 0.2.1", + "async-stream", "async-trait", "backoff", "bincode", @@ -5604,7 +5583,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ - "async-stream 0.3.6", + "async-stream", "async-trait", "axum", "base64 0.22.1", diff --git a/iris-mpc-cpu/Cargo.toml b/iris-mpc-cpu/Cargo.toml index 0b67d1a13..3fe92f715 100644 --- a/iris-mpc-cpu/Cargo.toml +++ b/iris-mpc-cpu/Cargo.toml @@ -10,7 +10,7 @@ repository.workspace = true [dependencies] aes-prng = { git = "https://github.com/tf-encrypted/aes-prng.git", branch = "dragos/display"} async-channel = "2.3.1" -async-stream = "0.2" +async-stream = "0.3.6" async-trait = "~0.1" backoff = {version="0.4.0", features = ["tokio"]} bincode.workspace = true diff --git a/iris-mpc-cpu/src/network/grpc.rs b/iris-mpc-cpu/src/network/grpc.rs index 81bbe5bd0..96c351881 100644 --- a/iris-mpc-cpu/src/network/grpc.rs +++ b/iris-mpc-cpu/src/network/grpc.rs @@ -10,13 +10,21 @@ use 
crate::{ }; use backoff::{future::retry, ExponentialBackoff}; use dashmap::DashMap; -use eyre::{eyre, OptionExt}; -use std::{str::FromStr, sync::Arc}; +use eyre::eyre; +use std::{str::FromStr, sync::Arc, time::Duration}; +use tokio::{ + sync::{ + mpsc::{self, UnboundedSender}, + Mutex, + }, + time::timeout, +}; +use tokio_stream::StreamExt; use tonic::{ async_trait, metadata::AsciiMetadataValue, transport::{Channel, Server}, - Request, Response, Status, + Request, Response, Status, Streaming, }; type TonicResult = Result; @@ -25,15 +33,8 @@ fn err_to_status(e: eyre::Error) -> Status { Status::internal(e.to_string()) } -#[derive(Clone)] -struct QueueChannel { - pub sender: Arc>>, - pub receiver: Arc>>, -} - -#[derive(Clone)] struct MessageQueueStore { - queues: DashMap, + queues: DashMap>>, } impl MessageQueueStore { @@ -43,76 +44,124 @@ impl MessageQueueStore { } } - pub fn add_channel(&self, party_id: &Identity) -> QueueChannel { - // check that the party_id is not already in the queues - if self.queues.contains_key(party_id) { - return self.queues.get(party_id).unwrap().clone(); + fn insert(&self, sender_id: Identity, stream: Streaming) -> eyre::Result<()> { + if self.queues.contains_key(&sender_id) { + return Err(eyre!("Player {:?} already has a message queue", sender_id)); } - let (sender, receiver) = async_channel::unbounded(); - let channel = QueueChannel { - sender: Arc::new(sender), - receiver: Arc::new(receiver), - }; - self.queues.insert(party_id.clone(), channel.clone()); - channel + self.queues.insert(sender_id, Mutex::new(stream)); + Ok(()) } - fn get_channel(&self, party_id: &Identity) -> eyre::Result { - let channel = self.queues.get(party_id).ok_or_eyre(format!( - "Channel not found for party {:?}, existing channels: alice {}, bob {}, charlie {}", - party_id, - self.queues.contains_key(&Identity("alice".into())), - self.queues.contains_key(&Identity("bob".into())), - self.queues.contains_key(&Identity("charlie".into())) - ))?; - 
Ok((*channel).clone()) + async fn pop(&self, sender_id: &Identity) -> eyre::Result> { + let queue = self.queues.get(sender_id).ok_or(eyre!(format!( + "RECEIVE: Sender {sender_id:?} hasn't been found in the message queues" + )))?; + + let mut queue = queue.lock().await; + + let msg = queue.next().await.ok_or(eyre!("No message received"))??; + + Ok(msg.data) } +} + +struct OutgoingStreams { + streams: DashMap<(SessionId, Identity), Arc>>, +} - pub async fn push_back(&self, party_id: &Identity, value: Vec) -> eyre::Result<()> { - let channel = self.get_channel(party_id)?; - // sends the value via the channel sender; if failed, returns an error - channel.sender.send(value).await.map_err(|e| e.into()) +impl OutgoingStreams { + fn new() -> Self { + OutgoingStreams { + streams: DashMap::new(), + } } - pub async fn pop_front(&self, party_id: &Identity) -> eyre::Result> { - let channel = self.get_channel(party_id)?; - channel.receiver.recv().await.map_err(|e| e.into()) + fn add_session_stream( + &self, + session_id: SessionId, + receiver_id: Identity, + stream: UnboundedSender, + ) { + self.streams + .insert((session_id, receiver_id), Arc::new(stream)); } + + fn get_stream( + &self, + session_id: SessionId, + receiver_id: Identity, + ) -> eyre::Result>> { + self.streams + .get(&(session_id, receiver_id.clone())) + .ok_or(eyre!( + "Streams for session {session_id:?} and receiver {receiver_id:?} not found" + )) + .map(|s| s.value().clone()) + } +} + +#[derive(Default, Clone)] +pub struct GrpcConfig { + pub timeout_duration: Duration, } +// WARNING: this implementation assumes that messages for a specific player +// within one session are sent in order and consecutively. Don't send messages +// to the same player in parallel within the same session. Use batching instead. 
#[derive(Clone)] pub struct GrpcNetworking { - party_id: Identity, + party_id: Identity, // other party id -> client to call that party - clients: Arc>>, - message_queues: Arc>, + clients: Arc>>, + // other party id -> outgoing streams to send messages to that party in different sessions + outgoing_streams: Arc, + // session id -> incoming message streams + message_queues: Arc>, + + pub config: GrpcConfig, } impl GrpcNetworking { - pub fn new(party_id: Identity) -> Self { + pub fn new(party_id: Identity, config: GrpcConfig) -> Self { GrpcNetworking { party_id, clients: Arc::new(DashMap::new()), + outgoing_streams: Arc::new(OutgoingStreams::new()), message_queues: Arc::new(DashMap::new()), + config, } } pub async fn connect_to_party(&self, party_id: Identity, address: &str) -> eyre::Result<()> { let client = PartyNodeClient::connect(address.to_string()).await?; - self.clients.insert(party_id, client); + self.clients.insert(party_id.clone(), client); Ok(()) } pub async fn create_session(&self, session_id: SessionId) -> eyre::Result<()> { if self.message_queues.contains_key(&session_id) { - return Err(eyre!("Session already exists")); + return Err(eyre!( + "Player {:?} has already created session {session_id:?}", + self.party_id + )); } - let queue = MessageQueueStore::new(); - for client in self.clients.iter() { - queue.add_channel(client.key()); + for mut client in self.clients.iter_mut() { + let (tx, rx) = mpsc::unbounded_channel(); + self.outgoing_streams + .add_session_stream(session_id, client.key().clone(), tx); + let receiving_stream = tokio_stream::wrappers::UnboundedReceiverStream::new(rx); + let mut request = Request::new(receiving_stream); + request.metadata_mut().insert( + "sender_id", + AsciiMetadataValue::from_str(&self.party_id.0).unwrap(), + ); + request.metadata_mut().insert( + "session_id", + AsciiMetadataValue::from_str(&session_id.0.to_string()).unwrap(), + ); + let _response = client.value_mut().send_message(request).await?; } - 
self.message_queues.insert(session_id, queue); Ok(()) } } @@ -122,14 +171,14 @@ impl GrpcNetworking { impl PartyNode for GrpcNetworking { async fn send_message( &self, - request: Request, + request: Request>, ) -> TonicResult> { let sender_id: Identity = request .metadata() .get("sender_id") .ok_or(Status::unauthenticated("Sender ID not found"))? .to_str() - .map_err(|_| Status::unauthenticated("Sender ID not found"))? + .map_err(|_| Status::unauthenticated("Sender ID is not a string"))? .to_string() .into(); if sender_id == self.party_id { @@ -141,23 +190,30 @@ impl PartyNode for GrpcNetworking { let session_id: u64 = request .metadata() .get("session_id") - .ok_or(Status::not_found("Seesion ID no found"))? + .ok_or(Status::not_found("Session ID not found"))? .to_str() - .map_err(|_| Status::not_found("Session ID not found"))? + .map_err(|_| Status::not_found("Session ID malformed"))? .parse() - .map_err(|_| Status::invalid_argument("Session ID not a u64 number"))?; + .map_err(|_| Status::invalid_argument("Session ID is not a u64 number"))?; let session_id = SessionId::from(session_id); + + let incoming_stream = request.into_inner(); + + tracing::trace!( + "Player {:?}. Creating session {:?} for player {:?}", + self.party_id, + session_id, + sender_id + ); let message_queue = self .message_queues - .get(&session_id) - .ok_or(Status::not_found(format!( - "Session {:?} hasn't been created", - session_id - )))?; + .entry(session_id) + .or_insert(MessageQueueStore::new()); + message_queue - .push_back(&sender_id, request.into_inner().data) - .await + .insert(sender_id, incoming_stream) .map_err(err_to_status)?; + Ok(Response::new(SendResponse {})) } } @@ -178,38 +234,30 @@ impl Networking for GrpcNetworking { ..Default::default() }; retry(backoff, || async { - // Send message via gRPC client - let mut client = self - .clients - .get(receiver) - .ok_or_eyre(format!("Client not found {:?}", receiver))? 
- .clone(); - let mut request = Request::new(SendRequest { + let outgoing_stream = self + .outgoing_streams + .get_stream(*session_id, receiver.clone())?; + + // Send message via the outgoing stream + let request = SendRequest { data: value.clone(), - }); - request.metadata_mut().append( - "sender_id", - AsciiMetadataValue::from_str(&self.party_id.0).unwrap(), - ); - request.metadata_mut().append( - "session_id", - AsciiMetadataValue::from_str(&session_id.0.to_string()).unwrap(), - ); + }; tracing::trace!( - "Sending message {:?} from {:?} to {:?}", + "INIT: Sending message {:?} from {:?} to {:?} in session {:?}", value, self.party_id, - receiver + receiver, + session_id ); - let _response = client - .send_message(request) - .await - .map_err(|err| eyre!(err.to_string()))?; + outgoing_stream + .send(request) + .map_err(|e| eyre!(e.to_string()))?; tracing::trace!( - "SUCCESS: Sending message {:?} from {:?} to {:?}", + "SUCCESS: Sending message {:?} from {:?} to {:?} in session {:?}", value, self.party_id, - receiver + receiver, + session_id ); Ok(()) }) @@ -218,22 +266,36 @@ impl Networking for GrpcNetworking { async fn receive(&self, sender: &Identity, session_id: &SessionId) -> eyre::Result> { // Just retrieve the first message from the corresponding queue - self.message_queues - .get(session_id) - .ok_or_eyre(format!( - "Session {session_id:?} hasn't been added to message queues" - ))? 
- .pop_front(sender) - .await + let queue = self.message_queues.get(session_id).ok_or(eyre!(format!( + "Session {session_id:?} hasn't been added to message queues" + )))?; + + tracing::trace!( + "Player {:?} is receiving message from {:?} in session {:?}", + self.party_id, + sender, + session_id + ); + + match timeout(self.config.timeout_duration, queue.pop(sender)).await { + Ok(res) => res, + Err(_) => Err(eyre!( + "Timeout while waiting for message from {sender:?} in session {session_id:?}" + )), + } } } pub async fn setup_local_grpc_networking( parties: Vec, ) -> eyre::Result> { + let config = GrpcConfig { + timeout_duration: Duration::from_secs(1), + }; + let players = parties .iter() - .map(|party| GrpcNetworking::new(party.clone())) + .map(|party| GrpcNetworking::new(party.clone(), config.clone())) .collect::>(); let addresses = get_free_local_addresses(players.len()).await?; @@ -285,10 +347,24 @@ mod tests { use aes_prng::AesRng; use hawk_pack::hnsw_db::HawkSearcher; use rand::SeedableRng; - use std::time::Duration; use tokio::task::JoinSet; use tracing_test::traced_test; + async fn create_session_helper( + session_id: SessionId, + players: &[GrpcNetworking], + ) -> eyre::Result<()> { + let mut jobs = JoinSet::new(); + for player in players.iter() { + let player = player.clone(); + jobs.spawn(async move { + player.create_session(session_id).await.unwrap(); + }); + } + jobs.join_all().await; + Ok(()) + } + #[tokio::test(flavor = "multi_thread")] #[traced_test] async fn test_grpc_comms_correct() -> eyre::Result<()> { @@ -299,29 +375,27 @@ mod tests { // Simple session with one message sent from one party to another { - let alice = players[0].clone(); - let bob = players[1].clone(); + let players = players.clone(); let session_id = SessionId::from(0); jobs.spawn(async move { + create_session_helper(session_id, &players).await.unwrap(); + + let alice = players[0].clone(); + let bob = players[1].clone(); + // Send a message from the first party to the second 
party let message = b"Hey, Bob. I'm Alice. Do you copy?".to_vec(); let message_copy = message.clone(); let task1 = tokio::spawn(async move { - alice.create_session(session_id).await.unwrap(); - // Add a delay to ensure that the session is created before sending - tokio::time::sleep(Duration::from_millis(100)).await; alice .send(message.clone(), &"bob".into(), &session_id) .await .unwrap(); }); let task2 = tokio::spawn(async move { - bob.create_session(session_id).await.unwrap(); - // Add a delay to ensure that the session is created before receiving - tokio::time::sleep(Duration::from_millis(100)).await; let received_message = bob.receive(&"alice".into(), &session_id).await.unwrap(); assert_eq!(message_copy, received_message); }); @@ -334,6 +408,8 @@ mod tests { jobs.spawn(async move { let session_id = SessionId::from(1); + create_session_helper(session_id, &players).await.unwrap(); + let mut tasks = JoinSet::new(); // Send messages for (player_id, player) in players.iter().enumerate() { @@ -346,11 +422,6 @@ mod tests { let prev_id = identities[prev].clone(); tasks.spawn(async move { - player.create_session(session_id).await.unwrap(); - // Add a delay to ensure that the session is created before - // sending/receiving - tokio::time::sleep(Duration::from_millis(100)).await; - // Sending let msg_to_next = format!("From player {} to player {} with love", player_id, next) @@ -402,11 +473,12 @@ mod tests { // Send to a non-existing party { - let alice = players[0].clone(); + let players = players.clone(); jobs.spawn(async move { - let session_id = SessionId::from(2); - alice.create_session(session_id).await.unwrap(); + let session_id = SessionId::from(0); + create_session_helper(session_id, &players).await.unwrap(); + let alice = players[0].clone(); let message = b"Hey, Eve. I'm Alice. 
Do you copy?".to_vec(); let res = alice .send(message.clone(), &Identity::from("eve"), &session_id) @@ -417,10 +489,12 @@ mod tests { // Receive from a wrong party { - let alice = players[0].clone(); + let players = players.clone(); jobs.spawn(async move { - let session_id = SessionId::from(3); - alice.create_session(session_id).await.unwrap(); + let session_id = SessionId::from(1); + create_session_helper(session_id, &players).await.unwrap(); + + let alice = players[0].clone(); let res = alice.receive(&Identity::from("eve"), &session_id).await; assert!(res.is_err()); @@ -429,10 +503,12 @@ mod tests { // Send to itself { - let alice = players[0].clone(); + let players = players.clone(); jobs.spawn(async move { - let session_id = SessionId::from(4); - alice.create_session(session_id).await.unwrap(); + let session_id = SessionId::from(2); + create_session_helper(session_id, &players).await.unwrap(); + + let alice = players[0].clone(); let message = b"Hey, Alice. I'm Alice. Do you copy?".to_vec(); let res = alice @@ -444,19 +520,20 @@ mod tests { // Add the same session { - let alice = players[0].clone(); + let players = players.clone(); jobs.spawn(async move { - let session_id = SessionId::from(4); + let session_id = SessionId::from(3); + create_session_helper(session_id, &players).await.unwrap(); + + let alice = players[0].clone(); - // Delay to ensure that the session is created in the previous example - tokio::time::sleep(Duration::from_millis(100)).await; let res = alice.create_session(session_id).await; assert!(res.is_err()); }); } - // Retrieve from a non-existing session + // Send and retrieve from a non-existing session { let alice = players[0].clone(); jobs.spawn(async move { @@ -472,12 +549,26 @@ mod tests { }); } + // Receive from a party that didn't send a message + { + let alice = players[0].clone(); + let players = players.clone(); + jobs.spawn(async move { + let session_id = SessionId::from(4); + create_session_helper(session_id, 
&players).await.unwrap(); + + let res = alice.receive(&Identity::from("bob"), &session_id).await; + assert!(res.is_err()); + }); + } + jobs.join_all().await; Ok(()) } #[tokio::test] + #[traced_test] async fn test_hnsw_local() { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 2; diff --git a/iris-mpc-cpu/src/proto/party_node.proto b/iris-mpc-cpu/src/proto/party_node.proto index 67a2df623..515e95520 100644 --- a/iris-mpc-cpu/src/proto/party_node.proto +++ b/iris-mpc-cpu/src/proto/party_node.proto @@ -3,11 +3,11 @@ syntax = "proto3"; package party_node; service PartyNode { - rpc SendMessage (SendRequest) returns (SendResponse); + rpc SendMessage (stream SendRequest) returns (SendResponse); } message SendRequest { - bytes data = 3; + bytes data = 1; } message SendResponse {} \ No newline at end of file diff --git a/iris-mpc-cpu/src/proto_generated/party_node.rs b/iris-mpc-cpu/src/proto_generated/party_node.rs index 771569b6a..c2f51267a 100644 --- a/iris-mpc-cpu/src/proto_generated/party_node.rs +++ b/iris-mpc-cpu/src/proto_generated/party_node.rs @@ -1,7 +1,7 @@ // This file is @generated by prost-build. 
#[derive(Clone, PartialEq, ::prost::Message)] pub struct SendRequest { - #[prost(bytes = "vec", tag = "3")] + #[prost(bytes = "vec", tag = "1")] pub data: ::prost::alloc::vec::Vec, } #[derive(Clone, Copy, PartialEq, ::prost::Message)] @@ -99,7 +99,7 @@ pub mod party_node_client { } pub async fn send_message( &mut self, - request: impl tonic::IntoRequest, + request: impl tonic::IntoStreamingRequest, ) -> std::result::Result, tonic::Status> { self.inner .ready() @@ -113,10 +113,10 @@ pub mod party_node_client { let path = http::uri::PathAndQuery::from_static( "/party_node.PartyNode/SendMessage", ); - let mut req = request.into_request(); + let mut req = request.into_streaming_request(); req.extensions_mut() .insert(GrpcMethod::new("party_node.PartyNode", "SendMessage")); - self.inner.unary(req, path, codec).await + self.inner.client_streaming(req, path, codec).await } } } @@ -135,7 +135,7 @@ pub mod party_node_server { pub trait PartyNode: std::marker::Send + std::marker::Sync + 'static { async fn send_message( &self, - request: tonic::Request, + request: tonic::Request>, ) -> std::result::Result, tonic::Status>; } #[derive(Debug)] @@ -217,7 +217,9 @@ pub mod party_node_server { "/party_node.PartyNode/SendMessage" => { #[allow(non_camel_case_types)] struct SendMessageSvc(pub Arc); - impl tonic::server::UnaryService + impl< + T: PartyNode, + > tonic::server::ClientStreamingService for SendMessageSvc { type Response = super::SendResponse; type Future = BoxFuture< @@ -226,7 +228,7 @@ pub mod party_node_server { >; fn call( &mut self, - request: tonic::Request, + request: tonic::Request>, ) -> Self::Future { let inner = Arc::clone(&self.0); let fut = async move { @@ -252,7 +254,7 @@ pub mod party_node_server { max_decoding_message_size, max_encoding_message_size, ); - let res = grpc.unary(method, req).await; + let res = grpc.client_streaming(method, req).await; Ok(res) }; Box::pin(fut) From bdf551f825d57d0691edb72472059150aaaae857 Mon Sep 17 00:00:00 2001 From: "Danielle 
Nagar @ TFH" Date: Thu, 28 Nov 2024 15:42:23 +0100 Subject: [PATCH 038/170] Add readyness checks checking that the image is consistent (#728) * Add readyness checks checking that the image is consistent --- Cargo.lock | 2 + .../stage/smpcv2-0-stage/values-iris-mpc.yaml | 3 ++ .../stage/smpcv2-1-stage/values-iris-mpc.yaml | 3 ++ .../stage/smpcv2-2-stage/values-iris-mpc.yaml | 3 ++ iris-mpc-common/src/config/mod.rs | 3 ++ iris-mpc/Cargo.toml | 2 + iris-mpc/src/bin/server.rs | 40 +++++++++++++++++-- 7 files changed, 52 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a8291c92..db3a60dec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2627,6 +2627,7 @@ dependencies = [ "aws-sdk-sqs", "axum", "base64 0.22.1", + "bincode", "bytemuck", "clap", "criterion", @@ -2641,6 +2642,7 @@ dependencies = [ "ndarray", "rand", "reqwest 0.12.9", + "serde", "serde_json", "sha2", "sodiumoxide", diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index ef3c3066c..a205d1a26 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -100,6 +100,9 @@ env: - name: SMPC__NODE_HOSTNAMES value: '["iris-mpc-node.1.stage.smpcv2.worldcoin.dev","iris-mpc-node.2.stage.smpcv2.worldcoin.dev","iris-mpc-node.3.stage.smpcv2.worldcoin.dev"]' + - name: SMPC__IMAGE_NAME + value: {{ .Values.image | quote }} + initContainer: enabled: true image: "amazon/aws-cli:2.17.62" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index fed0f2b09..96e2bdb28 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -100,6 +100,9 @@ env: - name: SMPC__NODE_HOSTNAMES value: '["iris-mpc-node.1.stage.smpcv2.worldcoin.dev","iris-mpc-node.2.stage.smpcv2.worldcoin.dev","iris-mpc-node.3.stage.smpcv2.worldcoin.dev"]' + - name: SMPC__IMAGE_NAME + value: {{ .Values.image 
| quote }} + initContainer: enabled: true image: "amazon/aws-cli:2.17.62" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index d5cfcd9cf..19e435f71 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -99,6 +99,9 @@ env: - name: SMPC__NODE_HOSTNAMES value: '["iris-mpc-node.1.stage.smpcv2.worldcoin.dev","iris-mpc-node.2.stage.smpcv2.worldcoin.dev","iris-mpc-node.3.stage.smpcv2.worldcoin.dev"]' + + - name: SMPC__IMAGE_NAME + value: {{ .Values.image | quote }} initContainer: enabled: true diff --git a/iris-mpc-common/src/config/mod.rs b/iris-mpc-common/src/config/mod.rs index f2825c976..b21aea8cc 100644 --- a/iris-mpc-common/src/config/mod.rs +++ b/iris-mpc-common/src/config/mod.rs @@ -81,6 +81,9 @@ pub struct Config { #[serde(default = "default_shutdown_last_results_sync_timeout_secs")] pub shutdown_last_results_sync_timeout_secs: u64, + + #[serde(default)] + pub image_name: String, } fn default_processing_timeout_secs() -> u64 { diff --git a/iris-mpc/Cargo.toml b/iris-mpc/Cargo.toml index 5e603a9b8..53454799e 100644 --- a/iris-mpc/Cargo.toml +++ b/iris-mpc/Cargo.toml @@ -34,6 +34,8 @@ iris-mpc-store = { path = "../iris-mpc-store" } sha2 = "0.10.8" metrics = "0.22.1" metrics-exporter-statsd = "0.7" +serde = { version = "1.0.214", features = ["derive"] } +bincode = "1.3.3" [dev-dependencies] criterion = "0.5" diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index a847542cf..3a634b4f6 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -37,6 +37,7 @@ use iris_mpc_gpu::{ use iris_mpc_store::{Store, StoredIrisRef}; use metrics_exporter_statsd::StatsdBuilder; use reqwest::StatusCode; +use serde::{Deserialize, Serialize}; use std::{ backtrace::Backtrace, collections::HashMap, @@ -695,12 +696,28 @@ async fn server_main(config: Config) -> eyre::Result<()> { let is_ready_flag = 
Arc::new(AtomicBool::new(false)); let is_ready_flag_cloned = Arc::clone(&is_ready_flag); + #[derive(Serialize, Deserialize)] + struct ReadyProbeResponse { + image_name: String, + uuid: String, + } + let _health_check_abort = background_tasks.spawn({ let uuid = uuid::Uuid::new_v4().to_string(); + let ready_probe_response = ReadyProbeResponse { + image_name: config.image_name.clone(), + uuid, + }; + let serialized_response = serde_json::to_string(&ready_probe_response) + .expect("Serialization to JSON to probe response failed"); + tracing::info!("Healthcheck probe response: {}", serialized_response); async move { // Generate a random UUID for each run. let app = Router::new() - .route("/health", get(move || async move { uuid.to_string() })) + .route( + "/health", + get(move || async move { serialized_response.clone() }), + ) .route( "/ready", get({ @@ -732,6 +749,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { let (heartbeat_tx, heartbeat_rx) = oneshot::channel(); let mut heartbeat_tx = Some(heartbeat_tx); let all_nodes = config.node_hostnames.clone(); + let image_name = config.image_name.clone(); let _heartbeat = background_tasks.spawn(async move { let next_node = &all_nodes[(config.party_id + 1) % 3]; let prev_node = &all_nodes[(config.party_id + 2) % 3]; @@ -759,9 +777,23 @@ async fn server_main(config: Config) -> eyre::Result<()> { ); } - let uuid = res.unwrap().text().await?; + let probe_response = res + .unwrap() + .json::() + .await + .expect("Deserialization of probe response failed"); + if probe_response.image_name != image_name { + // Do not create a panic as we still can continue to process before its + // updated + tracing::error!( + "Host {} is using image {} which differs from current node image: {}", + host, + probe_response.image_name.clone(), + image_name + ); + } if last_response[i] == String::default() { - last_response[i] = uuid; + last_response[i] = probe_response.uuid; connected[i] = true; // If all nodes are connected, notify 
the main thread. @@ -770,7 +802,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { tx.send(()).unwrap(); } } - } else if uuid != last_response[i] { + } else if probe_response.uuid != last_response[i] { // If the UUID response is different, the node has restarted without us // noticing. Our main NCCL connections cannot recover from // this, so we panic. From 1773feae1b5174d49ad9580155e70d8111015ff0 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Thu, 28 Nov 2024 23:39:57 -0800 Subject: [PATCH 039/170] scale down (#733) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 2c66d9da2..f3fcf3cb2 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.3" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From 603c9469027d219a4de17cfb613b4bdacc324d4b Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 00:44:57 -0800 Subject: [PATCH 040/170] scale up (#734) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index f3fcf3cb2..2c66d9da2 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.3" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From f351cdcac6d8daa79153f6495ca29adcc4aca662 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Fri, 29 Nov 2024 10:34:45 +0100 Subject: [PATCH 041/170] Update stage latest version (#730) * Update getting image name env variable * Update stage image --- deploy/stage/common-values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- 
deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index de9e525d1..5a2f41a7f 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.3" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index a205d1a26..02fb0a576 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -101,7 +101,7 @@ env: value: '["iris-mpc-node.1.stage.smpcv2.worldcoin.dev","iris-mpc-node.2.stage.smpcv2.worldcoin.dev","iris-mpc-node.3.stage.smpcv2.worldcoin.dev"]' - name: SMPC__IMAGE_NAME - value: {{ .Values.image | quote }} + value: $(IMAGE_NAME) initContainer: enabled: true diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 96e2bdb28..db16650c1 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -101,7 +101,7 @@ env: value: '["iris-mpc-node.1.stage.smpcv2.worldcoin.dev","iris-mpc-node.2.stage.smpcv2.worldcoin.dev","iris-mpc-node.3.stage.smpcv2.worldcoin.dev"]' - name: SMPC__IMAGE_NAME - value: {{ .Values.image | quote }} + value: $(IMAGE_NAME) initContainer: enabled: true diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 19e435f71..728821a8c 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -99,9 +99,9 @@ env: - name: SMPC__NODE_HOSTNAMES value: 
'["iris-mpc-node.1.stage.smpcv2.worldcoin.dev","iris-mpc-node.2.stage.smpcv2.worldcoin.dev","iris-mpc-node.3.stage.smpcv2.worldcoin.dev"]' - + - name: SMPC__IMAGE_NAME - value: {{ .Values.image | quote }} + value: $(IMAGE_NAME) initContainer: enabled: true From 59743126772e3c56990eb0b6bc5c2c07de80415c Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 03:23:46 -0800 Subject: [PATCH 042/170] Update common-values-reshare-server.yaml (#735) --- deploy/stage/common-values-reshare-server.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 3557dd21d..73269e042 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: stage replicaCount: 1 From f61f20d3016841a237aa3f67c0d7089384f6835a Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 03:50:41 -0800 Subject: [PATCH 043/170] test nginx conf (#736) --- .../stage/common-values-reshare-server.yaml | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 73269e042..9d88b4062 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -99,15 +99,18 @@ nginxSidecar: worker_connections 1024; } - http { + stream { log_format basic '$remote_addr [$time_local] ' - '$status $bytes_sent'; + '$protocol $status $bytes_sent $bytes_received ' + '$session_time'; + + upstream tcp_backend { + server 127.0.0.1:8000; + } - access_log /dev/stdout basic; - server { - listen 6443 ssl; - http2 on; + listen 8443 ssl; + proxy_pass tcp_backend; ssl_certificate /etc/nginx/cert/certificate.crt; ssl_certificate_key /etc/nginx/cert/key.pem; 
@@ -119,17 +122,6 @@ nginxSidecar: ssl_session_cache shared:SSL:10m; ssl_session_timeout 1h; - location / { - # Forward gRPC traffic to the gRPC server on port 7000 - grpc_pass grpc://127.0.0.1:7000; - error_page 502 = /error502grpc; # Custom error page for GRPC backend issues - } - - # Custom error page - location = /error502grpc { - internal; - default_type text/plain; - return 502 "Bad Gateway: gRPC server unreachable."; - } + access_log /dev/stdout basic; } } From 4898a748ac28cba0111e58c5468cbab54a55be64 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 03:56:50 -0800 Subject: [PATCH 044/170] test nginx conf (#737) --- deploy/stage/common-values-reshare-server.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 9d88b4062..f48d1129b 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -105,11 +105,11 @@ nginxSidecar: '$session_time'; upstream tcp_backend { - server 127.0.0.1:8000; + server 127.0.0.1:7000; } server { - listen 8443 ssl; + listen 6443 ssl; proxy_pass tcp_backend; ssl_certificate /etc/nginx/cert/certificate.crt; From a523ea31c678db0260041445de305f5c807aab54 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 04:08:27 -0800 Subject: [PATCH 045/170] test nginx conf (#738) --- .../stage/common-values-reshare-server.yaml | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index f48d1129b..a424e7349 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -99,18 +99,13 @@ nginxSidecar: worker_connections 1024; } - stream { + http { log_format basic '$remote_addr [$time_local] ' - '$protocol $status $bytes_sent $bytes_received ' - 
'$session_time'; - - upstream tcp_backend { - server 127.0.0.1:7000; - } - + '$status $bytes_sent'; + server { listen 6443 ssl; - proxy_pass tcp_backend; + http2 on; ssl_certificate /etc/nginx/cert/certificate.crt; ssl_certificate_key /etc/nginx/cert/key.pem; @@ -121,7 +116,20 @@ nginxSidecar: # Enable session resumption to improve performance ssl_session_cache shared:SSL:10m; ssl_session_timeout 1h; - + access_log /dev/stdout basic; + + location / { + # Forward gRPC traffic to the gRPC server on port 7000 + grpc_pass grpc://127.0.0.1:7000; + error_page 502 = /error502grpc; # Custom error page for GRPC backend issues + } + + # Custom error page + location = /error502grpc { + internal; + default_type text/plain; + return 502 "Bad Gateway: gRPC server unreachable."; + } } } From 1e28fde0e0be852d7d6342670c75722bb7b66271 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 04:18:01 -0800 Subject: [PATCH 046/170] test nginx conf (#739) --- .../stage/common-values-reshare-server.yaml | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index a424e7349..7a00286bb 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -96,40 +96,40 @@ nginxSidecar: pid /tmp/nginx.pid; events { - worker_connections 1024; + worker_connections 1024; } http { - log_format basic '$remote_addr [$time_local] ' + log_format basic '$remote_addr [$time_local] ' '$status $bytes_sent'; - server { - listen 6443 ssl; - http2 on; + server { + listen 6443 ssl; + http2 on; - ssl_certificate /etc/nginx/cert/certificate.crt; - ssl_certificate_key /etc/nginx/cert/key.pem; + ssl_certificate /etc/nginx/cert/certificate.crt; + ssl_certificate_key /etc/nginx/cert/key.pem; + + ssl_protocols TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + # Enable session resumption to improve performance + ssl_session_cache 
shared:SSL:10m; + ssl_session_timeout 1h; + + location / { + # Forward gRPC traffic to the gRPC server on port 7000 + grpc_pass grpc://127.0.0.1:7000; + error_page 502 = /error502grpc; # Custom error page for GRPC backend issues + } + + # Custom error page + location = /error502grpc { + internal; + default_type text/plain; + return 502 "Bad Gateway: gRPC server unreachable."; + } - ssl_protocols TLSv1.3; - ssl_ciphers HIGH:!aNULL:!MD5; - - # Enable session resumption to improve performance - ssl_session_cache shared:SSL:10m; - ssl_session_timeout 1h; - - access_log /dev/stdout basic; - - location / { - # Forward gRPC traffic to the gRPC server on port 7000 - grpc_pass grpc://127.0.0.1:7000; - error_page 502 = /error502grpc; # Custom error page for GRPC backend issues - } - - # Custom error page - location = /error502grpc { - internal; - default_type text/plain; - return 502 "Bad Gateway: gRPC server unreachable."; + access_log /dev/stdout basic; } - } } From fef0e41b42b8503108dcfed7bea1fb3521757d0f Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 05:19:05 -0800 Subject: [PATCH 047/170] test nginx conf (#740) --- deploy/stage/common-values-reshare-server.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 7a00286bb..b576fd71a 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -54,7 +54,7 @@ podSecurityContext: serviceAccount: create: true -command: [ "/bin/reshare-server" ] +command: [ "chown", "-R" "65534:65534", "/var/cache/", "&&", "/bin/reshare-server" ] env: - name: SMPC__DATABASE__URL @@ -104,7 +104,7 @@ nginxSidecar: '$status $bytes_sent'; server { - listen 6443 ssl; + listen 8443 ssl http2; http2 on; ssl_certificate /etc/nginx/cert/certificate.crt; @@ -112,7 +112,7 @@ nginxSidecar: ssl_protocols TLSv1.3; ssl_ciphers HIGH:!aNULL:!MD5; - + # 
Enable session resumption to improve performance ssl_session_cache shared:SSL:10m; ssl_session_timeout 1h; @@ -122,14 +122,15 @@ nginxSidecar: grpc_pass grpc://127.0.0.1:7000; error_page 502 = /error502grpc; # Custom error page for GRPC backend issues } - + # Custom error page location = /error502grpc { internal; default_type text/plain; return 502 "Bad Gateway: gRPC server unreachable."; } - + access_log /dev/stdout basic; + } } From 576783393d48853a6eaac738c8dec8f110d62c87 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 05:25:37 -0800 Subject: [PATCH 048/170] test nginx conf (#741) --- deploy/stage/common-values-reshare-server.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index b576fd71a..ebe7282fe 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -54,7 +54,7 @@ podSecurityContext: serviceAccount: create: true -command: [ "chown", "-R" "65534:65534", "/var/cache/", "&&", "/bin/reshare-server" ] +command: [ "/bin/reshare-server" ] env: - name: SMPC__DATABASE__URL @@ -71,6 +71,7 @@ service: enabled: false nginxSidecar: + image: "nginxinc/nginx-unprivileged:1.27" enabled: true port: 6443 secrets: @@ -104,7 +105,7 @@ nginxSidecar: '$status $bytes_sent'; server { - listen 8443 ssl http2; + listen 6443 ssl http2; http2 on; ssl_certificate /etc/nginx/cert/certificate.crt; From d48ecdefc31fd97f42ae5e261186bb60267e4d27 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 05:41:22 -0800 Subject: [PATCH 049/170] use different user (#742) --- deploy/stage/common-values-reshare-server.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index ebe7282fe..f27c440da 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ 
b/deploy/stage/common-values-reshare-server.yaml @@ -48,8 +48,8 @@ nodeSelector: beta.kubernetes.io/instance-type: t3.2xlarge podSecurityContext: - runAsUser: 65534 - runAsGroup: 65534 + runAsUser: 405 + runAsGroup: 405 serviceAccount: create: true From 241bade84cb47708b4defafa6104c7a172f5f050 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 06:26:32 -0800 Subject: [PATCH 050/170] revert image, use cache path (#743) --- .../stage/common-values-reshare-server.yaml | 77 ++++++++++--------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index f27c440da..840967b92 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -71,7 +71,7 @@ service: enabled: false nginxSidecar: - image: "nginxinc/nginx-unprivileged:1.27" + image: "nginxinc/nginx:1.27" enabled: true port: 6443 secrets: @@ -92,46 +92,51 @@ nginxSidecar: config: nginx.conf: | worker_processes auto; - + error_log /dev/stderr notice; pid /tmp/nginx.pid; - + events { - worker_connections 1024; + worker_connections 1024; } - - http { - log_format basic '$remote_addr [$time_local] ' - '$status $bytes_sent'; - server { - listen 6443 ssl http2; - http2 on; - - ssl_certificate /etc/nginx/cert/certificate.crt; - ssl_certificate_key /etc/nginx/cert/key.pem; + http { + proxy_cache_path /tmp/proxy_cache levels=1:2 use_temp_path=on keys_zone=cache:10m max_size=4096m loader_files=200 inactive=24h; + proxy_temp_path /tmp/proxy_temp; - ssl_protocols TLSv1.3; - ssl_ciphers HIGH:!aNULL:!MD5; - - # Enable session resumption to improve performance - ssl_session_cache shared:SSL:10m; - ssl_session_timeout 1h; - - location / { - # Forward gRPC traffic to the gRPC server on port 7000 - grpc_pass grpc://127.0.0.1:7000; - error_page 502 = /error502grpc; # Custom error page for GRPC backend issues - } - - # Custom error page - location = 
/error502grpc { - internal; - default_type text/plain; - return 502 "Bad Gateway: gRPC server unreachable."; - } + log_format basic '$remote_addr [$time_local] ' + '$status $bytes_sent'; + + server { + listen 6443 ssl http2; + http2 on; + + ssl_certificate /etc/nginx/cert/certificate.crt; + ssl_certificate_key /etc/nginx/cert/key.pem; + + ssl_protocols TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + # Enable session resumption to improve performance + ssl_session_cache shared:SSL:10m; + ssl_session_timeout 1h; + + location / { + proxy_cache cache; - access_log /dev/stdout basic; - + # Forward gRPC traffic to the gRPC server on port 7000 + grpc_pass grpc://127.0.0.1:7000; + error_page 502 = /error502grpc; # Custom error page for GRPC backend issues } - } + + # Custom error page + location = /error502grpc { + proxy_cache cache; + internal; + default_type text/plain; + return 502 "Bad Gateway: gRPC server unreachable."; + } + + access_log /dev/stdout basic; + } + } \ No newline at end of file From a9c4fc33aac5c1607e2f359b3f4b396b29f8078e Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 06:30:29 -0800 Subject: [PATCH 051/170] remove image (#744) --- deploy/stage/common-values-reshare-server.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 840967b92..5e4d344aa 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -71,7 +71,6 @@ service: enabled: false nginxSidecar: - image: "nginxinc/nginx:1.27" enabled: true port: 6443 secrets: From 4a051647c2e08664499ddd227ab5ff166ea86732 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 29 Nov 2024 06:38:31 -0800 Subject: [PATCH 052/170] working nginx config (#745) --- .../stage/common-values-reshare-server.yaml | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git 
a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 5e4d344aa..49b22eecf 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -91,51 +91,51 @@ nginxSidecar: config: nginx.conf: | worker_processes auto; - + error_log /dev/stderr notice; pid /tmp/nginx.pid; events { worker_connections 1024; } - + http { - proxy_cache_path /tmp/proxy_cache levels=1:2 use_temp_path=on keys_zone=cache:10m max_size=4096m loader_files=200 inactive=24h; - proxy_temp_path /tmp/proxy_temp; - + proxy_temp_path /tmp/proxy_temp; + client_body_temp_path /tmp/client_temp; + fastcgi_temp_path /tmp/fastcgi_temp; + uwsgi_temp_path /tmp/uwsgi_temp; + scgi_temp_path /tmp/scgi_temp; + log_format basic '$remote_addr [$time_local] ' - '$status $bytes_sent'; - + '$status $bytes_sent'; + server { - listen 6443 ssl http2; + listen 6443 ssl; http2 on; - + ssl_certificate /etc/nginx/cert/certificate.crt; ssl_certificate_key /etc/nginx/cert/key.pem; - + ssl_protocols TLSv1.3; ssl_ciphers HIGH:!aNULL:!MD5; - + # Enable session resumption to improve performance ssl_session_cache shared:SSL:10m; ssl_session_timeout 1h; - + location / { - proxy_cache cache; - # Forward gRPC traffic to the gRPC server on port 7000 grpc_pass grpc://127.0.0.1:7000; error_page 502 = /error502grpc; # Custom error page for GRPC backend issues } - + # Custom error page location = /error502grpc { - proxy_cache cache; internal; default_type text/plain; return 502 "Bad Gateway: gRPC server unreachable."; } - + access_log /dev/stdout basic; } } \ No newline at end of file From cc2d6f6bbce1c4eefb4c2ec806ffaf11f9610589 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Mon, 2 Dec 2024 12:44:27 +0100 Subject: [PATCH 053/170] Update prod to latest iris mpc version (#746) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/common-values-upgrade-server-left.yaml | 2 +- 
deploy/prod/common-values-upgrade-server-right.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 2c66d9da2..4e29972de 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.3" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: prod replicaCount: 1 diff --git a/deploy/prod/common-values-upgrade-server-left.yaml b/deploy/prod/common-values-upgrade-server-left.yaml index c15cb3eca..0c00a9ef2 100644 --- a/deploy/prod/common-values-upgrade-server-left.yaml +++ b/deploy/prod/common-values-upgrade-server-left.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: prod replicaCount: 1 diff --git a/deploy/prod/common-values-upgrade-server-right.yaml b/deploy/prod/common-values-upgrade-server-right.yaml index d56879676..4da521f19 100644 --- a/deploy/prod/common-values-upgrade-server-right.yaml +++ b/deploy/prod/common-values-upgrade-server-right.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: prod replicaCount: 1 From 3a5235e333a9958d5a52ac18cddf25bffdf45f06 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 2 Dec 2024 19:00:29 +0100 Subject: [PATCH 054/170] bump init db size in stage (#748) --- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 02fb0a576..1bb635b08 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "true" - name: 
SMPC__INIT_DB_SIZE - value: "0" + value: "10" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index db16650c1..01aa826ae 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "0" + value: "10" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 728821a8c..42c896ec7 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "0" + value: "10" - name: SMPC__MAX_DB_SIZE value: "1000000" From 6d76bfa5c748d39581881a116939243e0ef5d8ff Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Tue, 3 Dec 2024 10:38:41 +0100 Subject: [PATCH 055/170] Add Image Name to prod env variables (#749) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 3 +++ deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 3 +++ deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 160a65f39..99cca1fa6 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -118,6 +118,9 @@ env: - name: SMPC__NODE_HOSTNAMES value: '["iris-mpc-node.1.smpcv2.worldcoin.org","iris-mpc-node.2.smpcv2.worldcoin.org","iris-mpc-node.3.smpcv2.worldcoin.org"]' + - name: SMPC__IMAGE_NAME + value: $(IMAGE_NAME) + initContainer: enabled: true image: "amazon/aws-cli:2.17.62" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index ad61e9ad0..8fdff5cf4 100644 --- 
a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -118,6 +118,9 @@ env: - name: SMPC__NODE_HOSTNAMES value: '["iris-mpc-node.1.smpcv2.worldcoin.org","iris-mpc-node.2.smpcv2.worldcoin.org","iris-mpc-node.3.smpcv2.worldcoin.org"]' + - name: SMPC__IMAGE_NAME + value: $(IMAGE_NAME) + initContainer: enabled: true image: "amazon/aws-cli:2.17.62" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index a7a3073bf..5a7751442 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -118,6 +118,9 @@ env: - name: SMPC__NODE_HOSTNAMES value: '["iris-mpc-node.1.smpcv2.worldcoin.org","iris-mpc-node.2.smpcv2.worldcoin.org","iris-mpc-node.3.smpcv2.worldcoin.org"]' + - name: SMPC__IMAGE_NAME + value: $(IMAGE_NAME) + initContainer: enabled: true image: "amazon/aws-cli:2.17.62" From 4b8573ed756ad1a0ba1b3c5704e4c4467921c57c Mon Sep 17 00:00:00 2001 From: iliailia Date: Tue, 3 Dec 2024 22:10:45 +0100 Subject: [PATCH 056/170] Optimize preprocessing of queries (#747) --- Cargo.lock | 2 + iris-mpc-common/src/galois_engine.rs | 3 +- iris-mpc-cpu/Cargo.toml | 6 + iris-mpc-cpu/benches/hnsw.rs | 16 +- iris-mpc-cpu/bin/local_hnsw.rs | 24 +++ iris-mpc-cpu/src/database_generators.rs | 3 +- iris-mpc-cpu/src/execution/local.rs | 4 + iris-mpc-cpu/src/hawkers/galois_store.rs | 202 ++++++++++++++++------- iris-mpc-cpu/src/network/grpc.rs | 6 +- 9 files changed, 192 insertions(+), 74 deletions(-) create mode 100644 iris-mpc-cpu/bin/local_hnsw.rs diff --git a/Cargo.lock b/Cargo.lock index db3a60dec..9330efe36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2711,6 +2711,7 @@ dependencies = [ "bincode", "bytemuck", "bytes", + "clap", "criterion", "dashmap", "eyre", @@ -2732,6 +2733,7 @@ dependencies = [ "tracing", "tracing-subscriber", "tracing-test", + "uuid", ] [[package]] diff --git a/iris-mpc-common/src/galois_engine.rs 
b/iris-mpc-common/src/galois_engine.rs index dce573ed8..fa6e0814b 100644 --- a/iris-mpc-common/src/galois_engine.rs +++ b/iris-mpc-common/src/galois_engine.rs @@ -44,9 +44,10 @@ pub mod degree4 { .for_each(|chunk| chunk.rotate_left(by * 4)); } - #[derive(Debug, Clone, PartialEq, Eq, Hash)] + #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct GaloisRingTrimmedMaskCodeShare { pub id: usize, + #[serde(with = "BigArray")] pub coefs: [u16; MASK_CODE_LENGTH], } diff --git a/iris-mpc-cpu/Cargo.toml b/iris-mpc-cpu/Cargo.toml index 3fe92f715..05292c77d 100644 --- a/iris-mpc-cpu/Cargo.toml +++ b/iris-mpc-cpu/Cargo.toml @@ -16,6 +16,7 @@ backoff = {version="0.4.0", features = ["tokio"]} bincode.workspace = true bytes = "1.7" bytemuck.workspace = true +clap.workspace = true dashmap = "6.1.0" eyre.workspace = true futures.workspace = true @@ -35,6 +36,7 @@ tonic = "0.12.3" tracing.workspace = true tracing-subscriber.workspace = true tracing-test = "0.2.5" +uuid.workspace = true [dev-dependencies] criterion = { version = "0.5.1", features = ["async_tokio"] } @@ -48,3 +50,7 @@ harness = false [[example]] name = "hnsw-ex" + +[[bin]] +name = "local_hnsw" +path = "bin/local_hnsw.rs" \ No newline at end of file diff --git a/iris-mpc-cpu/benches/hnsw.rs b/iris-mpc-cpu/benches/hnsw.rs index 6eaece7b8..7513f6216 100644 --- a/iris-mpc-cpu/benches/hnsw.rs +++ b/iris-mpc-cpu/benches/hnsw.rs @@ -89,7 +89,7 @@ fn bench_hnsw_primitives(c: &mut Criterion) { let t1 = create_random_sharing(&mut rng, 10_u16); let t2 = create_random_sharing(&mut rng, 10_u16); - let runtime = LocalRuntime::mock_setup_with_channel().await.unwrap(); + let runtime = LocalRuntime::mock_setup_with_grpc().await.unwrap(); let mut jobs = JoinSet::new(); for (index, player) in runtime.identities.iter().enumerate() { @@ -116,7 +116,7 @@ fn bench_gr_primitives(c: &mut Criterion) { .build() .unwrap(); b.to_async(&rt).iter(|| async move { - let runtime = 
LocalRuntime::mock_setup_with_channel().await.unwrap(); + let runtime = LocalRuntime::mock_setup_with_grpc().await.unwrap(); let mut rng = AesRng::seed_from_u64(0); let iris_db = IrisDB::new_random_rng(4, &mut rng).db; @@ -174,12 +174,9 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { let (_, secret_searcher) = rt.block_on(async move { let mut rng = AesRng::seed_from_u64(0_u64); - LocalNetAby3NgStoreProtocol::lazy_random_setup_with_local_channel( - &mut rng, - database_size, - ) - .await - .unwrap() + LocalNetAby3NgStoreProtocol::lazy_random_setup_with_grpc(&mut rng, database_size) + .await + .unwrap() }); group.bench_function( @@ -207,12 +204,13 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { let neighbors = searcher .search_to_insert(&mut vector_store, &mut graph_store, &query) .await; + let inserted_query = vector_store.insert(&query).await; searcher .insert_from_search_results( &mut vector_store, &mut graph_store, &mut rng, - query, + inserted_query, neighbors, ) .await; diff --git a/iris-mpc-cpu/bin/local_hnsw.rs b/iris-mpc-cpu/bin/local_hnsw.rs new file mode 100644 index 000000000..20eca212d --- /dev/null +++ b/iris-mpc-cpu/bin/local_hnsw.rs @@ -0,0 +1,24 @@ +use aes_prng::AesRng; +use clap::Parser; +use iris_mpc_cpu::hawkers::galois_store::LocalNetAby3NgStoreProtocol; +use rand::SeedableRng; +use std::error::Error; + +#[derive(Parser)] +struct Args { + #[clap(short = 'n', default_value = "1000")] + database_size: usize, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let args = Args::parse(); + let database_size = args.database_size; + + println!("Starting Local HNSW with {} vectors", database_size); + let mut rng = AesRng::seed_from_u64(0_u64); + + LocalNetAby3NgStoreProtocol::shared_random_setup_with_grpc(&mut rng, database_size).await?; + + Ok(()) +} diff --git a/iris-mpc-cpu/src/database_generators.rs b/iris-mpc-cpu/src/database_generators.rs index 3d38209ed..243981322 100644 --- a/iris-mpc-cpu/src/database_generators.rs +++ 
b/iris-mpc-cpu/src/database_generators.rs @@ -4,11 +4,12 @@ use iris_mpc_common::{ iris_db::iris::IrisCode, }; use rand::{CryptoRng, Rng, RngCore}; +use serde::{Deserialize, Serialize}; type ShareRing = u16; type ShareRingPlain = RingElement; -#[derive(PartialEq, Eq, Debug, Clone)] +#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Hash)] pub struct GaloisRingSharedIris { pub code: GaloisRingIrisCodeShare, pub mask: GaloisRingTrimmedMaskCodeShare, diff --git a/iris-mpc-cpu/src/execution/local.rs b/iris-mpc-cpu/src/execution/local.rs index 5dba6bf91..0f425beae 100644 --- a/iris-mpc-cpu/src/execution/local.rs +++ b/iris-mpc-cpu/src/execution/local.rs @@ -65,6 +65,10 @@ impl LocalRuntime { Self::mock_setup(NetworkType::LocalChannel).await } + pub async fn mock_setup_with_grpc() -> eyre::Result { + Self::mock_setup(NetworkType::GrpcChannel).await + } + pub async fn new_with_network_type( identities: Vec, seeds: Vec, diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs index 2c68dd83f..ee07bd5da 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -16,15 +16,49 @@ use crate::{ }; use aes_prng::AesRng; use hawk_pack::{ - graph_store::{graph_mem::Layer, GraphMem}, + graph_store::{graph_mem::Layer, EntryPoint, GraphMem}, hnsw_db::{FurthestQueue, HawkSearcher}, GraphStore, VectorStore, }; use iris_mpc_common::iris_db::{db::IrisDB, iris::IrisCode}; use rand::{CryptoRng, RngCore, SeedableRng}; -use std::{collections::HashMap, fmt::Debug, vec}; +use serde::{Deserialize, Serialize}; +use std::{collections::HashMap, fmt::Debug, sync::Arc, vec}; use tokio::task::JoinSet; +#[derive(Copy, Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct VectorId { + id: PointId, +} + +impl From for VectorId { + fn from(id: PointId) -> Self { + VectorId { id } + } +} + +impl From<&PointId> for VectorId { + fn from(id: &PointId) -> Self { + VectorId { id: *id } + 
} +} + +impl From for VectorId { + fn from(id: usize) -> Self { + VectorId { id: id.into() } + } +} + +type GaloisRingPoint = GaloisRingSharedIris; + +#[derive(Clone, Serialize, Deserialize, Hash, Eq, PartialEq, Debug)] +pub struct Query { + pub query: GaloisRingPoint, + pub processed_query: GaloisRingPoint, +} + +type QueryRef = Arc; + #[derive(Default, Clone)] pub struct Aby3NgStorePlayer { points: Vec, @@ -36,33 +70,34 @@ impl std::fmt::Debug for Aby3NgStorePlayer { } } -#[derive(Eq, PartialEq, Clone, Debug)] -struct GaloisRingPoint { - /// Whatever encoding of a vector. - data: GaloisRingSharedIris, -} - impl Aby3NgStorePlayer { pub fn new_with_shared_db(data: Vec) -> Self { - let points: Vec = data - .into_iter() - .map(|d| GaloisRingPoint { data: d }) - .collect(); - Aby3NgStorePlayer { points } + Aby3NgStorePlayer { points: data } } - pub fn prepare_query(&mut self, raw_query: GaloisRingSharedIris) -> PointId { - self.points.push(GaloisRingPoint { data: raw_query }); + pub fn prepare_query(&mut self, raw_query: GaloisRingSharedIris) -> QueryRef { + let mut preprocessed_query = raw_query.clone(); + preprocessed_query.code.preprocess_iris_code_query_share(); + preprocessed_query.mask.preprocess_mask_code_query_share(); + + Arc::new(Query { + query: raw_query, + processed_query: preprocessed_query, + }) + } - let point_id = self.points.len() - 1; - point_id.into() + pub fn get_vector(&self, vector: &VectorId) -> &GaloisRingPoint { + &self.points[vector.id] } } impl Aby3NgStorePlayer { - fn insert(&mut self, query: &PointId) -> PointId { - // The query is now accepted in the store. It keeps the same ID. - *query + fn insert(&mut self, query: &QueryRef) -> VectorId { + // The query is now accepted in the store. 
+ self.points.push(query.query.clone()); + + let new_id = self.points.len() - 1; + VectorId { id: new_id.into() } } } @@ -153,19 +188,16 @@ pub async fn setup_local_store_aby3_players( } impl LocalNetAby3NgStoreProtocol { - pub fn prepare_query(&mut self, code: GaloisRingSharedIris) -> PointId { + pub fn prepare_query(&mut self, code: GaloisRingSharedIris) -> QueryRef { self.storage.prepare_query(code) } } +/// Assumes that the first iris of each pair is preprocessed. async fn eval_pairwise_distances( - mut pairs: Vec<(GaloisRingSharedIris, GaloisRingSharedIris)>, + pairs: Vec<(GaloisRingSharedIris, GaloisRingSharedIris)>, player_session: &mut Session, ) -> Vec> { - pairs.iter_mut().for_each(|(_x, y)| { - y.code.preprocess_iris_code_query_share(); - y.mask.preprocess_mask_code_query_share(); - }); let ds_and_ts = galois_ring_pairwise_distance(player_session, &pairs) .await .unwrap(); @@ -175,14 +207,12 @@ async fn eval_pairwise_distances( } impl VectorStore for LocalNetAby3NgStoreProtocol { - type QueryRef = PointId; // Vector ID, pending insertion. - type VectorRef = PointId; // Vector ID, inserted. + type QueryRef = QueryRef; // Point ID, pending insertion. + type VectorRef = VectorId; // Point ID, inserted. type DistanceRef = DistanceShare; // Distance represented as shares. async fn insert(&mut self, query: &Self::QueryRef) -> Self::VectorRef { - // The query is now accepted in the store. It keeps the same ID. - self.storage.insert(query); - *query + self.storage.insert(query) } async fn eval_distance( @@ -191,11 +221,8 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { vector: &Self::VectorRef, ) -> Self::DistanceRef { let mut player_session = self.get_owner_session(); - // TODO: decouple queries and vectors. Ideally, queries should be kept in a - // separate store. 
- let query_point = self.storage.points[*query].clone(); - let vector_point = self.storage.points[*vector].clone(); - let pairs = vec![(query_point.data, vector_point.data)]; + let vector_point = self.storage.get_vector(vector); + let pairs = vec![(query.processed_query.clone(), vector_point.clone())]; let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; DistanceShare::new(ds_and_ts[0].clone(), ds_and_ts[1].clone()) } @@ -206,12 +233,11 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { vectors: &[Self::VectorRef], ) -> Vec { let mut player_session = self.get_owner_session(); - let query_point = self.storage.points[*query].clone(); let pairs = vectors .iter() .map(|vector_id| { - let vector_point = self.storage.points[*vector_id].clone(); - (query_point.data.clone(), vector_point.data) + let vector_point = self.storage.get_vector(vector_id); + (query.processed_query.clone(), vector_point.clone()) }) .collect::>(); let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; @@ -253,11 +279,30 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { } impl LocalNetAby3NgStoreProtocol { + async fn eval_distance_vectors( + &mut self, + vector1: &::VectorRef, + vector2: &::VectorRef, + ) -> ::DistanceRef { + let mut player_session = self.get_owner_session(); + let point1 = self.storage.get_vector(vector1); + let mut point2 = self.storage.get_vector(vector2).clone(); + point2.code.preprocess_iris_code_query_share(); + point2.mask.preprocess_mask_code_query_share(); + let pairs = vec![(point1.clone(), point2.clone())]; + let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; + DistanceShare::new(ds_and_ts[0].clone(), ds_and_ts[1].clone()) + } + async fn graph_from_plain( &mut self, graph_store: &GraphMem, ) -> GraphMem { let ep = graph_store.get_entry_point().await; + let new_ep = ep.map(|ep| EntryPoint { + vector_ref: VectorId { id: ep.vector_ref }, + layer_count: ep.layer_count, + }); let layers = 
graph_store.get_layers(); @@ -267,19 +312,21 @@ impl LocalNetAby3NgStoreProtocol { let mut shared_links = HashMap::new(); for (source_v, queue) in links { let mut shared_queue = vec![]; + let source_v = source_v.into(); for (target_v, _) in queue.as_vec_ref() { // recompute distances of graph edges from scratch - let distance = self.eval_distance(source_v, target_v).await; - shared_queue.push((*target_v, distance.clone())); + let target_v: VectorId = target_v.into(); + let distance = self.eval_distance_vectors(&source_v, &target_v).await; + shared_queue.push((target_v, distance.clone())); } shared_links.insert( - *source_v, + source_v, FurthestQueue::from_ascending_vec(shared_queue.clone()), ); } shared_layers.push(Layer::from_links(shared_links)); } - GraphMem::from_precomputed(ep.clone(), shared_layers) + GraphMem::from_precomputed(new_ep, shared_layers) } } @@ -333,6 +380,22 @@ impl LocalNetAby3NgStoreProtocol { Self::lazy_random_setup(rng, database_size, NetworkType::LocalChannel).await } + /// Generates 3 pairs of vector stores and graphs from a random plaintext + /// vector store and graph, which are returned as well. Networking is + /// based on gRPC. + pub async fn lazy_random_setup_with_grpc( + rng: &mut R, + database_size: usize, + ) -> eyre::Result<( + (PlaintextStore, GraphMem), + Vec<( + LocalNetAby3NgStoreProtocol, + GraphMem, + )>, + )> { + Self::lazy_random_setup(rng, database_size, NetworkType::GrpcChannel).await + } + /// Generates 3 pairs of vector stores and graphs corresponding to each /// local player. 
pub async fn shared_random_setup( @@ -364,12 +427,13 @@ impl LocalNetAby3NgStoreProtocol { let neighbors = searcher .search_to_insert(&mut store, &mut graph_store, query) .await; + let inserted_query = store.insert(query).await; searcher .insert_from_search_results( &mut store, &mut graph_store, &mut rng_searcher, - *query, + inserted_query, neighbors, ) .await; @@ -393,6 +457,15 @@ impl LocalNetAby3NgStoreProtocol { ) -> eyre::Result)>> { Self::shared_random_setup(rng, database_size, NetworkType::LocalChannel).await } + + /// Generates 3 pairs of vector stores and graphs corresponding to each + /// local player. Networking is based on gRPC. + pub async fn shared_random_setup_with_grpc( + rng: &mut R, + database_size: usize, + ) -> eyre::Result)>> { + Self::shared_random_setup(rng, database_size, NetworkType::GrpcChannel).await + } } #[cfg(test)] @@ -431,28 +504,32 @@ mod tests { let mut aby3_graph = GraphMem::new(); let db = HawkSearcher::default(); + let mut inserted = vec![]; // insert queries for query in queries.iter() { let neighbors = db .search_to_insert(&mut store, &mut aby3_graph, query) .await; + let inserted_query = store.insert(query).await; + inserted.push(inserted_query); db.insert_from_search_results( &mut store, &mut aby3_graph, &mut rng, - *query, + inserted_query, neighbors, ) .await; } - println!("FINISHED INSERTING"); + tracing::debug!("FINISHED INSERTING"); // Search for the same codes and find matches. 
let mut matching_results = vec![]; - for query in queries.iter() { + for v in inserted.into_iter() { + let query = store.prepare_query(store.storage.get_vector(&v).clone()); let neighbors = db - .search_to_insert(&mut store, &mut aby3_graph, query) + .search_to_insert(&mut store, &mut aby3_graph, &query) .await; - tracing::debug!("Finished query"); + tracing::debug!("Finished checking query"); matching_results.push(db.is_match(&mut store, &neighbors).await) } matching_results @@ -512,10 +589,9 @@ mod tests { let hawk_searcher = hawk_searcher.clone(); let mut v = v.clone(); let mut g = g.clone(); + let q = v.prepare_query(v.storage.get_vector(&i.into()).clone()); jobs.spawn(async move { - let secret_neighbors = hawk_searcher - .search_to_insert(&mut v, &mut g, &i.into()) - .await; + let secret_neighbors = hawk_searcher.search_to_insert(&mut v, &mut g, &q).await; hawk_searcher.is_match(&mut v, &secret_neighbors).await }); @@ -528,9 +604,9 @@ mod tests { let mut v = v.clone(); let mut g = g.clone(); jobs.spawn(async move { - let secret_neighbors = hawk_searcher - .search_to_insert(&mut v, &mut g, &i.into()) - .await; + let query = v.prepare_query(v.storage.get_vector(&i.into()).clone()); + let secret_neighbors = + hawk_searcher.search_to_insert(&mut v, &mut g, &query).await; hawk_searcher.is_match(&mut v, &secret_neighbors).await }); @@ -611,10 +687,16 @@ mod tests { let index21 = comb2[1]; jobs.spawn(async move { let dist1_aby3 = store - .eval_distance(&player_inserts[index10], &player_inserts[index11]) + .eval_distance_vectors( + &player_inserts[index10], + &player_inserts[index11], + ) .await; let dist2_aby3 = store - .eval_distance(&player_inserts[index20], &player_inserts[index21]) + .eval_distance_vectors( + &player_inserts[index20], + &player_inserts[index21], + ) .await; store.less_than(&dist1_aby3, &dist2_aby3).await }); @@ -653,10 +735,10 @@ mod tests { let mut store = store.clone(); let mut graph = graph.clone(); let searcher = searcher.clone(); + let q = 
store.prepare_query(store.storage.get_vector(&i.into()).clone()); jobs.spawn(async move { - let secret_neighbors = searcher - .search_to_insert(&mut store, &mut graph, &i.into()) - .await; + let secret_neighbors = + searcher.search_to_insert(&mut store, &mut graph, &q).await; searcher.is_match(&mut store, &secret_neighbors).await }); } diff --git a/iris-mpc-cpu/src/network/grpc.rs b/iris-mpc-cpu/src/network/grpc.rs index 96c351881..b1e134413 100644 --- a/iris-mpc-cpu/src/network/grpc.rs +++ b/iris-mpc-cpu/src/network/grpc.rs @@ -587,10 +587,10 @@ mod tests { let mut store = store.clone(); let mut graph = graph.clone(); let searcher = searcher.clone(); + let q = store.prepare_query(store.storage.get_vector(&i.into()).clone()); jobs.spawn(async move { - let secret_neighbors = searcher - .search_to_insert(&mut store, &mut graph, &i.into()) - .await; + let secret_neighbors = + searcher.search_to_insert(&mut store, &mut graph, &q).await; searcher.is_match(&mut store, &secret_neighbors).await }); } From 283862a852dce4d9a784170791eefdc757bc8699 Mon Sep 17 00:00:00 2001 From: iliailia Date: Wed, 4 Dec 2024 13:23:56 +0100 Subject: [PATCH 057/170] Optimize mock setup generation (#750) --- iris-mpc-cpu/benches/hnsw.rs | 2 +- iris-mpc-cpu/src/hawkers/galois_store.rs | 64 ++++++++++++++++++++---- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/iris-mpc-cpu/benches/hnsw.rs b/iris-mpc-cpu/benches/hnsw.rs index 7513f6216..7b843e408 100644 --- a/iris-mpc-cpu/benches/hnsw.rs +++ b/iris-mpc-cpu/benches/hnsw.rs @@ -174,7 +174,7 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { let (_, secret_searcher) = rt.block_on(async move { let mut rng = AesRng::seed_from_u64(0_u64); - LocalNetAby3NgStoreProtocol::lazy_random_setup_with_grpc(&mut rng, database_size) + LocalNetAby3NgStoreProtocol::lazy_random_setup_with_grpc(&mut rng, database_size, false) .await .unwrap() }); diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs index 
ee07bd5da..f69eea4cc 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -12,7 +12,10 @@ use crate::{ compare_threshold_and_open, cross_compare, galois_ring_pairwise_distance, galois_ring_to_rep3, }, - shares::share::{DistanceShare, Share}, + shares::{ + ring_impl::RingElement, + share::{DistanceShare, Share}, + }, }; use aes_prng::AesRng; use hawk_pack::{ @@ -279,6 +282,19 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { } impl LocalNetAby3NgStoreProtocol { + pub fn get_trivial_share(&self, distance: u16) -> Share { + let player = self.get_owner_index(); + let distance_elem = RingElement(distance); + let zero_elem = RingElement(0_u16); + + match player { + 0 => Share::new(distance_elem, zero_elem), + 1 => Share::new(zero_elem, distance_elem), + 2 => Share::new(zero_elem, zero_elem), + _ => panic!("Invalid player index"), + } + } + async fn eval_distance_vectors( &mut self, vector1: &::VectorRef, @@ -297,6 +313,7 @@ impl LocalNetAby3NgStoreProtocol { async fn graph_from_plain( &mut self, graph_store: &GraphMem, + recompute_distances: bool, ) -> GraphMem { let ep = graph_store.get_entry_point().await; let new_ep = ep.map(|ep| EntryPoint { @@ -311,12 +328,19 @@ impl LocalNetAby3NgStoreProtocol { let links = layer.get_links_map(); let mut shared_links = HashMap::new(); for (source_v, queue) in links { - let mut shared_queue = vec![]; let source_v = source_v.into(); - for (target_v, _) in queue.as_vec_ref() { - // recompute distances of graph edges from scratch - let target_v: VectorId = target_v.into(); - let distance = self.eval_distance_vectors(&source_v, &target_v).await; + let mut shared_queue = vec![]; + for (target_v, dist) in queue.as_vec_ref() { + let target_v = target_v.into(); + let distance = if recompute_distances { + // recompute distances of graph edges from scratch + self.eval_distance_vectors(&source_v, &target_v).await + } else { + DistanceShare::new( + self.get_trivial_share(dist.0), + 
self.get_trivial_share(dist.1), + ) + }; shared_queue.push((target_v, distance.clone())); } shared_links.insert( @@ -333,10 +357,15 @@ impl LocalNetAby3NgStoreProtocol { impl LocalNetAby3NgStoreProtocol { /// Generates 3 pairs of vector stores and graphs from a random plaintext /// vector store and graph, which are returned as well. + /// The network type is specified by the user. + /// A recompute flag is used to determine whether to recompute the distances + /// from stored shares. If recompute is set to false, the distances are + /// naively converted from plaintext. pub async fn lazy_random_setup( rng: &mut R, database_size: usize, network_t: NetworkType, + recompute_distances: bool, ) -> eyre::Result<( (PlaintextStore, GraphMem), Vec<(Self, GraphMem)>, @@ -354,7 +383,9 @@ impl LocalNetAby3NgStoreProtocol { jobs.spawn(async move { ( store.clone(), - store.graph_from_plain(&plaintext_graph_store).await, + store + .graph_from_plain(&plaintext_graph_store, recompute_distances) + .await, ) }); } @@ -370,6 +401,7 @@ impl LocalNetAby3NgStoreProtocol { pub async fn lazy_random_setup_with_local_channel( rng: &mut R, database_size: usize, + recompute_distances: bool, ) -> eyre::Result<( (PlaintextStore, GraphMem), Vec<( @@ -377,7 +409,13 @@ impl LocalNetAby3NgStoreProtocol { GraphMem, )>, )> { - Self::lazy_random_setup(rng, database_size, NetworkType::LocalChannel).await + Self::lazy_random_setup( + rng, + database_size, + NetworkType::LocalChannel, + recompute_distances, + ) + .await } /// Generates 3 pairs of vector stores and graphs from a random plaintext @@ -386,6 +424,7 @@ impl LocalNetAby3NgStoreProtocol { pub async fn lazy_random_setup_with_grpc( rng: &mut R, database_size: usize, + recompute_distances: bool, ) -> eyre::Result<( (PlaintextStore, GraphMem), Vec<( @@ -393,7 +432,13 @@ impl LocalNetAby3NgStoreProtocol { GraphMem, )>, )> { - Self::lazy_random_setup(rng, database_size, NetworkType::GrpcChannel).await + Self::lazy_random_setup( + rng, + database_size, 
+ NetworkType::GrpcChannel, + recompute_distances, + ) + .await } /// Generates 3 pairs of vector stores and graphs corresponding to each @@ -557,6 +602,7 @@ mod tests { &mut rng, database_size, network_t.clone(), + true, ) .await .unwrap(); From a201496622322a12e1caf88d6f72a14ff32a6829 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 4 Dec 2024 04:31:38 -0800 Subject: [PATCH 058/170] make supermatchers allows non-unique (#751) --- iris-mpc-gpu/src/server/actor.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index f332372e6..3ceb6318f 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -712,7 +712,7 @@ impl ServerActor { let insertion_list = distribute_insertions(&insertion_list, &self.current_db_sizes); // Calculate the new indices for the inserted queries - let matches = calculate_insertion_indices( + let mut matches = calculate_insertion_indices( &mut merged_results, &insertion_list, &self.current_db_sizes, @@ -757,6 +757,15 @@ impl ServerActor { } } + // Check for supermatchers for v1 compatibility and mark them as non-unique + const SUPERMATCH_THRESHOLD: usize = 4_000; + for i in 0..batch_size { + if match_counters[i] > SUPERMATCH_THRESHOLD { + matches[i] = true; + } + } + + // Fetch the partial matches let (partial_match_ids_left, partial_match_ids_right) = if self.return_partial_results { // Transfer the partial results to the host let partial_match_counters_left = self From 3ffffe101fb5383512f5e6630ab6316beb17d2f5 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 4 Dec 2024 15:31:21 +0100 Subject: [PATCH 059/170] Add error response to signup-service (#753) --- iris-mpc-common/src/helpers/mod.rs | 1 + iris-mpc-common/src/helpers/smpc_request.rs | 68 ---------------- iris-mpc-common/src/helpers/smpc_response.rs | 78 +++++++++++++++++++ iris-mpc-store/src/lib.rs | 2 +- iris-mpc/src/bin/client.rs | 6 +- 
iris-mpc/src/bin/server.rs | 82 ++++++++++++++++---- 6 files changed, 151 insertions(+), 86 deletions(-) create mode 100644 iris-mpc-common/src/helpers/smpc_response.rs diff --git a/iris-mpc-common/src/helpers/mod.rs b/iris-mpc-common/src/helpers/mod.rs index d330d08d6..8731fd9a3 100644 --- a/iris-mpc-common/src/helpers/mod.rs +++ b/iris-mpc-common/src/helpers/mod.rs @@ -5,6 +5,7 @@ pub mod kms_dh; pub mod sha256; pub mod shutdown_handler; pub mod smpc_request; +pub mod smpc_response; pub mod sqs_s3_helper; pub mod sync; pub mod task_monitor; diff --git a/iris-mpc-common/src/helpers/smpc_request.rs b/iris-mpc-common/src/helpers/smpc_request.rs index 53c7b1f72..af97c5b40 100644 --- a/iris-mpc-common/src/helpers/smpc_request.rs +++ b/iris-mpc-common/src/helpers/smpc_request.rs @@ -105,7 +105,6 @@ where map.serialize(serializer) } -pub const SMPC_MESSAGE_TYPE_ATTRIBUTE: &str = "message_type"; pub const IDENTITY_DELETION_MESSAGE_TYPE: &str = "identity_deletion"; pub const CIRCUIT_BREAKER_MESSAGE_TYPE: &str = "circuit_breaker"; pub const UNIQUENESS_MESSAGE_TYPE: &str = "uniqueness"; @@ -299,70 +298,3 @@ impl UniquenessRequest { Ok(self.iris_shares_file_hashes[party_id] == calculate_sha256(stringified_share)) } } - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct UniquenessResult { - pub node_id: usize, - pub serial_id: Option, - pub is_match: bool, - pub signup_id: String, - pub matched_serial_ids: Option>, - pub matched_serial_ids_left: Option>, - pub matched_serial_ids_right: Option>, - pub matched_batch_request_ids: Option>, -} - -impl UniquenessResult { - #[allow(clippy::too_many_arguments)] - pub fn new( - node_id: usize, - serial_id: Option, - is_match: bool, - signup_id: String, - matched_serial_ids: Option>, - matched_serial_ids_left: Option>, - matched_serial_ids_right: Option>, - matched_batch_request_ids: Option>, - ) -> Self { - Self { - node_id, - serial_id, - is_match, - signup_id, - matched_serial_ids, - matched_serial_ids_left, - 
matched_serial_ids_right, - matched_batch_request_ids, - } - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct IdentityDeletionResult { - pub node_id: usize, - pub serial_id: u32, - pub success: bool, -} - -impl IdentityDeletionResult { - pub fn new(node_id: usize, serial_id: u32, success: bool) -> Self { - Self { - node_id, - serial_id, - success, - } - } -} - -pub fn create_message_type_attribute_map( - message_type: &str, -) -> HashMap { - let mut message_attributes_map = HashMap::new(); - let message_type_value = MessageAttributeValue::builder() - .data_type("String") - .string_value(message_type) - .build() - .unwrap(); - message_attributes_map.insert(SMPC_MESSAGE_TYPE_ATTRIBUTE.to_string(), message_type_value); - message_attributes_map -} diff --git a/iris-mpc-common/src/helpers/smpc_response.rs b/iris-mpc-common/src/helpers/smpc_response.rs new file mode 100644 index 000000000..492ecddc3 --- /dev/null +++ b/iris-mpc-common/src/helpers/smpc_response.rs @@ -0,0 +1,78 @@ +use aws_sdk_sns::types::MessageAttributeValue; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +pub const SMPC_MESSAGE_TYPE_ATTRIBUTE: &str = "message_type"; +// Error Reasons +pub const ERROR_FAILED_TO_PROCESS_IRIS_SHARES: &str = "failed_to_process_iris_shares"; + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UniquenessResult { + pub node_id: usize, + pub serial_id: Option, + pub is_match: bool, + pub signup_id: String, + pub matched_serial_ids: Option>, + pub matched_serial_ids_left: Option>, + pub matched_serial_ids_right: Option>, + pub matched_batch_request_ids: Option>, + pub error: Option, + pub error_reason: Option, +} + +impl UniquenessResult { + #[allow(clippy::too_many_arguments)] + pub fn new( + node_id: usize, + serial_id: Option, + is_match: bool, + signup_id: String, + matched_serial_ids: Option>, + matched_serial_ids_left: Option>, + matched_serial_ids_right: Option>, + matched_batch_request_ids: Option>, + ) -> Self { + 
Self { + node_id, + serial_id, + is_match, + signup_id, + matched_serial_ids, + matched_serial_ids_left, + matched_serial_ids_right, + matched_batch_request_ids, + error: None, + error_reason: None, + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct IdentityDeletionResult { + pub node_id: usize, + pub serial_id: u32, + pub success: bool, +} + +impl IdentityDeletionResult { + pub fn new(node_id: usize, serial_id: u32, success: bool) -> Self { + Self { + node_id, + serial_id, + success, + } + } +} + +pub fn create_message_type_attribute_map( + message_type: &str, +) -> HashMap { + let mut message_attributes_map = HashMap::new(); + let message_type_value = MessageAttributeValue::builder() + .data_type("String") + .string_value(message_type) + .build() + .unwrap(); + message_attributes_map.insert(SMPC_MESSAGE_TYPE_ATTRIBUTE.to_string(), message_type_value); + message_attributes_map +} diff --git a/iris-mpc-store/src/lib.rs b/iris-mpc-store/src/lib.rs index 15cca4345..29afade68 100644 --- a/iris-mpc-store/src/lib.rs +++ b/iris-mpc-store/src/lib.rs @@ -485,7 +485,7 @@ mod tests { use super::*; use futures::TryStreamExt; - use iris_mpc_common::helpers::smpc_request::UniquenessResult; + use iris_mpc_common::helpers::smpc_response::UniquenessResult; #[tokio::test] async fn test_store() -> Result<()> { diff --git a/iris-mpc/src/bin/client.rs b/iris-mpc/src/bin/client.rs index 682b630d3..0ab9f77af 100644 --- a/iris-mpc/src/bin/client.rs +++ b/iris-mpc/src/bin/client.rs @@ -10,10 +10,8 @@ use iris_mpc_common::{ helpers::{ key_pair::download_public_key, sha256::calculate_sha256, - smpc_request::{ - create_message_type_attribute_map, IrisCodesJSON, UniquenessRequest, UniquenessResult, - UNIQUENESS_MESSAGE_TYPE, - }, + smpc_request::{IrisCodesJSON, UniquenessRequest, UNIQUENESS_MESSAGE_TYPE}, + smpc_response::{create_message_type_attribute_map, UniquenessResult}, sqs_s3_helper::upload_file_and_generate_presigned_url, }, iris_db::{db::IrisDB, 
iris::IrisCode}, diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 3a634b4f6..842d4f220 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -18,10 +18,13 @@ use iris_mpc_common::{ kms_dh::derive_shared_secret, shutdown_handler::ShutdownHandler, smpc_request::{ - create_message_type_attribute_map, CircuitBreakerRequest, IdentityDeletionRequest, - IdentityDeletionResult, ReceiveRequestError, SQSMessage, UniquenessRequest, - UniquenessResult, CIRCUIT_BREAKER_MESSAGE_TYPE, IDENTITY_DELETION_MESSAGE_TYPE, - SMPC_MESSAGE_TYPE_ATTRIBUTE, UNIQUENESS_MESSAGE_TYPE, + CircuitBreakerRequest, IdentityDeletionRequest, ReceiveRequestError, SQSMessage, + UniquenessRequest, CIRCUIT_BREAKER_MESSAGE_TYPE, IDENTITY_DELETION_MESSAGE_TYPE, + UNIQUENESS_MESSAGE_TYPE, + }, + smpc_response::{ + create_message_type_attribute_map, IdentityDeletionResult, UniquenessResult, + ERROR_FAILED_TO_PROCESS_IRIS_SHARES, SMPC_MESSAGE_TYPE_ATTRIBUTE, }, sync::SyncState, task_monitor::TaskMonitor, @@ -118,13 +121,16 @@ fn preprocess_iris_message_shares( async fn receive_batch( party_id: usize, client: &Client, - queue_url: &String, + sns_client: &SNSClient, + config: &Config, store: &Store, skip_request_ids: &[String], shares_encryption_key_pairs: SharesEncryptionKeyPairs, - max_batch_size: usize, shutdown_handler: &ShutdownHandler, + error_result_attributes: &HashMap, ) -> eyre::Result, ReceiveRequestError> { + let max_batch_size = config.clone().max_batch_size; + let queue_url = &config.clone().requests_queue_url; if shutdown_handler.is_shutting_down() { tracing::info!("Stopping batch receive due to shutdown signal..."); return Ok(None); @@ -349,8 +355,7 @@ async fn receive_batch( tokio::time::sleep(SQS_POLLING_INTERVAL).await; } } - - for handle in handles { + for (index, handle) in handles.into_iter().enumerate() { let ( ( ( @@ -378,6 +383,18 @@ async fn receive_batch( Ok(res) => (res, true), Err(e) => { tracing::error!("Failed to process iris shares: 
{:?}", e); + // Return error message back to the signup-service if failed to process iris + // shares + send_error_results_to_sns( + batch_query.request_ids[index].clone(), + &batch_query.metadata[index], + sns_client, + config, + error_result_attributes, + UNIQUENESS_MESSAGE_TYPE, + ERROR_FAILED_TO_PROCESS_IRIS_SHARES, + ) + .await?; // If we failed to process the iris shares, we include a dummy entry in the // batch in order to keep the same order across nodes let dummy_code_share = GaloisRingIrisCodeShare::default_for_party(party_id); @@ -532,6 +549,43 @@ async fn initialize_chacha_seeds( Ok(chacha_seeds) } +async fn send_error_results_to_sns( + signup_id: String, + metadata: &BatchMetadata, + sns_client: &SNSClient, + config: &Config, + base_message_attributes: &HashMap, + message_type: &str, + error_reason: &str, +) -> eyre::Result<()> { + let message: UniquenessResult = UniquenessResult { + node_id: config.party_id, + serial_id: None, + is_match: false, + signup_id, + matched_serial_ids: None, + matched_serial_ids_left: None, + matched_serial_ids_right: None, + matched_batch_request_ids: None, + error: Some(true), + error_reason: Some(String::from(error_reason)), + }; + let message_serialised = serde_json::to_string(&message)?; + let mut message_attributes = base_message_attributes.clone(); + let trace_attributes = construct_message_attributes(&metadata.trace_id, &metadata.span_id)?; + message_attributes.extend(trace_attributes); + sns_client + .publish() + .topic_arn(&config.results_topic_arn) + .message(message_serialised) + .message_group_id(format!("party-id-{}", config.party_id)) + .set_message_attributes(Some(message_attributes)) + .send() + .await?; + metrics::counter!("result.sent", "type" => message_type.to_owned()+"_error").increment(1); + + Ok(()) +} async fn send_results_to_sns( result_events: Vec, metadata: &[BatchMetadata], @@ -1186,7 +1240,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { tracing::info!("⚓️ ANCHOR: Start the main 
loop"); let processing_timeout = Duration::from_secs(config.processing_timeout_secs); - + let error_result_attribute = create_message_type_attribute_map(UNIQUENESS_MESSAGE_TYPE); let res: eyre::Result<()> = async { tracing::info!("Entering main loop"); // **Tensor format of queries** @@ -1210,12 +1264,13 @@ async fn server_main(config: Config) -> eyre::Result<()> { let mut next_batch = receive_batch( party_id, &sqs_client, - &config.requests_queue_url, + &sns_client, + &config, &store, &skip_request_ids, shares_encryption_key_pair.clone(), - config.max_batch_size, &shutdown_handler, + &error_result_attribute, ); let dummy_shares_for_deletions = get_dummy_shares_for_deletion(party_id); @@ -1262,12 +1317,13 @@ async fn server_main(config: Config) -> eyre::Result<()> { next_batch = receive_batch( party_id, &sqs_client, - &config.requests_queue_url, + &sns_client, + &config, &store, &skip_request_ids, shares_encryption_key_pair.clone(), - config.max_batch_size, &shutdown_handler, + &error_result_attribute, ); // await the result From f71ec2518a8c3eb2ecac16bab3e3b86c8ea31ded Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 4 Dec 2024 16:16:48 +0100 Subject: [PATCH 060/170] Update stage version iris-mpc (#754) --- deny.toml | 9 +++++---- deploy/stage/common-values-iris-mpc.yaml | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/deny.toml b/deny.toml index 4eb13b5fa..08f16a224 100644 --- a/deny.toml +++ b/deny.toml @@ -5,10 +5,11 @@ all-features = true [advisories] version = 2 ignore = [ - { id = "RUSTSEC-2021-0137", reason = "we will switch to alkali eventually" }, - # https://github.com/mehcode/config-rs/issues/563 - { id = "RUSTSEC-2024-0384", reason = "waiting for `web-time` crate to remove the dependency" }, - { id = "RUSTSEC-2024-0388", reason = "waiting for `mongodb` crate to remove the deprecated dependency" }, + { id = "RUSTSEC-2021-0137", reason = "we will switch to alkali eventually" }, + # 
https://github.com/mehcode/config-rs/issues/563 + { id = "RUSTSEC-2024-0384", reason = "waiting for `web-time` crate to remove the dependency" }, + { id = "RUSTSEC-2024-0388", reason = "waiting for `mongodb` crate to remove the deprecated dependency" }, + { id = "RUSTSEC-2024-0402", reason = "wating for `index-map` crate to remove the dependency" }, ] [sources] diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 5a2f41a7f..8fcd3920e 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.10.5" environment: stage replicaCount: 1 From bc098e41269b86f59cb44d2fffc61761f4ca55de Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Wed, 4 Dec 2024 07:34:16 -0800 Subject: [PATCH 061/170] add hc port (#755) --- deploy/prod/smpcv2-0-prod/values-upgrade-server-left.yaml | 2 ++ deploy/prod/smpcv2-0-prod/values-upgrade-server-right.yaml | 2 ++ deploy/prod/smpcv2-1-prod/values-upgrade-server-left.yaml | 2 ++ deploy/prod/smpcv2-1-prod/values-upgrade-server-right.yaml | 2 ++ deploy/prod/smpcv2-2-prod/values-upgrade-server-left.yaml | 2 ++ deploy/prod/smpcv2-2-prod/values-upgrade-server-right.yaml | 2 ++ deploy/stage/smpcv2-0-stage/values-upgrade-server-left.yaml | 2 ++ deploy/stage/smpcv2-0-stage/values-upgrade-server-right.yaml | 2 ++ deploy/stage/smpcv2-1-stage/values-upgrade-server-left.yaml | 2 ++ deploy/stage/smpcv2-1-stage/values-upgrade-server-right.yaml | 2 ++ deploy/stage/smpcv2-2-stage/values-upgrade-server-left.yaml | 2 ++ deploy/stage/smpcv2-2-stage/values-upgrade-server-right.yaml | 2 ++ 12 files changed, 24 insertions(+) diff --git a/deploy/prod/smpcv2-0-prod/values-upgrade-server-left.yaml b/deploy/prod/smpcv2-0-prod/values-upgrade-server-left.yaml index 884a3f4a4..23a679a81 100644 --- a/deploy/prod/smpcv2-0-prod/values-upgrade-server-left.yaml +++ 
b/deploy/prod/smpcv2-0-prod/values-upgrade-server-left.yaml @@ -9,6 +9,8 @@ args: - "left" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/prod/smpcv2-0-prod/values-upgrade-server-right.yaml b/deploy/prod/smpcv2-0-prod/values-upgrade-server-right.yaml index f988173e6..35ad13c73 100644 --- a/deploy/prod/smpcv2-0-prod/values-upgrade-server-right.yaml +++ b/deploy/prod/smpcv2-0-prod/values-upgrade-server-right.yaml @@ -9,6 +9,8 @@ args: - "right" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/prod/smpcv2-1-prod/values-upgrade-server-left.yaml b/deploy/prod/smpcv2-1-prod/values-upgrade-server-left.yaml index d140f92fc..b64075865 100644 --- a/deploy/prod/smpcv2-1-prod/values-upgrade-server-left.yaml +++ b/deploy/prod/smpcv2-1-prod/values-upgrade-server-left.yaml @@ -9,6 +9,8 @@ args: - "left" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/prod/smpcv2-1-prod/values-upgrade-server-right.yaml b/deploy/prod/smpcv2-1-prod/values-upgrade-server-right.yaml index 81a947071..0700f4770 100644 --- a/deploy/prod/smpcv2-1-prod/values-upgrade-server-right.yaml +++ b/deploy/prod/smpcv2-1-prod/values-upgrade-server-right.yaml @@ -9,6 +9,8 @@ args: - "right" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/prod/smpcv2-2-prod/values-upgrade-server-left.yaml b/deploy/prod/smpcv2-2-prod/values-upgrade-server-left.yaml index cb4fd532c..3b41b5db5 100644 --- a/deploy/prod/smpcv2-2-prod/values-upgrade-server-left.yaml +++ b/deploy/prod/smpcv2-2-prod/values-upgrade-server-left.yaml @@ -9,6 +9,8 @@ args: - "left" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/prod/smpcv2-2-prod/values-upgrade-server-right.yaml 
b/deploy/prod/smpcv2-2-prod/values-upgrade-server-right.yaml index e4d486815..a531fc0fc 100644 --- a/deploy/prod/smpcv2-2-prod/values-upgrade-server-right.yaml +++ b/deploy/prod/smpcv2-2-prod/values-upgrade-server-right.yaml @@ -9,6 +9,8 @@ args: - "right" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/stage/smpcv2-0-stage/values-upgrade-server-left.yaml b/deploy/stage/smpcv2-0-stage/values-upgrade-server-left.yaml index 81b44cf68..3af78a4f1 100644 --- a/deploy/stage/smpcv2-0-stage/values-upgrade-server-left.yaml +++ b/deploy/stage/smpcv2-0-stage/values-upgrade-server-left.yaml @@ -9,6 +9,8 @@ args: - "left" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/stage/smpcv2-0-stage/values-upgrade-server-right.yaml b/deploy/stage/smpcv2-0-stage/values-upgrade-server-right.yaml index 45f690e25..56b176fb7 100644 --- a/deploy/stage/smpcv2-0-stage/values-upgrade-server-right.yaml +++ b/deploy/stage/smpcv2-0-stage/values-upgrade-server-right.yaml @@ -9,6 +9,8 @@ args: - "right" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/stage/smpcv2-1-stage/values-upgrade-server-left.yaml b/deploy/stage/smpcv2-1-stage/values-upgrade-server-left.yaml index 4cb1c1b49..ee80a04c7 100644 --- a/deploy/stage/smpcv2-1-stage/values-upgrade-server-left.yaml +++ b/deploy/stage/smpcv2-1-stage/values-upgrade-server-left.yaml @@ -9,6 +9,8 @@ args: - "left" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/stage/smpcv2-1-stage/values-upgrade-server-right.yaml b/deploy/stage/smpcv2-1-stage/values-upgrade-server-right.yaml index 9ec8a96b2..5601bca3e 100644 --- a/deploy/stage/smpcv2-1-stage/values-upgrade-server-right.yaml +++ b/deploy/stage/smpcv2-1-stage/values-upgrade-server-right.yaml @@ -9,6 +9,8 @@ args: - 
"right" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/stage/smpcv2-2-stage/values-upgrade-server-left.yaml b/deploy/stage/smpcv2-2-stage/values-upgrade-server-left.yaml index 2f181eea7..c8f0f86fa 100644 --- a/deploy/stage/smpcv2-2-stage/values-upgrade-server-left.yaml +++ b/deploy/stage/smpcv2-2-stage/values-upgrade-server-left.yaml @@ -9,6 +9,8 @@ args: - "left" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true diff --git a/deploy/stage/smpcv2-2-stage/values-upgrade-server-right.yaml b/deploy/stage/smpcv2-2-stage/values-upgrade-server-right.yaml index 2d1f948bb..14d22c514 100644 --- a/deploy/stage/smpcv2-2-stage/values-upgrade-server-right.yaml +++ b/deploy/stage/smpcv2-2-stage/values-upgrade-server-right.yaml @@ -9,6 +9,8 @@ args: - "right" - "--environment" - "$(ENVIRONMENT)" + - "--healthcheck-port" + - "3000" initContainer: enabled: true From 4b3375a2a7aeaed017142dca28c1ec9d07ee56a8 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Wed, 4 Dec 2024 23:15:35 -0800 Subject: [PATCH 062/170] scale down (#757) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 4e29972de..036cebafc 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From ef1ae021b9873758f10eb9a3b7a3b94d53879d0c Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Thu, 5 Dec 2024 09:04:30 +0100 Subject: [PATCH 063/170] Add retrieving s3 shares via getObject (#756) --- Cargo.lock | 8 ++- iris-mpc-common/Cargo.toml | 1 + iris-mpc-common/src/config/mod.rs | 7 ++ iris-mpc-common/src/helpers/key_pair.rs | 2 + 
iris-mpc-common/src/helpers/smpc_request.rs | 80 +++++++++------------ iris-mpc-common/tests/smpc_request.rs | 49 +++++++++---- iris-mpc/Cargo.toml | 1 + iris-mpc/src/bin/client.rs | 2 +- iris-mpc/src/bin/server.rs | 25 ++++--- 9 files changed, 104 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9330efe36..0cc4bde74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -679,9 +679,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.8" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07c9cdc179e6afbf5d391ab08c85eac817b51c87e1892a5edb5f7bbdc64314b4" +checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510" dependencies = [ "base64-simd", "bytes", @@ -868,7 +868,7 @@ dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -2623,6 +2623,7 @@ name = "iris-mpc" version = "0.1.0" dependencies = [ "aws-config", + "aws-sdk-s3", "aws-sdk-sns", "aws-sdk-sqs", "axum", @@ -2659,6 +2660,7 @@ name = "iris-mpc-common" version = "0.1.0" dependencies = [ "aws-config", + "aws-credential-types", "aws-sdk-kms", "aws-sdk-s3", "aws-sdk-secretsmanager", diff --git a/iris-mpc-common/Cargo.toml b/iris-mpc-common/Cargo.toml index d9a287689..cbec4c6f1 100644 --- a/iris-mpc-common/Cargo.toml +++ b/iris-mpc-common/Cargo.toml @@ -50,6 +50,7 @@ serde-big-array.workspace = true [dev-dependencies] float_eq = "1" +aws-credential-types = "1.2.1" [[bin]] name = "key-manager" diff --git a/iris-mpc-common/src/config/mod.rs b/iris-mpc-common/src/config/mod.rs index b21aea8cc..71ff9d051 100644 --- a/iris-mpc-common/src/config/mod.rs +++ b/iris-mpc-common/src/config/mod.rs @@ -49,6 +49,9 @@ pub struct Config { #[serde(default)] pub public_key_base_url: String, + #[serde(default = "default_shares_bucket_name")] + pub shares_bucket_name: String, + #[serde(default)] pub clear_db_before_init: bool, @@ -106,6 +109,10 @@ fn 
default_shutdown_last_results_sync_timeout_secs() -> u64 { 10 } +fn default_shares_bucket_name() -> String { + "wf-mpc-prod-smpcv2-sns-requests".to_string() +} + impl Config { pub fn load_config(prefix: &str) -> eyre::Result { let settings = config::Config::builder(); diff --git a/iris-mpc-common/src/helpers/key_pair.rs b/iris-mpc-common/src/helpers/key_pair.rs index c41554539..cebf7e56c 100644 --- a/iris-mpc-common/src/helpers/key_pair.rs +++ b/iris-mpc-common/src/helpers/key_pair.rs @@ -47,6 +47,8 @@ pub enum SharesDecodingError { url: String, message: String, }, + #[error("Received error message from S3 for key {}: {}", .key, .message)] + S3ResponseContent { key: String, message: String }, #[error(transparent)] SerdeError(#[from] serde_json::error::Error), #[error(transparent)] diff --git a/iris-mpc-common/src/helpers/smpc_request.rs b/iris-mpc-common/src/helpers/smpc_request.rs index af97c5b40..04863df66 100644 --- a/iris-mpc-common/src/helpers/smpc_request.rs +++ b/iris-mpc-common/src/helpers/smpc_request.rs @@ -1,5 +1,6 @@ use super::{key_pair::SharesDecodingError, sha256::calculate_sha256}; use crate::helpers::key_pair::SharesEncryptionKeyPairs; +use aws_sdk_s3::Client as S3Client; use aws_sdk_sns::types::MessageAttributeValue; use aws_sdk_sqs::{ error::SdkError, @@ -7,15 +8,10 @@ use aws_sdk_sqs::{ }; use base64::{engine::general_purpose::STANDARD, Engine}; use eyre::Report; -use reqwest::Client; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::Value; -use std::{collections::HashMap, sync::LazyLock}; +use std::{collections::HashMap, sync::Arc}; use thiserror::Error; -use tokio_retry::{ - strategy::{jitter, FixedInterval}, - Retry, -}; #[derive(Serialize, Deserialize, Debug)] pub struct SQSMessage { @@ -113,7 +109,7 @@ pub const UNIQUENESS_MESSAGE_TYPE: &str = "uniqueness"; pub struct UniquenessRequest { pub batch_size: Option, pub signup_id: String, - pub s3_presigned_url: String, + pub s3_key: String, pub 
iris_shares_file_hashes: [String; 3], } @@ -196,51 +192,45 @@ impl SharesS3Object { } } -static S3_HTTP_CLIENT: LazyLock = LazyLock::new(Client::new); - impl UniquenessRequest { pub async fn get_iris_data_by_party_id( &self, party_id: usize, + bucket_name: &String, + s3_client: &Arc, ) -> Result { - // Send a GET request to the presigned URL - let retry_strategy = FixedInterval::from_millis(200).map(jitter).take(5); - let response = Retry::spawn(retry_strategy, || async { - S3_HTTP_CLIENT - .get(self.s3_presigned_url.clone()) - .send() - .await - }) - .await?; - - // Ensure the request was successful - if response.status().is_success() { - // Parse the JSON response into the SharesS3Object struct - let shares_file: SharesS3Object = match response.json().await { - Ok(file) => file, - Err(e) => { - tracing::error!("Failed to parse JSON: {}", e); - return Err(SharesDecodingError::RequestError(e)); + let response = s3_client + .get_object() + .bucket(bucket_name) + .key(self.s3_key.as_str()) + .send() + .await + .map_err(|err| { + tracing::error!("Failed to download file: {}", err); + SharesDecodingError::S3ResponseContent { + key: self.s3_key.clone(), + message: err.to_string(), } - }; - - // Construct the field name dynamically - let field_name = format!("iris_share_{}", party_id); - // Access the field dynamically - if let Some(value) = shares_file.get(party_id) { - Ok(value.to_string()) - } else { - tracing::error!("Failed to find field: {}", field_name); - Err(SharesDecodingError::SecretStringNotFound) + })?; + + let object_body = response.body.collect().await.map_err(|e| { + tracing::error!("Failed to get object body: {}", e); + SharesDecodingError::S3ResponseContent { + key: self.s3_key.clone(), + message: e.to_string(), } - } else { - tracing::error!("Failed to download file: {}", response.status()); - Err(SharesDecodingError::ResponseContent { - status: response.status(), - url: self.s3_presigned_url.clone(), - message: 
response.text().await.unwrap_or_default(), - }) - } + })?; + + let bytes = object_body.into_bytes(); + + let shares_file: SharesS3Object = serde_json::from_slice(&bytes)?; + + let field_name = format!("iris_share_{}", party_id); + + shares_file.get(party_id).cloned().ok_or_else(|| { + tracing::error!("Failed to find field: {}", field_name); + SharesDecodingError::SecretStringNotFound + }) } pub fn decrypt_iris_share( diff --git a/iris-mpc-common/tests/smpc_request.rs b/iris-mpc-common/tests/smpc_request.rs index 273c65008..1c2e7d5fd 100644 --- a/iris-mpc-common/tests/smpc_request.rs +++ b/iris-mpc-common/tests/smpc_request.rs @@ -1,6 +1,7 @@ mod tests { + use aws_credential_types::{provider::SharedCredentialsProvider, Credentials}; + use aws_sdk_s3::Client as S3Client; use base64::{engine::general_purpose::STANDARD, Engine}; - use http::StatusCode; use iris_mpc_common::helpers::{ key_pair::{SharesDecodingError, SharesEncryptionKeyPairs}, sha256::calculate_sha256, @@ -8,10 +9,8 @@ mod tests { }; use serde_json::json; use sodiumoxide::crypto::{box_::PublicKey, sealedbox}; - use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, - }; + use std::sync::Arc; + use wiremock::{matchers::method, Mock, MockServer, ResponseTemplate}; const PREVIOUS_PUBLIC_KEY: &str = "1UY8lKlS7aVj5ZnorSfLIHlG3jg+L4ToVi4K+mLKqFQ="; const PREVIOUS_PRIVATE_KEY: &str = "X26wWfzP5fKMP7QMz0X3eZsEeF4NhJU92jT69wZg6x8="; @@ -45,7 +44,7 @@ mod tests { UniquenessRequest { batch_size: Some(1), signup_id: "signup_mock".to_string(), - s3_presigned_url: "https://example.com/mock".to_string(), + s3_key: "mock".to_string(), iris_shares_file_hashes: hashes, } } @@ -54,7 +53,7 @@ mod tests { UniquenessRequest { batch_size: None, signup_id: "test_signup_id".to_string(), - s3_presigned_url: "https://example.com/package".to_string(), + s3_key: "package".to_string(), iris_shares_file_hashes: [ "hash_0".to_string(), "hash_1".to_string(), @@ -66,26 +65,46 @@ mod tests { #[tokio::test] async 
fn test_retrieve_iris_shares_from_s3_success() { let mock_server = MockServer::start().await; - - // Simulate a successful response from the presigned URL + let bucket_name = "bobTheBucket"; + let key = "kateTheKey"; let response_body = json!({ "iris_share_0": "share_0_data", "iris_share_1": "share_1_data", "iris_share_2": "share_2_data" }); - let template = ResponseTemplate::new(StatusCode::OK).set_body_json(response_body.clone()); + let data = response_body.to_string(); Mock::given(method("GET")) - .and(path("/test_presign_url")) - .respond_with(template) + .respond_with( + ResponseTemplate::new(200) + .insert_header("Content-Type", "application/octet-stream") + .set_body_raw(data, "application/octet-stream"), + ) .mount(&mock_server) .await; + let credentials = + Credentials::new("test-access-key", "test-secret-key", None, None, "test"); + let credentials_provider = SharedCredentialsProvider::new(credentials); + // Configure the S3Client to point to the mock server + let config = aws_config::from_env() + .region("us-west-2") + .endpoint_url(mock_server.uri()) + .credentials_provider(credentials_provider) + .load() + .await; + let s3_config = aws_sdk_s3::config::Builder::from(&config) + .endpoint_url(mock_server.uri()) + .force_path_style(true) + .build(); + + let s3_client = Arc::new(S3Client::from_conf(s3_config)); + let smpc_request = UniquenessRequest { batch_size: None, signup_id: "test_signup_id".to_string(), - s3_presigned_url: mock_server.uri().clone() + "/test_presign_url", + s3_key: key.to_string(), iris_shares_file_hashes: [ "hash_0".to_string(), "hash_1".to_string(), @@ -93,7 +112,9 @@ mod tests { ], }; - let result = smpc_request.get_iris_data_by_party_id(0).await; + let result = smpc_request + .get_iris_data_by_party_id(0, &bucket_name.to_string(), &s3_client) + .await; assert!(result.is_ok()); assert_eq!(result.unwrap(), "share_0_data".to_string()); diff --git a/iris-mpc/Cargo.toml b/iris-mpc/Cargo.toml index 53454799e..605a831d8 100644 --- 
a/iris-mpc/Cargo.toml +++ b/iris-mpc/Cargo.toml @@ -11,6 +11,7 @@ repository.workspace = true aws-config.workspace = true aws-sdk-sns.workspace = true aws-sdk-sqs.workspace = true +aws-sdk-s3.workspace = true axum.workspace = true tokio.workspace = true tracing.workspace = true diff --git a/iris-mpc/src/bin/client.rs b/iris-mpc/src/bin/client.rs index 0ab9f77af..cc0cf0529 100644 --- a/iris-mpc/src/bin/client.rs +++ b/iris-mpc/src/bin/client.rs @@ -372,7 +372,7 @@ async fn main() -> eyre::Result<()> { let request_message = UniquenessRequest { batch_size: None, signup_id: request_id.to_string(), - s3_presigned_url: presigned_url, + s3_key: presigned_url, iris_shares_file_hashes, }; diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 842d4f220..26d69a13b 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -1,5 +1,6 @@ #![allow(clippy::needless_range_loop)] +use aws_sdk_s3::Client as S3Client; use aws_sdk_sns::{types::MessageAttributeValue, Client as SNSClient}; use aws_sdk_sqs::{config::Region, Client}; use axum::{response::IntoResponse, routing::get, Router}; @@ -122,6 +123,7 @@ async fn receive_batch( party_id: usize, client: &Client, sns_client: &SNSClient, + s3_client: &Arc, config: &Config, store: &Store, skip_request_ids: &[String], @@ -275,17 +277,21 @@ async fn receive_batch( batch_query.metadata.push(batch_metadata); let semaphore = Arc::clone(&semaphore); + let s3_client_arc = Arc::clone(s3_client); + let bucket_name = config.shares_bucket_name.clone(); let handle = tokio::spawn(async move { let _ = semaphore.acquire().await?; - let base_64_encoded_message_payload = - match smpc_request.get_iris_data_by_party_id(party_id).await { - Ok(iris_message_share) => iris_message_share, - Err(e) => { - tracing::error!("Failed to get iris shares: {:?}", e); - eyre::bail!("Failed to get iris shares: {:?}", e); - } - }; + let base_64_encoded_message_payload = match smpc_request + .get_iris_data_by_party_id(party_id, 
&bucket_name, &s3_client_arc) + .await + { + Ok(iris_message_share) => iris_message_share, + Err(e) => { + tracing::error!("Failed to get iris shares: {:?}", e); + eyre::bail!("Failed to get iris shares: {:?}", e); + } + }; let iris_message_share = match smpc_request.decrypt_iris_share( base_64_encoded_message_payload, @@ -664,6 +670,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { let shared_config = aws_config::from_env().region(region_provider).load().await; let sqs_client = Client::new(&shared_config); let sns_client = SNSClient::new(&shared_config); + let s3_client = Arc::new(S3Client::new(&shared_config)); let shares_encryption_key_pair = match SharesEncryptionKeyPairs::from_storage(config.clone()).await { Ok(key_pair) => key_pair, @@ -1265,6 +1272,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { party_id, &sqs_client, &sns_client, + &s3_client, &config, &store, &skip_request_ids, @@ -1318,6 +1326,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { party_id, &sqs_client, &sns_client, + &s3_client, &config, &store, &skip_request_ids, From fc021aa1326e95f46e9126ee14f46b70b68638e5 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Thu, 5 Dec 2024 00:08:31 -0800 Subject: [PATCH 064/170] scale up (#758) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 036cebafc..4e29972de 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From 146c2cae43dbeb586144d9d37d152a6b2bfacdd4 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Thu, 5 Dec 2024 09:22:47 +0100 Subject: [PATCH 065/170] Upgrade iris mpc to use get object instead of presigned URL (#759) --- deploy/stage/common-values-iris-mpc.yaml | 
2 +- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 5 ++++- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 5 ++++- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 5 ++++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 8fcd3920e..50929fe51 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.5" +image: "ghcr.io/worldcoin/iris-mpc:v0.11.0" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 1bb635b08..5759038fb 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -65,6 +65,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpcv2-stage.worldcoin.org" + - name: SMPC__SHARES_BUCKET_NAME + value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" @@ -90,7 +93,7 @@ env: - name: SMPC__SERVICE__METRICS__BUFFER_SIZE value: "256" - + - name: SMPC__SERVICE__METRICS__PREFIX value: "smpcv2-0" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 01aa826ae..8d32e92f2 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -65,6 +65,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpcv2-stage.worldcoin.org" + - name: SMPC__SHARES_BUCKET_NAME + value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" @@ -90,7 +93,7 @@ env: - name: SMPC__SERVICE__METRICS__BUFFER_SIZE value: "256" - + - name: SMPC__SERVICE__METRICS__PREFIX value: "smpcv2-1" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 42c896ec7..12355695a 
100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -65,6 +65,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpcv2-stage.worldcoin.org" + - name: SMPC__SHARES_BUCKET_NAME + value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" @@ -90,7 +93,7 @@ env: - name: SMPC__SERVICE__METRICS__BUFFER_SIZE value: "256" - + - name: SMPC__SERVICE__METRICS__PREFIX value: "smpcv2-2" From 2650cca38a0fcc222e765ed055ce05201585f260 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Fri, 6 Dec 2024 10:14:50 +0100 Subject: [PATCH 066/170] Upgrade prod to latest version (#762) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 4e29972de..143e60aaf 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.11.0" environment: prod replicaCount: 1 From c09d0b9b00f6f8e7d5be8ba3b3381333c5e61c4b Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Fri, 6 Dec 2024 16:38:56 +0100 Subject: [PATCH 067/170] add last modified at column (#764) * add last modified at column * bump iris-mpc stage image --- deploy/stage/common-values-iris-mpc.yaml | 2 +- .../20241128150412_add-modified-at.down.sql | 3 +++ .../20241128150412_add-modified-at.up.sql | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 iris-mpc-store/migrations/20241128150412_add-modified-at.down.sql create mode 100644 iris-mpc-store/migrations/20241128150412_add-modified-at.up.sql diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 50929fe51..a5c71676e 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ 
b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.11.0" +image: "ghcr.io/worldcoin/iris-mpc:v0.11.1" environment: stage replicaCount: 1 diff --git a/iris-mpc-store/migrations/20241128150412_add-modified-at.down.sql b/iris-mpc-store/migrations/20241128150412_add-modified-at.down.sql new file mode 100644 index 000000000..f33c3b008 --- /dev/null +++ b/iris-mpc-store/migrations/20241128150412_add-modified-at.down.sql @@ -0,0 +1,3 @@ +DROP TRIGGER IF EXISTS set_last_modified_at ON irises; +DROP FUNCTION IF EXISTS update_last_modified_at(); +ALTER TABLE irises DROP COLUMN last_modified_at; diff --git a/iris-mpc-store/migrations/20241128150412_add-modified-at.up.sql b/iris-mpc-store/migrations/20241128150412_add-modified-at.up.sql new file mode 100644 index 000000000..2d713a025 --- /dev/null +++ b/iris-mpc-store/migrations/20241128150412_add-modified-at.up.sql @@ -0,0 +1,14 @@ +ALTER TABLE irises ADD COLUMN last_modified_at BIGINT; + +CREATE OR REPLACE FUNCTION update_last_modified_at() +RETURNS TRIGGER AS $$ +BEGIN + NEW.last_modified_at = EXTRACT(EPOCH FROM NOW())::BIGINT; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER set_last_modified_at + BEFORE INSERT OR UPDATE ON irises + FOR EACH ROW + EXECUTE FUNCTION update_last_modified_at(); From 1443c9774cce9a5694a7b08e0d6e28c7739d99c5 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Fri, 6 Dec 2024 17:18:08 +0100 Subject: [PATCH 068/170] rename migration file (#766) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- ...fied-at.down.sql => 20241206150412_add-modified-at.down.sql} | 0 ...odified-at.up.sql => 20241206150412_add-modified-at.up.sql} | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename iris-mpc-store/migrations/{20241128150412_add-modified-at.down.sql => 20241206150412_add-modified-at.down.sql} (100%) rename iris-mpc-store/migrations/{20241128150412_add-modified-at.up.sql => 20241206150412_add-modified-at.up.sql} (100%) diff --git 
a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index a5c71676e..7e2ba804d 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.11.1" +image: "ghcr.io/worldcoin/iris-mpc:v0.11.2" environment: stage replicaCount: 1 diff --git a/iris-mpc-store/migrations/20241128150412_add-modified-at.down.sql b/iris-mpc-store/migrations/20241206150412_add-modified-at.down.sql similarity index 100% rename from iris-mpc-store/migrations/20241128150412_add-modified-at.down.sql rename to iris-mpc-store/migrations/20241206150412_add-modified-at.down.sql diff --git a/iris-mpc-store/migrations/20241128150412_add-modified-at.up.sql b/iris-mpc-store/migrations/20241206150412_add-modified-at.up.sql similarity index 100% rename from iris-mpc-store/migrations/20241128150412_add-modified-at.up.sql rename to iris-mpc-store/migrations/20241206150412_add-modified-at.up.sql From 348c791a6f4e76634601745106cced14ae3875f7 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Fri, 6 Dec 2024 17:36:23 +0100 Subject: [PATCH 069/170] release v0.11.1 to prod (#765) * release v0.11.1 to prod * release v0.11.2 instead --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 143e60aaf..8b2375410 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.11.0" +image: "ghcr.io/worldcoin/iris-mpc:v0.11.2" environment: prod replicaCount: 1 From 9ca36ff1809b81b7283c072d22d2107ce5da152d Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Sat, 7 Dec 2024 14:32:27 -0800 Subject: [PATCH 070/170] load db chunks from s3 (#731) * wip: load chunks * cleanup * more wip * cleanup * simplify * load in server * simplify * pagination and clippy * up * clippy * 
Update importer contract to make it compatible with exporter (#763) * add temp branch push * bump version * set bucket configs * log all objects * change expected chunk suffix to csv * bump image * remove output/ prefix to get timestamp * add logs on chunks to process * fix removing decremented serial id from hashSet * bump init db size * do not override records from db * remove temp change * bump image * fix test * don't log all objects * log time waiting for stream and loading into mem * add db load safety overlap * fix rebase * remove duplicate sql migrations and bump version * remove unused dependency --------- Co-authored-by: Ertugrul Aypek --- Cargo.lock | 122 ++------ Cargo.toml | 4 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- .../stage/smpcv2-0-stage/values-iris-mpc.yaml | 7 +- .../stage/smpcv2-1-stage/values-iris-mpc.yaml | 7 +- .../stage/smpcv2-2-stage/values-iris-mpc.yaml | 7 +- iris-mpc-common/src/config/mod.rs | 20 ++ iris-mpc-store/Cargo.toml | 7 + iris-mpc-store/src/lib.rs | 36 ++- iris-mpc-store/src/s3_importer.rs | 279 ++++++++++++++++++ iris-mpc/src/bin/server.rs | 102 ++++++- 11 files changed, 466 insertions(+), 127 deletions(-) create mode 100644 iris-mpc-store/src/s3_importer.rs diff --git a/Cargo.lock b/Cargo.lock index 0cc4bde74..105a293b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1441,6 +1441,27 @@ dependencies = [ "typenum", ] +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "cudarc" version = "0.12.1" @@ -2772,28 +2793,23 @@ dependencies = [ "uuid", ] -[[package]] -name = 
"iris-mpc-py" -version = "0.1.0" -dependencies = [ - "hawk-pack", - "iris-mpc-common", - "iris-mpc-cpu", - "pyo3", - "rand", -] - [[package]] name = "iris-mpc-store" version = "0.1.0" dependencies = [ + "async-trait", + "aws-sdk-s3", "bytemuck", + "bytes", + "csv", "dotenvy", "eyre", "futures", + "hex", "iris-mpc-common", "itertools 0.13.0", "rand", + "rayon", "serde", "serde_json", "sqlx", @@ -3105,15 +3121,6 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "metrics" version = "0.22.3" @@ -3992,69 +3999,6 @@ dependencies = [ "prost", ] -[[package]] -name = "pyo3" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "memoffset", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" -dependencies = [ - "once_cell", - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" 
-dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn 2.0.85", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn 2.0.85", -] - [[package]] name = "quanta" version = "0.12.3" @@ -5311,12 +5255,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" -[[package]] -name = "target-lexicon" -version = "0.12.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" - [[package]] name = "telemetry-batteries" version = "0.1.0" @@ -5933,12 +5871,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" -[[package]] -name = "unindent" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" - [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 843cb4908..1497f7d39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ members = [ "iris-mpc-common", "iris-mpc-upgrade", "iris-mpc-store", - "iris-mpc-py", ] resolver = "2" @@ -22,9 +21,12 @@ aws-sdk-sns = { version = "1.44.0" } aws-sdk-sqs = { version = "1.36.0" } aws-sdk-s3 = { version = "1.50.0" } aws-sdk-secretsmanager = { version = "1.47.0" } +async-trait = "0.1.83" axum = "0.7" clap = { version = "4", features = ["derive", "env"] } +csv = "1.3.1" base64 = "0.22.1" +bytes = "1.5" bytemuck = { version = "1.17", features = ["derive"] } dotenvy = "0.15" eyre = "0.6" diff --git 
a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 7e2ba804d..cce05306a 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.11.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.0" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 5759038fb..0c9a3752d 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -68,14 +68,17 @@ env: - name: SMPC__SHARES_BUCKET_NAME value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__DB_CHUNKS_BUCKET_NAME + value: "iris-mpc-db-exporter-store-node-0-stage-eu-north-1" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" - name: SMPC__INIT_DB_SIZE - value: "10" + value: "1000000" - name: SMPC__MAX_DB_SIZE - value: "1000000" + value: "1100000" - name: SMPC__MAX_BATCH_SIZE value: "64" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 8d32e92f2..8210aa3cf 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -68,14 +68,17 @@ env: - name: SMPC__SHARES_BUCKET_NAME value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__DB_CHUNKS_BUCKET_NAME + value: "iris-mpc-db-exporter-store-node-1-stage-eu-north-1" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" - name: SMPC__INIT_DB_SIZE - value: "10" + value: "1000000" - name: SMPC__MAX_DB_SIZE - value: "1000000" + value: "1100000" - name: SMPC__MAX_BATCH_SIZE value: "64" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 12355695a..d86443f41 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -68,14 +68,17 @@ env: - name: 
SMPC__SHARES_BUCKET_NAME value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__DB_CHUNKS_BUCKET_NAME + value: "iris-mpc-db-exporter-store-node-2-stage-eu-north-1" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" - name: SMPC__INIT_DB_SIZE - value: "10" + value: "1000000" - name: SMPC__MAX_DB_SIZE - value: "1000000" + value: "1100000" - name: SMPC__MAX_BATCH_SIZE value: "64" diff --git a/iris-mpc-common/src/config/mod.rs b/iris-mpc-common/src/config/mod.rs index 71ff9d051..2f84f8d61 100644 --- a/iris-mpc-common/src/config/mod.rs +++ b/iris-mpc-common/src/config/mod.rs @@ -87,6 +87,22 @@ pub struct Config { #[serde(default)] pub image_name: String, + + #[serde(default)] + pub db_chunks_bucket_name: String, + + #[serde(default = "default_load_chunks_parallelism")] + pub load_chunks_parallelism: usize, + + /// Defines the safety overlap to load the DB records >last_modified_at in + /// seconds This is to ensure we don't miss any records that were + /// updated during the DB export to S3 + #[serde(default = "default_db_load_safety_overlap_seconds")] + pub db_load_safety_overlap_seconds: i64, +} + +fn default_load_chunks_parallelism() -> usize { + 32 } fn default_processing_timeout_secs() -> u64 { @@ -113,6 +129,10 @@ fn default_shares_bucket_name() -> String { "wf-mpc-prod-smpcv2-sns-requests".to_string() } +fn default_db_load_safety_overlap_seconds() -> i64 { + 60 +} + impl Config { pub fn load_config(prefix: &str) -> eyre::Result { let settings = config::Config::builder(); diff --git a/iris-mpc-store/Cargo.toml b/iris-mpc-store/Cargo.toml index 1a084d010..5dec19b00 100644 --- a/iris-mpc-store/Cargo.toml +++ b/iris-mpc-store/Cargo.toml @@ -8,16 +8,23 @@ license.workspace = true repository.workspace = true [dependencies] +aws-sdk-s3.workspace = true +bytes.workspace = true +async-trait.workspace = true iris-mpc-common = { path = "../iris-mpc-common" } bytemuck.workspace = true +csv.workspace = true futures.workspace = true sqlx.workspace = true eyre.workspace = true 
+hex.workspace = true itertools.workspace = true serde.workspace = true serde_json.workspace = true tracing.workspace = true +tokio.workspace = true rand.workspace = true +rayon.workspace = true [dev-dependencies] rand.workspace = true diff --git a/iris-mpc-store/src/lib.rs b/iris-mpc-store/src/lib.rs index 29afade68..c0d504674 100644 --- a/iris-mpc-store/src/lib.rs +++ b/iris-mpc-store/src/lib.rs @@ -1,8 +1,10 @@ +mod s3_importer; + use bytemuck::cast_slice; use eyre::{eyre, Result}; use futures::{ stream::{self}, - Stream, + Stream, TryStreamExt, }; use iris_mpc_common::{ config::Config, @@ -10,6 +12,7 @@ use iris_mpc_common::{ iris_db::iris::IrisCode, }; use rand::{rngs::StdRng, Rng, SeedableRng}; +pub use s3_importer::{fetch_and_parse_chunks, last_snapshot_timestamp, ObjectStore, S3Store}; use sqlx::{ migrate::Migrator, postgres::PgPoolOptions, Executor, PgPool, Postgres, Row, Transaction, }; @@ -31,6 +34,12 @@ fn sql_switch_schema(schema_name: &str) -> Result { )) } +// Enum to define the source of the irises +pub enum IrisSource { + S3(StoredIris), + DB(StoredIris), +} + #[derive(sqlx::FromRow, Debug, Default, PartialEq, Eq)] pub struct StoredIris { #[allow(dead_code)] @@ -158,8 +167,9 @@ impl Store { /// Stream irises in parallel, without a particular order. 
pub async fn stream_irises_par( &self, + min_last_modified_at: i64, partitions: usize, - ) -> impl Stream> + '_ { + ) -> impl Stream> + '_ { let count = self.count_irises().await.expect("Failed count_irises"); let partition_size = count.div_ceil(partitions).max(1); @@ -169,14 +179,17 @@ impl Store { let start_id = 1 + partition_size * i; let end_id = start_id + partition_size - 1; - let partition_stream = - sqlx::query_as::<_, StoredIris>("SELECT * FROM irises WHERE id BETWEEN $1 AND $2") - .bind(start_id as i64) - .bind(end_id as i64) - .fetch(&self.pool); + let partition_stream = sqlx::query_as::<_, StoredIris>( + "SELECT * FROM irises WHERE id BETWEEN $1 AND $2 AND last_modified_at >= $3", + ) + .bind(start_id as i64) + .bind(end_id as i64) + .bind(min_last_modified_at) + .fetch(&self.pool) + .map_err(Into::into); partition_streams.push(Box::pin(partition_stream) - as Pin> + Send>>); + as Pin> + Send>>); } stream::select_all(partition_streams) @@ -496,7 +509,7 @@ mod tests { let got: Vec = store.stream_irises().await.try_collect().await?; assert_eq!(got.len(), 0); - let got: Vec = store.stream_irises_par(2).await.try_collect().await?; + let got: Vec = store.stream_irises_par(0, 2).await.try_collect().await?; assert_eq!(got.len(), 0); let codes_and_masks = &[ @@ -531,7 +544,8 @@ mod tests { let got_len = store.count_irises().await?; let got: Vec = store.stream_irises().await.try_collect().await?; - let mut got_par: Vec = store.stream_irises_par(2).await.try_collect().await?; + let mut got_par: Vec = + store.stream_irises_par(0, 2).await.try_collect().await?; got_par.sort_by_key(|iris| iris.id); assert_eq!(got, got_par); @@ -609,7 +623,7 @@ mod tests { // Compare with the parallel version with several edge-cases. 
for parallelism in [1, 5, MAX_CONNECTIONS as usize + 1] { let mut got_par: Vec = store - .stream_irises_par(parallelism) + .stream_irises_par(0, parallelism) .await .try_collect() .await?; diff --git a/iris-mpc-store/src/s3_importer.rs b/iris-mpc-store/src/s3_importer.rs new file mode 100644 index 000000000..0ec5f4ab8 --- /dev/null +++ b/iris-mpc-store/src/s3_importer.rs @@ -0,0 +1,279 @@ +use crate::StoredIris; +use async_trait::async_trait; +use aws_sdk_s3::Client; +use bytes::Bytes; +use futures::{stream, Stream, StreamExt}; +use iris_mpc_common::{IRIS_CODE_LENGTH, MASK_CODE_LENGTH}; +use rayon::{iter::ParallelIterator, prelude::ParallelBridge}; +use serde::Deserialize; +use std::{io::Cursor, mem, pin::Pin, sync::Arc}; +use tokio::task; + +const SINGLE_ELEMENT_SIZE: usize = IRIS_CODE_LENGTH * mem::size_of::() * 2 + + MASK_CODE_LENGTH * mem::size_of::() * 2 + + mem::size_of::(); // 75 KB +const CSV_BUFFER_CAPACITY: usize = SINGLE_ELEMENT_SIZE * 10; + +#[async_trait] +pub trait ObjectStore: Send + Sync + 'static { + async fn get_object(&self, key: &str) -> eyre::Result; + async fn list_objects(&self) -> eyre::Result>; +} + +pub struct S3Store { + client: Arc, + bucket: String, +} + +impl S3Store { + pub fn new(client: Arc, bucket: String) -> Self { + Self { client, bucket } + } +} + +#[async_trait] +impl ObjectStore for S3Store { + async fn get_object(&self, key: &str) -> eyre::Result { + let result = self + .client + .get_object() + .bucket(&self.bucket) + .key(key) + .send() + .await?; + + let data = result.body.collect().await?; + Ok(data.into_bytes()) + } + + async fn list_objects(&self) -> eyre::Result> { + let mut objects = Vec::new(); + let mut continuation_token = None; + + loop { + let mut request = self.client.list_objects_v2().bucket(&self.bucket); + + if let Some(token) = continuation_token { + request = request.continuation_token(token); + } + + let response = request.send().await?; + + objects.extend( + response + .contents() + .iter() + 
.filter_map(|obj| obj.key().map(String::from)), + ); + + match response.next_continuation_token() { + Some(token) => continuation_token = Some(token.to_string()), + None => break, + } + } + + Ok(objects) + } +} + +#[derive(Debug, Deserialize)] +struct CsvIrisRecord { + id: String, + left_code: String, + left_mask: String, + right_code: String, + right_mask: String, +} + +fn hex_to_bytes(hex: &str, byte_len: usize) -> eyre::Result> { + if hex.is_empty() { + return Ok(vec![]); + } + let mut bytes = vec![0; byte_len]; + hex::decode_to_slice(hex, &mut bytes)?; + Ok(bytes) +} + +pub async fn last_snapshot_timestamp(store: &impl ObjectStore) -> eyre::Result { + store + .list_objects() + .await? + .into_iter() + .filter(|f| f.starts_with("output/") && f.ends_with(".timestamp")) + .filter_map(|f| { + f.replace(".timestamp", "") + .replace("output/", "") + .parse::() + .ok() + }) + .max() + .ok_or_else(|| eyre::eyre!("No snapshot found")) +} + +pub async fn fetch_and_parse_chunks( + store: &impl ObjectStore, + concurrency: usize, +) -> Pin> + Send + '_>> { + let chunks = store.list_objects().await.unwrap(); + stream::iter(chunks) + .filter_map(|chunk| async move { + if chunk.ends_with(".csv") { + tracing::info!("Processing chunk: {}", chunk); + Some(chunk) + } else { + None + } + }) + .map(move |chunk| async move { + let result = store.get_object(&chunk).await?; + task::spawn_blocking(move || { + let cursor = Cursor::new(result); + let reader = csv::ReaderBuilder::new() + .has_headers(true) + .buffer_capacity(CSV_BUFFER_CAPACITY) + .from_reader(cursor); + + let records: Vec> = reader + .into_deserialize() + .par_bridge() + .map(|r: Result| { + let raw = r.map_err(|e| eyre::eyre!("CSV parse error: {}", e))?; + + Ok(StoredIris { + id: raw.id.parse()?, + left_code: hex_to_bytes( + &raw.left_code, + IRIS_CODE_LENGTH * mem::size_of::(), + )?, + left_mask: hex_to_bytes( + &raw.left_mask, + MASK_CODE_LENGTH * mem::size_of::(), + )?, + right_code: hex_to_bytes( + &raw.right_code, + 
IRIS_CODE_LENGTH * mem::size_of::(), + )?, + right_mask: hex_to_bytes( + &raw.right_mask, + MASK_CODE_LENGTH * mem::size_of::(), + )?, + }) + }) + .collect(); + + Ok::<_, eyre::Error>(stream::iter(records)) + }) + .await? + }) + .buffer_unordered(concurrency) + .flat_map(|result| match result { + Ok(stream) => stream.boxed(), + Err(e) => stream::once(async move { Err(e) }).boxed(), + }) + .boxed() +} + +#[cfg(test)] +mod tests { + use super::*; + use rand::Rng; + use std::{cmp::min, collections::HashSet}; + + #[derive(Default, Clone)] + pub struct MockStore { + objects: std::collections::HashMap>, + } + + impl MockStore { + pub fn new() -> Self { + Self::default() + } + + pub fn add_test_data(&mut self, key: &str, records: Vec) { + let mut csv = Vec::new(); + { + let mut writer = csv::Writer::from_writer(&mut csv); + writer + .write_record(["id", "left_code", "left_mask", "right_code", "right_mask"]) + .unwrap(); + + for record in records { + writer + .write_record(&[ + record.id.to_string(), + hex::encode(record.left_code), + hex::encode(record.left_mask), + hex::encode(record.right_code), + hex::encode(record.right_mask), + ]) + .unwrap(); + } + } + self.objects.insert(key.to_string(), csv); + } + } + + #[async_trait] + impl ObjectStore for MockStore { + async fn get_object(&self, key: &str) -> eyre::Result { + self.objects + .get(key) + .cloned() + .map(Bytes::from) + .ok_or_else(|| eyre::eyre!("Object not found: {}", key)) + } + + async fn list_objects(&self) -> eyre::Result> { + Ok(self.objects.keys().cloned().collect()) + } + } + + fn random_bytes(len: usize) -> Vec { + let mut rng = rand::thread_rng(); + let mut v = vec![0u8; len]; + v.fill_with(|| rng.gen()); + v + } + + fn dummy_entry(id: usize) -> StoredIris { + StoredIris { + id: id as i64, + left_code: random_bytes(IRIS_CODE_LENGTH * mem::size_of::()), + left_mask: random_bytes(MASK_CODE_LENGTH * mem::size_of::()), + right_code: random_bytes(IRIS_CODE_LENGTH * mem::size_of::()), + right_mask: 
random_bytes(MASK_CODE_LENGTH * mem::size_of::()), + } + } + + #[tokio::test] + async fn test_fetch_and_parse_chunks() { + const MOCK_ENTRIES: usize = 107; + const MOCK_CHUNK_SIZE: usize = 10; + let mut store = MockStore::new(); + let n_chunks = MOCK_ENTRIES.div_ceil(MOCK_CHUNK_SIZE); + for i in 0..n_chunks { + let start_serial_id = i * MOCK_CHUNK_SIZE + 1; + let end_serial_id = min((i + 1) * MOCK_CHUNK_SIZE, MOCK_ENTRIES); + store.add_test_data( + &format!("{start_serial_id}.csv"), + (start_serial_id..=end_serial_id).map(dummy_entry).collect(), + ); + } + + assert_eq!( + store.list_objects().await.unwrap().len(), + MOCK_ENTRIES.div_ceil(MOCK_CHUNK_SIZE) + ); + + let mut chunks = fetch_and_parse_chunks(&store, 1).await; + let mut count = 0; + let mut ids: HashSet = HashSet::from_iter(1..MOCK_ENTRIES); + while let Some(chunk) = chunks.next().await { + let chunk = chunk.unwrap(); + ids.remove(&(chunk.id as usize)); + count += 1; + } + assert_eq!(count, MOCK_ENTRIES); + assert!(ids.is_empty()); + } +} diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 26d69a13b..28339c1f1 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -6,7 +6,7 @@ use aws_sdk_sqs::{config::Region, Client}; use axum::{response::IntoResponse, routing::get, Router}; use clap::Parser; use eyre::{eyre, Context}; -use futures::TryStreamExt; +use futures::{stream::select_all, StreamExt, TryStreamExt}; use iris_mpc_common::{ config::{json_wrapper::JsonStrWrapper, Config, Opt}, galois_engine::degree4::{GaloisRingIrisCodeShare, GaloisRingTrimmedMaskCodeShare}, @@ -38,18 +38,21 @@ use iris_mpc_gpu::{ BatchQueryEntriesPreprocessed, ServerActor, ServerJobResult, }, }; -use iris_mpc_store::{Store, StoredIrisRef}; +use iris_mpc_store::{ + fetch_and_parse_chunks, last_snapshot_timestamp, IrisSource, S3Store, Store, StoredIrisRef, +}; use metrics_exporter_statsd::StatsdBuilder; use reqwest::StatusCode; use serde::{Deserialize, Serialize}; use std::{ 
backtrace::Backtrace, - collections::HashMap, + collections::{HashMap, HashSet}, mem, panic, sync::{ atomic::{AtomicBool, Ordering}, Arc, LazyLock, Mutex, }, + time, time::{Duration, Instant}, }; use telemetry_batteries::tracing::{datadog::DatadogBattery, TracingShutdownHandle}; @@ -671,6 +674,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { let sqs_client = Client::new(&shared_config); let sns_client = SNSClient::new(&shared_config); let s3_client = Arc::new(S3Client::new(&shared_config)); + let s3_client_clone = Arc::clone(&s3_client); let shares_encryption_key_pair = match SharesEncryptionKeyPairs::from_storage(config.clone()).await { Ok(key_pair) => key_pair, @@ -897,6 +901,9 @@ async fn server_main(config: Config) -> eyre::Result<()> { .ok_or(eyre!("Missing database config"))? .load_parallelism; + let load_chunks_parallelism = config.load_chunks_parallelism; + let db_chunks_bucket_name = config.db_chunks_bucket_name.clone(); + let (tx, rx) = oneshot::channel(); background_tasks.spawn_blocking(move || { let device_manager = Arc::new(DeviceManager::init()); @@ -974,20 +981,70 @@ async fn server_main(config: Config) -> eyre::Result<()> { "Initialize iris db: Loading from DB (parallelism: {})", parallelism ); + let s3_store = S3Store::new(s3_client_clone, db_chunks_bucket_name); tokio::runtime::Handle::current().block_on(async { - let mut stream = store.stream_irises_par(parallelism).await; + // First fetch last snapshot from S3 + let last_snapshot_timestamp = last_snapshot_timestamp(&s3_store).await?; + let min_last_modified_at = + last_snapshot_timestamp - config.db_load_safety_overlap_seconds; + let stream_s3 = fetch_and_parse_chunks(&s3_store, load_chunks_parallelism) + .await + .map(|result| result.map(IrisSource::S3)) + .boxed(); + + let stream_db = store + .stream_irises_par(min_last_modified_at, parallelism) + .await + .map(|result| result.map(IrisSource::DB)) + .boxed(); + + let mut stream = select_all(vec![stream_s3, stream_db]); + + let 
now = Instant::now(); + let mut now_load_summary = Instant::now(); + let mut time_waiting_for_stream = time::Duration::from_secs(0); + let mut time_loading_into_memory = time::Duration::from_secs(0); let mut record_counter = 0; - while let Some(iris) = stream.try_next().await? { + let mut all_serial_ids: HashSet = + HashSet::from_iter(1..=(store_len as i64)); + let mut serial_ids_from_db: HashSet = HashSet::new(); + while let Some(result) = stream.try_next().await? { + time_waiting_for_stream += now_load_summary.elapsed(); + now_load_summary = Instant::now(); + + let iris = match result { + IrisSource::DB(iris) => { + serial_ids_from_db.insert(iris.id()); + iris + } + IrisSource::S3(iris) => { + if serial_ids_from_db.contains(&iris.id()) { + tracing::warn!( + "Skip overriding record already loaded via DB with S3 \ + record: {}", + iris.id() + ); + continue; + } + iris + } + }; + if record_counter % 100_000 == 0 { + let elapsed = now.elapsed(); tracing::info!( - "Loaded {} records from db into memory", - record_counter + "Loaded {} records into memory in {:?} ({:.2} entries/s)", + record_counter, + elapsed, + record_counter as f64 / elapsed.as_secs_f64() ); } - if iris.index() > store_len { - tracing::error!("Inconsistent iris index {}", iris.index()); - return Err(eyre!("Inconsistent iris index {}", iris.index())); + + if iris.index() == 0 || iris.index() > store_len { + tracing::error!("Invalid iris index {}", iris.index()); + return Err(eyre!("Invalid iris index {}", iris.index())); } + actor.load_single_record( iris.index() - 1, iris.left_code(), @@ -995,14 +1052,33 @@ async fn server_main(config: Config) -> eyre::Result<()> { iris.right_code(), iris.right_mask(), ); + time_loading_into_memory += now_load_summary.elapsed(); + now_load_summary = Instant::now(); + + all_serial_ids.remove(&(iris.index() as i64)); record_counter += 1; } - assert_eq!( - record_counter, store_len, - "Loaded record count does not match db size" + tracing::info!( + "Loading summary => 
Loaded {:?} items. Waited for stream: {:?}s, \ + Loaded into memory: {:?}s", + record_counter, + time_waiting_for_stream, + time_loading_into_memory, ); + // Clear the memory allocated by temp HashSet + serial_ids_from_db.clear(); + serial_ids_from_db.shrink_to_fit(); + + if !all_serial_ids.is_empty() { + tracing::error!("Not all serial_ids were loaded: {:?}", all_serial_ids); + return Err(eyre!( + "Not all serial_ids were loaded: {:?}", + all_serial_ids + )); + } + tracing::info!("Preprocessing db"); actor.preprocess_db(); From cee454a5b5aadb342995709cbc3adc5f5d7c2716 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Sun, 8 Dec 2024 12:29:26 +0100 Subject: [PATCH 071/170] Add logs to exporter and add back iris-mpc-py to workspace (#768) * reduce db init size * bump init db size * log records loaded via db vs s3 * bump image * bump version --- Cargo.lock | 95 +++++++++++++++++++ Cargo.toml | 1 + deploy/stage/common-values-iris-mpc.yaml | 2 +- .../stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- .../stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- .../stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- iris-mpc/src/bin/server.rs | 10 +- 7 files changed, 108 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 105a293b4..f80154e29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2793,6 +2793,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "iris-mpc-py" +version = "0.1.0" +dependencies = [ + "hawk-pack", + "iris-mpc-common", + "iris-mpc-cpu", + "pyo3", + "rand", +] + [[package]] name = "iris-mpc-store" version = "0.1.0" @@ -3121,6 +3132,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = 
"metrics" version = "0.22.3" @@ -3999,6 +4019,69 @@ dependencies = [ "prost", ] +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.85", +] + [[package]] name = "quanta" version = "0.12.3" @@ -5255,6 +5338,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "telemetry-batteries" version = "0.1.0" @@ -5871,6 +5960,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 1497f7d39..e475c9cbd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "iris-mpc-common", "iris-mpc-upgrade", "iris-mpc-store", + "iris-mpc-py", ] resolver = "2" diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index cce05306a..aa9929d73 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.0" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 0c9a3752d..efaa0d52d 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -75,7 +75,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "1000000" + value: "800000" - name: SMPC__MAX_DB_SIZE value: "1100000" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 8210aa3cf..81524f5f2 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -75,7 +75,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "1000000" + value: "800000" - name: SMPC__MAX_DB_SIZE value: "1100000" diff --git 
a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index d86443f41..e4f1a44c5 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -75,7 +75,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "1000000" + value: "800000" - name: SMPC__MAX_DB_SIZE value: "1100000" diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 28339c1f1..67f64611c 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -1008,12 +1008,15 @@ async fn server_main(config: Config) -> eyre::Result<()> { let mut all_serial_ids: HashSet = HashSet::from_iter(1..=(store_len as i64)); let mut serial_ids_from_db: HashSet = HashSet::new(); + let mut n_loaded_from_db = 0; + let mut n_loaded_from_s3 = 0; while let Some(result) = stream.try_next().await? { time_waiting_for_stream += now_load_summary.elapsed(); now_load_summary = Instant::now(); let iris = match result { IrisSource::DB(iris) => { + n_loaded_from_db += 1; serial_ids_from_db.insert(iris.id()); iris } @@ -1026,6 +1029,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { ); continue; } + n_loaded_from_s3 += 1; iris } }; @@ -1060,9 +1064,11 @@ async fn server_main(config: Config) -> eyre::Result<()> { } tracing::info!( - "Loading summary => Loaded {:?} items. Waited for stream: {:?}s, \ - Loaded into memory: {:?}s", + "Loading summary => Loaded {:?} items. {} from DB, {} from S3. 
Waited \ + for stream: {:?}, Loaded into memory: {:?}", record_counter, + n_loaded_from_db, + n_loaded_from_s3, time_waiting_for_stream, time_loading_into_memory, ); From e5f6375a0e05f36746b2247f4f0ee4d8fb00b1e2 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Sun, 8 Dec 2024 13:28:03 +0100 Subject: [PATCH 072/170] release prod v0.12.1 (#769) * release prod v0.12.1 * add prod db chunks --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 3 +++ deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 3 +++ deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 3 +++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 8b2375410..62f8a5ffe 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.11.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" environment: prod replicaCount: 1 diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 99cca1fa6..e595df095 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -77,6 +77,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" + - name: SMPC__DB_CHUNKS_BUCKET_NAME + value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 8fdff5cf4..82663a304 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -77,6 +77,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" + - name: SMPC__DB_CHUNKS_BUCKET_NAME + value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" + - name: SMPC__CLEAR_DB_BEFORE_INIT 
value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 5a7751442..80da57a5e 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -77,6 +77,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" + - name: SMPC__DB_CHUNKS_BUCKET_NAME + value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From a3c6cd6acae82c849520bdaa0a9717be2b319d23 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Sun, 8 Dec 2024 14:27:38 +0100 Subject: [PATCH 073/170] bump aws s3 sdk version (#770) --- Cargo.lock | 63 ++++++++++++++---------- Cargo.toml | 4 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f80154e29..f0a2bac70 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -237,9 +237,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.5.9" +version = "1.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d6448cfb224dd6a9b9ac734f58622dd0d4751f3589f3b777345745f46b2eb14" +checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924" dependencies = [ "aws-credential-types", "aws-runtime", @@ -248,7 +248,7 @@ dependencies = [ "aws-sdk-sts", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.4.3" +version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a10d5c055aa540164d9561a0e2e74ad30f0dcf7393c3a92f6733ddf9c5762468" +checksum = "b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea" dependencies = [ "aws-credential-types", 
"aws-sigv4", @@ -340,7 +340,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -354,9 +354,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.58.0" +version = "1.65.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0656a79cf5e6ab0d4bb2465cd750a7a2fd7ea26c062183ed94225f5782e22365" +checksum = "d3ba2c5c0f2618937ce3d4a5ad574b86775576fa24006bcb3128c6e2cbf3c34e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -365,7 +365,7 @@ dependencies = [ "aws-smithy-checksums", "aws-smithy-eventstream", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -396,7 +396,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -419,7 +419,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -442,7 +442,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -456,15 +456,15 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.47.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8776850becacbd3a82a4737a9375ddb5c6832a51379f24443a98e61513f852c" +checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", 
"aws-smithy-types", @@ -478,15 +478,15 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.48.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0007b5b8004547133319b6c4e87193eee2a0bcb3e4c18c75d09febe9dab7b383" +checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -500,15 +500,15 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.47.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fffaa356e7f1c725908b75136d53207fa714e348f365671df14e95a60530ad3" +checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -523,9 +523,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5619742a0d8f253be760bfbb8e8e8368c69e3587e4637af5754e488a611499b1" +checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -623,6 +623,15 @@ dependencies = [ "aws-smithy-types", ] +[[package]] +name = "aws-smithy-json" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" +dependencies = [ + "aws-smithy-types", +] + [[package]] name = "aws-smithy-query" version = "0.60.7" @@ -635,9 +644,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.3" +version = "1.7.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be28bd063fa91fd871d131fc8b68d7cd4c5fa0869bea68daca50dcb1cbd76be2" +checksum = "9f20685047ca9d6f17b994a07f629c813f08b5bce65523e47124879e60103d45" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -662,9 +671,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96" +checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd" dependencies = [ "aws-smithy-async", "aws-smithy-types", diff --git a/Cargo.toml b/Cargo.toml index e475c9cbd..0385f091f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,11 +16,11 @@ license = "MIT OR (Apache-2.0 WITH LLVM-exception)" repository = "https://github.com/worldcoin/iris-mpc" [workspace.dependencies] -aws-config = { version = "1.5.4", features = ["behavior-version-latest"] } +aws-config = { version = "1.5.10", features = ["behavior-version-latest"] } aws-sdk-kms = { version = "1.44.0" } aws-sdk-sns = { version = "1.44.0" } aws-sdk-sqs = { version = "1.36.0" } -aws-sdk-s3 = { version = "1.50.0" } +aws-sdk-s3 = { version = "1.65.0" } aws-sdk-secretsmanager = { version = "1.47.0" } async-trait = "0.1.83" axum = "0.7" diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index aa9929d73..5b9b99d1d 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.2" environment: stage replicaCount: 1 From 6087b7e3bf9d5d41a96dcce1497b3db93807ff06 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Sun, 8 Dec 2024 23:57:02 -0800 Subject: [PATCH 074/170] Update common-values-upgrade-server-left.yaml (#775) * Update common-values-upgrade-server-left.yaml * Update 
common-values-upgrade-server-right.yaml --- deploy/prod/common-values-upgrade-server-left.yaml | 2 +- deploy/prod/common-values-upgrade-server-right.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/prod/common-values-upgrade-server-left.yaml b/deploy/prod/common-values-upgrade-server-left.yaml index 0c00a9ef2..ceddfc97a 100644 --- a/deploy/prod/common-values-upgrade-server-left.yaml +++ b/deploy/prod/common-values-upgrade-server-left.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" environment: prod replicaCount: 1 diff --git a/deploy/prod/common-values-upgrade-server-right.yaml b/deploy/prod/common-values-upgrade-server-right.yaml index 4da521f19..f34c939ad 100644 --- a/deploy/prod/common-values-upgrade-server-right.yaml +++ b/deploy/prod/common-values-upgrade-server-right.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" environment: prod replicaCount: 1 From d589eac4e8c1b107142d6da0a3751183c7ce8b38 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 9 Dec 2024 00:52:00 -0800 Subject: [PATCH 075/170] Update common-values-iris-mpc.yaml (#776) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 62f8a5ffe..63613dffc 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From 1f5b3b4a5c1c0221a2d0258c2ba14d579bc2db9c Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 9 Dec 2024 01:23:51 -0800 Subject: [PATCH 076/170] scale up (#777) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml 
b/deploy/prod/common-values-iris-mpc.yaml index 63613dffc..62f8a5ffe 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From 8932e93537ae24ac5e3743675b54ae057361a846 Mon Sep 17 00:00:00 2001 From: iliailia Date: Mon, 9 Dec 2024 12:08:42 +0100 Subject: [PATCH 077/170] Move lifting to distance evaluation (#760) --- iris-mpc-cpu/benches/hnsw.rs | 27 ++++- iris-mpc-cpu/src/hawkers/galois_store.rs | 71 ++++++++------ iris-mpc-cpu/src/network/grpc.rs | 20 ++-- iris-mpc-cpu/src/protocol/ops.rs | 120 +++++++++++++---------- 4 files changed, 138 insertions(+), 100 deletions(-) diff --git a/iris-mpc-cpu/benches/hnsw.rs b/iris-mpc-cpu/benches/hnsw.rs index 7b843e408..c988e1bbf 100644 --- a/iris-mpc-cpu/benches/hnsw.rs +++ b/iris-mpc-cpu/benches/hnsw.rs @@ -6,7 +6,9 @@ use iris_mpc_cpu::{ database_generators::{create_random_sharing, generate_galois_iris_shares}, execution::local::LocalRuntime, hawkers::{galois_store::LocalNetAby3NgStoreProtocol, plaintext_store::PlaintextStore}, - protocol::ops::{cross_compare, galois_ring_pairwise_distance, galois_ring_to_rep3}, + protocol::ops::{ + batch_signed_lift_vec, cross_compare, galois_ring_pairwise_distance, galois_ring_to_rep3, + }, }; use rand::SeedableRng; use tokio::task::JoinSet; @@ -99,9 +101,23 @@ fn bench_hnsw_primitives(c: &mut Criterion) { let t2i = t2[index].clone(); let mut player_session = runtime.sessions.get(player).unwrap().clone(); jobs.spawn(async move { - cross_compare(&mut player_session, d1i, t1i, d2i, t2i) - .await - .unwrap() + let ds_and_ts = batch_signed_lift_vec(&mut player_session, vec![ + d1i.clone(), + d2i.clone(), + t1i.clone(), + t2i.clone(), + ]) + .await + .unwrap(); + cross_compare( + &mut player_session, + ds_and_ts[0].clone(), + ds_and_ts[1].clone(), + ds_and_ts[2].clone(), + ds_and_ts[3].clone(), + ) + .await + 
.unwrap() }); } let _outputs = black_box(jobs.join_all().await); @@ -146,6 +162,9 @@ fn bench_gr_primitives(c: &mut Criterion) { let ds_and_ts = galois_ring_to_rep3(&mut player_session, ds_and_ts) .await .unwrap(); + let ds_and_ts = batch_signed_lift_vec(&mut player_session, ds_and_ts) + .await + .unwrap(); cross_compare( &mut player_session, ds_and_ts[0].clone(), diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs index f69eea4cc..e2186a324 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -9,8 +9,8 @@ use crate::{ hawkers::plaintext_store::PointId, network::NetworkType, protocol::ops::{ - compare_threshold_and_open, cross_compare, galois_ring_pairwise_distance, - galois_ring_to_rep3, + batch_signed_lift_vec, compare_threshold_and_open, cross_compare, + galois_ring_pairwise_distance, galois_ring_to_rep3, }, shares::{ ring_impl::RingElement, @@ -194,25 +194,40 @@ impl LocalNetAby3NgStoreProtocol { pub fn prepare_query(&mut self, code: GaloisRingSharedIris) -> QueryRef { self.storage.prepare_query(code) } -} -/// Assumes that the first iris of each pair is preprocessed. -async fn eval_pairwise_distances( - pairs: Vec<(GaloisRingSharedIris, GaloisRingSharedIris)>, - player_session: &mut Session, -) -> Vec> { - let ds_and_ts = galois_ring_pairwise_distance(player_session, &pairs) - .await - .unwrap(); - galois_ring_to_rep3(player_session, ds_and_ts) - .await - .unwrap() + pub async fn lift_distances( + &mut self, + distances: Vec>, + ) -> eyre::Result>> { + let mut player_session = self.get_owner_session(); + let distances = batch_signed_lift_vec(&mut player_session, distances).await?; + Ok(distances + .chunks(2) + .map(|dot_products| { + DistanceShare::new(dot_products[0].clone(), dot_products[1].clone()) + }) + .collect::>()) + } + + /// Assumes that the first iris of each pair is preprocessed. 
+ async fn eval_pairwise_distances( + &mut self, + pairs: Vec<(GaloisRingSharedIris, GaloisRingSharedIris)>, + ) -> Vec> { + let mut player_session = self.get_owner_session(); + let ds_and_ts = galois_ring_pairwise_distance(&mut player_session, &pairs) + .await + .unwrap(); + galois_ring_to_rep3(&mut player_session, ds_and_ts) + .await + .unwrap() + } } impl VectorStore for LocalNetAby3NgStoreProtocol { type QueryRef = QueryRef; // Point ID, pending insertion. type VectorRef = VectorId; // Point ID, inserted. - type DistanceRef = DistanceShare; // Distance represented as shares. + type DistanceRef = DistanceShare; // Distance represented as shares. async fn insert(&mut self, query: &Self::QueryRef) -> Self::VectorRef { self.storage.insert(query) @@ -223,11 +238,10 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { query: &Self::QueryRef, vector: &Self::VectorRef, ) -> Self::DistanceRef { - let mut player_session = self.get_owner_session(); let vector_point = self.storage.get_vector(vector); let pairs = vec![(query.processed_query.clone(), vector_point.clone())]; - let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; - DistanceShare::new(ds_and_ts[0].clone(), ds_and_ts[1].clone()) + let dist = self.eval_pairwise_distances(pairs).await; + self.lift_distances(dist).await.unwrap()[0].clone() } async fn eval_distance_batch( @@ -235,7 +249,6 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { query: &Self::QueryRef, vectors: &[Self::VectorRef], ) -> Vec { - let mut player_session = self.get_owner_session(); let pairs = vectors .iter() .map(|vector_id| { @@ -243,13 +256,8 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { (query.processed_query.clone(), vector_point.clone()) }) .collect::>(); - let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; - ds_and_ts - .chunks(2) - .map(|dot_products| { - DistanceShare::new(dot_products[0].clone(), dot_products[1].clone()) - }) - .collect::>() + let dist = 
self.eval_pairwise_distances(pairs).await; + self.lift_distances(dist).await.unwrap() } async fn is_match(&mut self, distance: &Self::DistanceRef) -> bool { @@ -282,10 +290,10 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { } impl LocalNetAby3NgStoreProtocol { - pub fn get_trivial_share(&self, distance: u16) -> Share { + pub fn get_trivial_share(&self, distance: u16) -> Share { let player = self.get_owner_index(); - let distance_elem = RingElement(distance); - let zero_elem = RingElement(0_u16); + let distance_elem = RingElement(distance as u32); + let zero_elem = RingElement(0_u32); match player { 0 => Share::new(distance_elem, zero_elem), @@ -300,14 +308,13 @@ impl LocalNetAby3NgStoreProtocol { vector1: &::VectorRef, vector2: &::VectorRef, ) -> ::DistanceRef { - let mut player_session = self.get_owner_session(); let point1 = self.storage.get_vector(vector1); let mut point2 = self.storage.get_vector(vector2).clone(); point2.code.preprocess_iris_code_query_share(); point2.mask.preprocess_mask_code_query_share(); let pairs = vec![(point1.clone(), point2.clone())]; - let ds_and_ts = eval_pairwise_distances(pairs, &mut player_session).await; - DistanceShare::new(ds_and_ts[0].clone(), ds_and_ts[1].clone()) + let dist = self.eval_pairwise_distances(pairs).await; + self.lift_distances(dist).await.unwrap()[0].clone() } async fn graph_from_plain( diff --git a/iris-mpc-cpu/src/network/grpc.rs b/iris-mpc-cpu/src/network/grpc.rs index b1e134413..73abff990 100644 --- a/iris-mpc-cpu/src/network/grpc.rs +++ b/iris-mpc-cpu/src/network/grpc.rs @@ -233,28 +233,26 @@ impl Networking for GrpcNetworking { multiplier: 1.1, ..Default::default() }; + let outgoing_stream = self + .outgoing_streams + .get_stream(*session_id, receiver.clone())?; + + // Send message via the outgoing stream + let request = SendRequest { data: value }; retry(backoff, || async { - let outgoing_stream = self - .outgoing_streams - .get_stream(*session_id, receiver.clone())?; - - // Send message via the 
outgoing stream - let request = SendRequest { - data: value.clone(), - }; tracing::trace!( "INIT: Sending message {:?} from {:?} to {:?} in session {:?}", - value, + request.data, self.party_id, receiver, session_id ); outgoing_stream - .send(request) + .send(request.clone()) .map_err(|e| eyre!(e.to_string()))?; tracing::trace!( "SUCCESS: Sending message {:?} from {:?} to {:?} in session {:?}", - value, + request.data, self.party_id, receiver, session_id diff --git a/iris-mpc-cpu/src/protocol/ops.rs b/iris-mpc-cpu/src/protocol/ops.rs index 94dd7897b..d76c50d25 100644 --- a/iris-mpc-cpu/src/protocol/ops.rs +++ b/iris-mpc-cpu/src/protocol/ops.rs @@ -1,10 +1,10 @@ -use super::binary::single_extract_msb_u32; +use super::binary::{mul_lift_2k, single_extract_msb_u32}; use crate::{ database_generators::GaloisRingSharedIris, execution::session::{BootSession, Session, SessionHandles}, network::value::NetworkValue::{self}, protocol::{ - binary::{lift, mul_lift_2k, open_bin}, + binary::{lift, open_bin}, prf::{Prf, PrfSeed}, }, shares::{ @@ -20,7 +20,6 @@ pub(crate) const MATCH_THRESHOLD_RATIO: f64 = iris_mpc_common::iris_db::iris::MA pub(crate) const B_BITS: u64 = 16; pub(crate) const B: u64 = 1 << B_BITS; pub(crate) const A: u64 = ((1. - 2. * MATCH_THRESHOLD_RATIO) * B as f64) as u64; -pub(crate) const A_BITS: u32 = u64::BITS - A.leading_zeros(); /// Setup the PRF seeds in the replicated protocol. /// Each party sends to the next party a random seed. @@ -51,30 +50,47 @@ pub async fn setup_replicated_prf(session: &BootSession, my_seed: PrfSeed) -> ey Ok(Prf::new(my_seed, other_seed)) } -/// Takes as input two code and mask dot products between two Irises: i, j. -/// i.e. code_dot = and mask_dot = -/// Then lifts the two dot products to the larger ring (Z_{2^32}), multiplies -/// with some predefined constants B = 2^16 -/// A = ((1. - 2. * MATCH_THRESHOLD_RATIO) * B as f64) -/// and then compares mask_dot * A < code_dot * B. 
+/// Compares the distance between two iris pairs to a threshold. +/// +/// - Takes as input two code and mask dot products between two Irises: i, j. +/// i.e. code_dot = and mask_dot = . +/// - Lifts the two dot products to the ring Z_{2^32}. +/// - Multiplies with predefined threshold constants B = 2^16 and A = ((1. - 2. +/// * MATCH_THRESHOLD_RATIO) * B as f64). +/// - Compares mask_dot * A < code_dot * B. pub async fn compare_threshold( + session: &mut Session, + code_dot: Share, + mask_dot: Share, +) -> eyre::Result> { + let mut x = mask_dot * A as u32; + let y = code_dot * B as u32; + x -= y; + + single_extract_msb_u32::<32>(session, x).await +} + +/// The same as compare_threshold, but the input shares are 16-bit and lifted to +/// 32-bit before threshold comparison. +/// +/// See compare_threshold for more details. +pub async fn lift_and_compare_threshold( session: &mut Session, code_dot: Share, mask_dot: Share, ) -> eyre::Result> { - debug_assert!(A_BITS as u64 <= B_BITS); - let y = mul_lift_2k::(&code_dot); let mut x = lift::<{ B_BITS as usize }>(session, VecShare::new_vec(vec![mask_dot])).await?; - debug_assert_eq!(x.len(), 1); - let mut x = x.pop().expect("Enough elements present"); + let mut x = x.pop().expect("Expected a single element in the VecShare"); x *= A as u32; x -= y; single_extract_msb_u32::<32>(session, x).await } -pub(crate) async fn batch_signed_lift( +/// Lifts a share of a vector (VecShare) of 16-bit values to a share of a vector +/// (VecShare) of 32-bit values. +pub async fn batch_signed_lift( session: &mut Session, mut pre_lift: VecShare, ) -> eyre::Result> { @@ -92,35 +108,28 @@ pub(crate) async fn batch_signed_lift( Ok(lifted_values) } -/// Computes [D1 * T2; D2 * T1] via lifting -pub(crate) async fn cross_mul_via_lift( +/// Wrapper over batch_signed_lift that lifts a vector (Vec) of 16-bit shares to +/// a vector (Vec) of 32-bit shares. 
+pub async fn batch_signed_lift_vec( session: &mut Session, - d1: Share, - t1: Share, - d2: Share, - t2: Share, -) -> eyre::Result<(Share, Share)> { - let mut pre_lift = VecShare::::with_capacity(4); - // Do preprocessing to lift all values - pre_lift.push(d1); - pre_lift.push(t2); - pre_lift.push(d2); - pre_lift.push(t1); - - let lifted_values = batch_signed_lift(session, pre_lift).await?; + pre_lift: Vec>, +) -> eyre::Result>> { + let pre_lift = VecShare::new_vec(pre_lift); + Ok(batch_signed_lift(session, pre_lift).await?.inner()) +} +/// Computes [D1 * T2; D2 * T1] +/// Assumes that the input shares are originally 16-bit and lifted to u32. +pub(crate) async fn cross_mul( + session: &mut Session, + d1: Share, + t1: Share, + d2: Share, + t2: Share, +) -> eyre::Result<(Share, Share)> { // Compute d1 * t2; t2 * d1 let mut exchanged_shares_a = Vec::with_capacity(2); - let pairs = [ - ( - lifted_values.shares[0].clone(), - lifted_values.shares[1].clone(), - ), - ( - lifted_values.shares[2].clone(), - lifted_values.shares[3].clone(), - ), - ]; + let pairs = [(d1, t2), (d2, t1)]; for pair in pairs.iter() { let (x, y) = pair; let res = session.prf_as_mut().gen_zero_share() + x * y; @@ -161,22 +170,24 @@ pub(crate) async fn cross_mul_via_lift( Ok((res[0].clone(), res[1].clone())) } -/// Computes (d2*t1 - d1*t2) > 0 by first lifting the values in a batch -/// from Z_{2^16} to a bigger ring Z_{2^32} +/// Computes (d2*t1 - d1*t2) > 0. /// Does the multiplication in Z_{2^32} and computes the MSB, to check the /// comparison result. /// d1, t1 are replicated shares that come from an iris code/mask dot product, /// ie: d1 = dot(c_x, c_y); t1 = dot(m_x, m_y). d2, t2 are replicated shares /// that come from an iris code and mask dot product, ie: /// d2 = dot(c_u, c_w), t2 = dot(m_u, m_w) +/// +/// Input values are assumed to be 16-bit shares that have been lifted to +/// 32-bit. 
pub async fn cross_compare( session: &mut Session, - d1: Share, - t1: Share, - d2: Share, - t2: Share, + d1: Share, + t1: Share, + d2: Share, + t2: Share, ) -> eyre::Result { - let (d1t2, d2t1) = cross_mul_via_lift(session, d1, t1, d2, t2).await?; + let (d1t2, d2t1) = cross_mul(session, d1, t1, d2, t2).await?; let diff = d2t1 - d1t2; // Compute bit <- MSB(D2 * T1 - D1 * T2) let bit = single_extract_msb_u32::<32>(session, diff).await?; @@ -252,10 +263,10 @@ pub async fn galois_ring_to_rep3( /// Checks whether first Iris entry in the pair matches the Iris in the second /// entry. This is done in the following manner: -/// Compute the dot product between the two Irises. -/// Convert the partial shamir share result to a replicated sharing and then -/// Compare the distance using the MATCH_THRESHOLD_RATIO from the -/// `compare_threshold` function. +/// - Compute the dot product between the two Irises. +/// - Convert the partial Shamir share result to a replicated sharing and then +/// - Compare the distance using the MATCH_THRESHOLD_RATIO from the +/// `lift_and_compare_threshold` function. pub async fn galois_ring_is_match( session: &mut Session, pairs: &[(GaloisRingSharedIris, GaloisRingSharedIris)], @@ -264,7 +275,7 @@ pub async fn galois_ring_is_match( let additive_dots = galois_ring_pairwise_distance(session, pairs).await?; let rep_dots = galois_ring_to_rep3(session, additive_dots).await?; // compute dots[0] - dots[1] - let bit = compare_threshold(session, rep_dots[0].clone(), rep_dots[1].clone()).await?; + let bit = lift_and_compare_threshold(session, rep_dots[0].clone(), rep_dots[1].clone()).await?; let opened = open_bin(session, bit).await?; Ok(opened.convert()) } @@ -272,7 +283,7 @@ pub async fn galois_ring_is_match( /// Compares the given distance to a threshold and reveal the result. 
pub async fn compare_threshold_and_open( session: &mut Session, - distance: DistanceShare, + distance: DistanceShare, ) -> eyre::Result { let bit = compare_threshold(session, distance.code_dot, distance.mask_dot).await?; let opened = open_bin(session, bit).await?; @@ -487,7 +498,10 @@ mod tests { let mut player_session = runtime.sessions.get(player).unwrap().clone(); let four_shares = four_share_map.get(player).unwrap().clone(); jobs.spawn(async move { - let out_shared = cross_mul_via_lift( + let four_shares = batch_signed_lift_vec(&mut player_session, four_shares) + .await + .unwrap(); + let out_shared = cross_mul( &mut player_session, four_shares[0].clone(), four_shares[1].clone(), From 7437e15050eeb31dc00f927289c1d5d1c989e076 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Tue, 10 Dec 2024 11:50:26 +0100 Subject: [PATCH 078/170] disable importer in prod (#780) * disable importer in prod * fmt * bump stage version and disable importer on stage to test before prod release * comment out prod value changes to avoid unnecesary restart * add RUSTSEC-2024-0421 to deny --- deny.toml | 1 + .../prod/smpcv2-0-prod/values-iris-mpc.yaml | 3 ++ .../prod/smpcv2-1-prod/values-iris-mpc.yaml | 3 ++ .../prod/smpcv2-2-prod/values-iris-mpc.yaml | 3 ++ deploy/stage/common-values-iris-mpc.yaml | 2 +- .../stage/smpcv2-0-stage/values-iris-mpc.yaml | 3 ++ .../stage/smpcv2-1-stage/values-iris-mpc.yaml | 3 ++ .../stage/smpcv2-2-stage/values-iris-mpc.yaml | 3 ++ iris-mpc-common/src/config/mod.rs | 3 ++ iris-mpc-store/src/lib.rs | 42 +++++++++++----- iris-mpc/src/bin/server.rs | 48 ++++++++++++------- 11 files changed, 84 insertions(+), 30 deletions(-) diff --git a/deny.toml b/deny.toml index 08f16a224..82f47c613 100644 --- a/deny.toml +++ b/deny.toml @@ -10,6 +10,7 @@ ignore = [ { id = "RUSTSEC-2024-0384", reason = "waiting for `web-time` crate to remove the dependency" }, { id = "RUSTSEC-2024-0388", reason = "waiting for `mongodb` crate to remove the deprecated dependency" }, { id = 
"RUSTSEC-2024-0402", reason = "wating for `index-map` crate to remove the dependency" }, + { id = "RUSTSEC-2024-0421", reason = "waiting for `mongodb` crate to remove the deprecated dependency" }, ] [sources] diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index e595df095..471a6da20 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -77,6 +77,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" +# - name: SMPC__ENABLE_S3_IMPORTER +# value: "false" + - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 82663a304..606902ca5 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -77,6 +77,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" +# - name: SMPC__ENABLE_S3_IMPORTER +# value: "false" + - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 80da57a5e..cd8c5d791 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -77,6 +77,9 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" +# - name: SMPC__ENABLE_S3_IMPORTER +# value: "false" + - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 5b9b99d1d..58c5a22b2 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.2" +image: 
"ghcr.io/worldcoin/iris-mpc:v0.12.3" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index efaa0d52d..a2dfde76a 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -68,6 +68,9 @@ env: - name: SMPC__SHARES_BUCKET_NAME value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__ENABLE_S3_IMPORTER + value: "false" + - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-stage-eu-north-1" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 81524f5f2..5b1397cf4 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -68,6 +68,9 @@ env: - name: SMPC__SHARES_BUCKET_NAME value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__ENABLE_S3_IMPORTER + value: "false" + - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-stage-eu-north-1" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index e4f1a44c5..f93112411 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -68,6 +68,9 @@ env: - name: SMPC__SHARES_BUCKET_NAME value: "wf-smpcv2-stage-sns-requests" + - name: SMPC__ENABLE_S3_IMPORTER + value: "false" + - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-stage-eu-north-1" diff --git a/iris-mpc-common/src/config/mod.rs b/iris-mpc-common/src/config/mod.rs index 2f84f8d61..f1bf0e2ae 100644 --- a/iris-mpc-common/src/config/mod.rs +++ b/iris-mpc-common/src/config/mod.rs @@ -88,6 +88,9 @@ pub struct Config { #[serde(default)] pub image_name: String, + #[serde(default)] + pub enable_s3_importer: bool, + #[serde(default)] pub db_chunks_bucket_name: String, diff --git a/iris-mpc-store/src/lib.rs 
b/iris-mpc-store/src/lib.rs index c0d504674..32e62705e 100644 --- a/iris-mpc-store/src/lib.rs +++ b/iris-mpc-store/src/lib.rs @@ -167,7 +167,7 @@ impl Store { /// Stream irises in parallel, without a particular order. pub async fn stream_irises_par( &self, - min_last_modified_at: i64, + min_last_modified_at: Option, partitions: usize, ) -> impl Stream> + '_ { let count = self.count_irises().await.expect("Failed count_irises"); @@ -179,14 +179,23 @@ impl Store { let start_id = 1 + partition_size * i; let end_id = start_id + partition_size - 1; - let partition_stream = sqlx::query_as::<_, StoredIris>( - "SELECT * FROM irises WHERE id BETWEEN $1 AND $2 AND last_modified_at >= $3", - ) - .bind(start_id as i64) - .bind(end_id as i64) - .bind(min_last_modified_at) - .fetch(&self.pool) - .map_err(Into::into); + let partition_stream = match min_last_modified_at { + Some(min_last_modified_at) => sqlx::query_as::<_, StoredIris>( + "SELECT * FROM irises WHERE id BETWEEN $1 AND $2 AND last_modified_at >= $3", + ) + .bind(start_id as i64) + .bind(end_id as i64) + .bind(min_last_modified_at) + .fetch(&self.pool) + .map_err(Into::into), + None => sqlx::query_as::<_, StoredIris>( + "SELECT * FROM irises WHERE id BETWEEN $1 AND $2", + ) + .bind(start_id as i64) + .bind(end_id as i64) + .fetch(&self.pool) + .map_err(Into::into), + }; partition_streams.push(Box::pin(partition_stream) as Pin> + Send>>); @@ -509,7 +518,11 @@ mod tests { let got: Vec = store.stream_irises().await.try_collect().await?; assert_eq!(got.len(), 0); - let got: Vec = store.stream_irises_par(0, 2).await.try_collect().await?; + let got: Vec = store + .stream_irises_par(Some(0), 2) + .await + .try_collect() + .await?; assert_eq!(got.len(), 0); let codes_and_masks = &[ @@ -544,8 +557,11 @@ mod tests { let got_len = store.count_irises().await?; let got: Vec = store.stream_irises().await.try_collect().await?; - let mut got_par: Vec = - store.stream_irises_par(0, 2).await.try_collect().await?; + let mut got_par: Vec 
= store + .stream_irises_par(Some(0), 2) + .await + .try_collect() + .await?; got_par.sort_by_key(|iris| iris.id); assert_eq!(got, got_par); @@ -623,7 +639,7 @@ mod tests { // Compare with the parallel version with several edge-cases. for parallelism in [1, 5, MAX_CONNECTIONS as usize + 1] { let mut got_par: Vec = store - .stream_irises_par(0, parallelism) + .stream_irises_par(Some(0), parallelism) .await .try_collect() .await?; diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 67f64611c..693a9d0ca 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -983,22 +983,38 @@ async fn server_main(config: Config) -> eyre::Result<()> { ); let s3_store = S3Store::new(s3_client_clone, db_chunks_bucket_name); tokio::runtime::Handle::current().block_on(async { - // First fetch last snapshot from S3 - let last_snapshot_timestamp = last_snapshot_timestamp(&s3_store).await?; - let min_last_modified_at = - last_snapshot_timestamp - config.db_load_safety_overlap_seconds; - let stream_s3 = fetch_and_parse_chunks(&s3_store, load_chunks_parallelism) - .await - .map(|result| result.map(IrisSource::S3)) - .boxed(); - - let stream_db = store - .stream_irises_par(min_last_modified_at, parallelism) - .await - .map(|result| result.map(IrisSource::DB)) - .boxed(); - - let mut stream = select_all(vec![stream_s3, stream_db]); + let mut stream = match config.enable_s3_importer { + true => { + tracing::info!("S3 importer enabled. 
Fetching from s3 + db"); + // First fetch last snapshot from S3 + let last_snapshot_timestamp = + last_snapshot_timestamp(&s3_store).await?; + let min_last_modified_at = + last_snapshot_timestamp - config.db_load_safety_overlap_seconds; + let stream_s3 = + fetch_and_parse_chunks(&s3_store, load_chunks_parallelism) + .await + .map(|result| result.map(IrisSource::S3)) + .boxed(); + + let stream_db = store + .stream_irises_par(Some(min_last_modified_at), parallelism) + .await + .map(|result| result.map(IrisSource::DB)) + .boxed(); + + select_all(vec![stream_s3, stream_db]) + } + false => { + tracing::info!("S3 importer disabled. Fetching only from db"); + let stream_db = store + .stream_irises_par(None, parallelism) + .await + .map(|result| result.map(IrisSource::DB)) + .boxed(); + select_all(vec![stream_db]) + } + }; let now = Instant::now(); let mut now_load_summary = Instant::now(); From 3da4fdd28b7d7114d2737c811916f4d6d36a1822 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Tue, 10 Dec 2024 12:18:18 +0100 Subject: [PATCH 079/170] release v0.12.3 to prod with importer disabled (#781) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 4 ++-- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 4 ++-- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 4 ++-- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 62f8a5ffe..a5f399261 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.1" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.3" environment: prod replicaCount: 1 diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml 
b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 471a6da20..c4c0d67c7 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -77,8 +77,8 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" -# - name: SMPC__ENABLE_S3_IMPORTER -# value: "false" + - name: SMPC__ENABLE_S3_IMPORTER + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 606902ca5..ef0519f29 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -77,8 +77,8 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" -# - name: SMPC__ENABLE_S3_IMPORTER -# value: "false" + - name: SMPC__ENABLE_S3_IMPORTER + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index cd8c5d791..66a954204 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -77,8 +77,8 @@ env: - name: SMPC__PUBLIC_KEY_BASE_URL value: "https://pki-smpc.worldcoin.org" -# - name: SMPC__ENABLE_S3_IMPORTER -# value: "false" + - name: SMPC__ENABLE_S3_IMPORTER + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index a2dfde76a..58dd3cf9d 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: 
SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-stage-eu-north-1" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 5b1397cf4..d5c66effd 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-stage-eu-north-1" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index f93112411..9d57157b6 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-stage-eu-north-1" From 34774860768b9c1f7d09ab54a5d38579f1f7ee76 Mon Sep 17 00:00:00 2001 From: Bryan Gillespie Date: Tue, 10 Dec 2024 12:53:02 -0700 Subject: [PATCH 080/170] Update Hawk Pack dependency and Python bindings (#767) * Update Hawk Pack dependency version New version supports variable ef and M parameters for construction and search. This commit also exposes the generalized functionality to the Python bindings. 
--------- Co-authored-by: Bryan Gillespie --- Cargo.lock | 13 +++- Cargo.toml | 2 +- iris-mpc-cpu/benches/hnsw.rs | 41 ++---------- iris-mpc-cpu/examples/hnsw-ex.rs | 8 +-- iris-mpc-cpu/src/hawkers/galois_store.rs | 64 ++++++------------- iris-mpc-cpu/src/hawkers/plaintext_store.rs | 21 ++---- iris-mpc-cpu/src/network/grpc.rs | 7 +- iris-mpc-cpu/src/py_bindings/hnsw.rs | 25 ++------ iris-mpc-py/README.md | 4 +- iris-mpc-py/examples-py/test_integration.py | 2 +- .../src/py_hnsw/pyclasses/hawk_searcher.rs | 32 +++++++++- 11 files changed, 87 insertions(+), 132 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f0a2bac70..fee4658d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2205,7 +2205,7 @@ dependencies = [ [[package]] name = "hawk-pack" version = "0.1.0" -source = "git+https://github.com/Inversed-Tech/hawk-pack.git?rev=29e888ed#29e888edfe19cd69e5925fa676ca07d1f64214da" +source = "git+https://github.com/Inversed-Tech/hawk-pack.git?rev=ba995e09#ba995e096116a564a0cc8fc43a6b75b2515c34ff" dependencies = [ "aes-prng 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion", @@ -2214,6 +2214,7 @@ dependencies = [ "futures", "rand", "rand_core", + "rand_distr", "serde", "serde_json", "sqlx", @@ -4157,6 +4158,16 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "raw-cpuid" version = "11.2.0" diff --git a/Cargo.toml b/Cargo.toml index 0385f091f..6e16547f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ bytemuck = { version = "1.17", features = ["derive"] } dotenvy = "0.15" eyre = "0.6" futures = "0.3.30" -hawk-pack = { git = "https://github.com/Inversed-Tech/hawk-pack.git", rev = "29e888ed" } +hawk-pack = { git = "https://github.com/Inversed-Tech/hawk-pack.git", rev = "ba995e09" } hex = "0.4.3" 
itertools = "0.13" num-traits = "0.2" diff --git a/iris-mpc-cpu/benches/hnsw.rs b/iris-mpc-cpu/benches/hnsw.rs index c988e1bbf..48cea2651 100644 --- a/iris-mpc-cpu/benches/hnsw.rs +++ b/iris-mpc-cpu/benches/hnsw.rs @@ -1,6 +1,6 @@ use aes_prng::AesRng; use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode}; -use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher, VectorStore}; +use hawk_pack::{graph_store::GraphMem, HawkSearcher}; use iris_mpc_common::iris_db::{db::IrisDB, iris::IrisCode}; use iris_mpc_cpu::{ database_generators::{create_random_sharing, generate_galois_iris_shares}, @@ -33,18 +33,8 @@ fn bench_plaintext_hnsw(c: &mut Criterion) { for _ in 0..database_size { let raw_query = IrisCode::random_rng(&mut rng); let query = vector.prepare_query(raw_query.clone()); - let neighbors = searcher - .search_to_insert(&mut vector, &mut graph, &query) - .await; - let inserted = vector.insert(&query).await; searcher - .insert_from_search_results( - &mut vector, - &mut graph, - &mut rng, - inserted, - neighbors, - ) + .insert(&mut vector, &mut graph, &query, &mut rng) .await; } (vector, graph) @@ -58,17 +48,8 @@ fn bench_plaintext_hnsw(c: &mut Criterion) { let mut rng = AesRng::seed_from_u64(0_u64); let on_the_fly_query = IrisDB::new_random_rng(1, &mut rng).db[0].clone(); let query = db_vectors.prepare_query(on_the_fly_query); - let neighbors = searcher - .search_to_insert(&mut db_vectors, &mut graph, &query) - .await; searcher - .insert_from_search_results( - &mut db_vectors, - &mut graph, - &mut rng, - query, - neighbors, - ) + .insert(&mut db_vectors, &mut graph, &query, &mut rng) .await; }, criterion::BatchSize::SmallInput, @@ -220,18 +201,8 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { let searcher = searcher.clone(); let mut rng = rng.clone(); jobs.spawn(async move { - let neighbors = searcher - .search_to_insert(&mut vector_store, &mut graph_store, &query) - .await; - let inserted_query = 
vector_store.insert(&query).await; searcher - .insert_from_search_results( - &mut vector_store, - &mut graph_store, - &mut rng, - inserted_query, - neighbors, - ) + .insert(&mut vector_store, &mut graph_store, &query, &mut rng) .await; }); } @@ -262,9 +233,9 @@ fn bench_gr_ready_made_hnsw(c: &mut Criterion) { let searcher = searcher.clone(); jobs.spawn(async move { let neighbors = searcher - .search_to_insert(&mut vector_store, &mut graph_store, &query) + .search(&mut vector_store, &mut graph_store, &query, 1) .await; - searcher.is_match(&mut vector_store, &neighbors).await; + searcher.is_match(&mut vector_store, &[neighbors]).await; }); } jobs.join_all().await; diff --git a/iris-mpc-cpu/examples/hnsw-ex.rs b/iris-mpc-cpu/examples/hnsw-ex.rs index 041f0118a..71c925028 100644 --- a/iris-mpc-cpu/examples/hnsw-ex.rs +++ b/iris-mpc-cpu/examples/hnsw-ex.rs @@ -1,5 +1,5 @@ use aes_prng::AesRng; -use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher, VectorStore}; +use hawk_pack::{graph_store::GraphMem, HawkSearcher}; use iris_mpc_common::iris_db::iris::IrisCode; use iris_mpc_cpu::hawkers::plaintext_store::PlaintextStore; use rand::SeedableRng; @@ -21,12 +21,8 @@ fn main() { for idx in 0..DATABASE_SIZE { let raw_query = IrisCode::random_rng(&mut rng); let query = vector.prepare_query(raw_query.clone()); - let neighbors = searcher - .search_to_insert(&mut vector, &mut graph, &query) - .await; - let inserted = vector.insert(&query).await; searcher - .insert_from_search_results(&mut vector, &mut graph, &mut rng, inserted, neighbors) + .insert(&mut vector, &mut graph, &query, &mut rng) .await; if idx % 100 == 99 { println!("{}", idx + 1); diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs index e2186a324..520570821 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -19,9 +19,9 @@ use crate::{ }; use aes_prng::AesRng; use hawk_pack::{ - graph_store::{graph_mem::Layer, 
EntryPoint, GraphMem}, - hnsw_db::{FurthestQueue, HawkSearcher}, - GraphStore, VectorStore, + data_structures::queue::FurthestQueue, + graph_store::{graph_mem::Layer, GraphMem}, + GraphStore, HawkSearcher, VectorStore, }; use iris_mpc_common::iris_db::{db::IrisDB, iris::IrisCode}; use rand::{CryptoRng, RngCore, SeedableRng}; @@ -323,10 +323,7 @@ impl LocalNetAby3NgStoreProtocol { recompute_distances: bool, ) -> GraphMem { let ep = graph_store.get_entry_point().await; - let new_ep = ep.map(|ep| EntryPoint { - vector_ref: VectorId { id: ep.vector_ref }, - layer_count: ep.layer_count, - }); + let new_ep = ep.map(|(vector_ref, layer_count)| (VectorId { id: vector_ref }, layer_count)); let layers = graph_store.get_layers(); @@ -476,18 +473,8 @@ impl LocalNetAby3NgStoreProtocol { let searcher = HawkSearcher::default(); // insert queries for query in queries.iter() { - let neighbors = searcher - .search_to_insert(&mut store, &mut graph_store, query) - .await; - let inserted_query = store.insert(query).await; searcher - .insert_from_search_results( - &mut store, - &mut graph_store, - &mut rng_searcher, - inserted_query, - neighbors, - ) + .insert(&mut store, &mut graph_store, query, &mut rng_searcher) .await; } (store, graph_store) @@ -525,7 +512,7 @@ mod tests { use super::*; use crate::database_generators::generate_galois_iris_shares; use aes_prng::AesRng; - use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher}; + use hawk_pack::{graph_store::GraphMem, HawkSearcher}; use itertools::Itertools; use rand::SeedableRng; use tracing_test::traced_test; @@ -559,30 +546,19 @@ mod tests { let mut inserted = vec![]; // insert queries for query in queries.iter() { - let neighbors = db - .search_to_insert(&mut store, &mut aby3_graph, query) + let inserted_vector = db + .insert(&mut store, &mut aby3_graph, query, &mut rng) .await; - let inserted_query = store.insert(query).await; - inserted.push(inserted_query); - db.insert_from_search_results( - &mut store, - &mut aby3_graph, 
- &mut rng, - inserted_query, - neighbors, - ) - .await; + inserted.push(inserted_vector) } tracing::debug!("FINISHED INSERTING"); // Search for the same codes and find matches. let mut matching_results = vec![]; for v in inserted.into_iter() { let query = store.prepare_query(store.storage.get_vector(&v).clone()); - let neighbors = db - .search_to_insert(&mut store, &mut aby3_graph, &query) - .await; + let neighbors = db.search(&mut store, &mut aby3_graph, &query, 1).await; tracing::debug!("Finished checking query"); - matching_results.push(db.is_match(&mut store, &neighbors).await) + matching_results.push(db.is_match(&mut store, &[neighbors]).await) } matching_results }); @@ -629,11 +605,11 @@ mod tests { for i in 0..database_size { let cleartext_neighbors = hawk_searcher - .search_to_insert(&mut cleartext_data.0, &mut cleartext_data.1, &i.into()) + .search(&mut cleartext_data.0, &mut cleartext_data.1, &i.into(), 1) .await; assert!( hawk_searcher - .is_match(&mut cleartext_data.0, &cleartext_neighbors) + .is_match(&mut cleartext_data.0, &[cleartext_neighbors]) .await, ); @@ -644,9 +620,9 @@ mod tests { let mut g = g.clone(); let q = v.prepare_query(v.storage.get_vector(&i.into()).clone()); jobs.spawn(async move { - let secret_neighbors = hawk_searcher.search_to_insert(&mut v, &mut g, &q).await; + let secret_neighbors = hawk_searcher.search(&mut v, &mut g, &q, 1).await; - hawk_searcher.is_match(&mut v, &secret_neighbors).await + hawk_searcher.is_match(&mut v, &[secret_neighbors]).await }); } let scratch_results = jobs.join_all().await; @@ -658,10 +634,9 @@ mod tests { let mut g = g.clone(); jobs.spawn(async move { let query = v.prepare_query(v.storage.get_vector(&i.into()).clone()); - let secret_neighbors = - hawk_searcher.search_to_insert(&mut v, &mut g, &query).await; + let secret_neighbors = hawk_searcher.search(&mut v, &mut g, &query, 1).await; - hawk_searcher.is_match(&mut v, &secret_neighbors).await + hawk_searcher.is_match(&mut v, &[secret_neighbors]).await 
}); } let premade_results = jobs.join_all().await; @@ -790,9 +765,8 @@ mod tests { let searcher = searcher.clone(); let q = store.prepare_query(store.storage.get_vector(&i.into()).clone()); jobs.spawn(async move { - let secret_neighbors = - searcher.search_to_insert(&mut store, &mut graph, &q).await; - searcher.is_match(&mut store, &secret_neighbors).await + let secret_neighbors = searcher.search(&mut store, &mut graph, &q, 1).await; + searcher.is_match(&mut store, &[secret_neighbors]).await }); } let res = jobs.join_all().await; diff --git a/iris-mpc-cpu/src/hawkers/plaintext_store.rs b/iris-mpc-cpu/src/hawkers/plaintext_store.rs index 2d0ebd062..2e10301e5 100644 --- a/iris-mpc-cpu/src/hawkers/plaintext_store.rs +++ b/iris-mpc-cpu/src/hawkers/plaintext_store.rs @@ -1,5 +1,5 @@ use aes_prng::AesRng; -use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher, VectorStore}; +use hawk_pack::{graph_store::GraphMem, HawkSearcher, VectorStore}; use iris_mpc_common::iris_db::{ db::IrisDB, iris::{IrisCode, MATCH_THRESHOLD_RATIO}, @@ -152,21 +152,12 @@ impl PlaintextStore { for raw_query in cleartext_database.iter() { let query = plaintext_vector_store.prepare_query(raw_query.clone()); - let neighbors = searcher - .search_to_insert( - &mut plaintext_vector_store, - &mut plaintext_graph_store, - &query, - ) - .await; - let inserted = plaintext_vector_store.insert(&query).await; searcher - .insert_from_search_results( + .insert( &mut plaintext_vector_store, &mut plaintext_graph_store, + &query, &mut rng_searcher1, - inserted, - neighbors, ) .await; } @@ -183,7 +174,7 @@ impl PlaintextStore { mod tests { use super::*; use aes_prng::AesRng; - use hawk_pack::hnsw_db::HawkSearcher; + use hawk_pack::HawkSearcher; use iris_mpc_common::iris_db::db::IrisDB; use rand::SeedableRng; use tracing_test::traced_test; @@ -272,11 +263,11 @@ mod tests { .unwrap(); for i in 0..database_size { let cleartext_neighbors = searcher - .search_to_insert(&mut ptxt_vector, &mut ptxt_graph, &i.into()) 
+ .search(&mut ptxt_vector, &mut ptxt_graph, &i.into(), 1) .await; assert!( searcher - .is_match(&mut ptxt_vector, &cleartext_neighbors) + .is_match(&mut ptxt_vector, &[cleartext_neighbors]) .await, ); } diff --git a/iris-mpc-cpu/src/network/grpc.rs b/iris-mpc-cpu/src/network/grpc.rs index 73abff990..bd3185532 100644 --- a/iris-mpc-cpu/src/network/grpc.rs +++ b/iris-mpc-cpu/src/network/grpc.rs @@ -343,7 +343,7 @@ mod tests { hawkers::galois_store::LocalNetAby3NgStoreProtocol, }; use aes_prng::AesRng; - use hawk_pack::hnsw_db::HawkSearcher; + use hawk_pack::HawkSearcher; use rand::SeedableRng; use tokio::task::JoinSet; use tracing_test::traced_test; @@ -587,9 +587,8 @@ mod tests { let searcher = searcher.clone(); let q = store.prepare_query(store.storage.get_vector(&i.into()).clone()); jobs.spawn(async move { - let secret_neighbors = - searcher.search_to_insert(&mut store, &mut graph, &q).await; - searcher.is_match(&mut store, &secret_neighbors).await + let secret_neighbors = searcher.search(&mut store, &mut graph, &q, 1).await; + searcher.is_match(&mut store, &[secret_neighbors]).await }); } let res = jobs.join_all().await; diff --git a/iris-mpc-cpu/src/py_bindings/hnsw.rs b/iris-mpc-cpu/src/py_bindings/hnsw.rs index e57c85ff0..471de784a 100644 --- a/iris-mpc-cpu/src/py_bindings/hnsw.rs +++ b/iris-mpc-cpu/src/py_bindings/hnsw.rs @@ -1,6 +1,6 @@ use super::plaintext_store::Base64IrisCode; use crate::hawkers::plaintext_store::{PlaintextStore, PointId}; -use hawk_pack::{graph_store::GraphMem, hnsw_db::HawkSearcher, VectorStore}; +use hawk_pack::{graph_store::GraphMem, HawkSearcher}; use iris_mpc_common::iris_db::iris::IrisCode; use rand::rngs::ThreadRng; use serde_json::{self, Deserializer}; @@ -19,8 +19,8 @@ pub fn search( rt.block_on(async move { let query = vector.prepare_query(query); - let neighbors = searcher.search_to_insert(vector, graph, &query).await; - let (nearest, (dist_num, dist_denom)) = neighbors[0].get_nearest().unwrap(); + let neighbors = 
searcher.search(vector, graph, &query, 1).await; + let (nearest, (dist_num, dist_denom)) = neighbors.get_nearest().unwrap(); (*nearest, (*dist_num as f64) / (*dist_denom as f64)) }) } @@ -41,12 +41,7 @@ pub fn insert( let mut rng = ThreadRng::default(); let query = vector.prepare_query(iris); - let neighbors = searcher.search_to_insert(vector, graph, &query).await; - let inserted = vector.insert(&query).await; - searcher - .insert_from_search_results(vector, graph, &mut rng, inserted, neighbors) - .await; - inserted + searcher.insert(vector, graph, &query, &mut rng).await }) } @@ -78,11 +73,7 @@ pub fn fill_uniform_random( for idx in 0..num { let raw_query = IrisCode::random_rng(&mut rng); let query = vector.prepare_query(raw_query.clone()); - let neighbors = searcher.search_to_insert(vector, graph, &query).await; - let inserted = vector.insert(&query).await; - searcher - .insert_from_search_results(vector, graph, &mut rng, inserted, neighbors) - .await; + searcher.insert(vector, graph, &query, &mut rng).await; if idx % 100 == 99 { println!("{}", idx + 1); } @@ -116,11 +107,7 @@ pub fn fill_from_ndjson_file( for json_pt in stream { let raw_query = (&json_pt.unwrap()).into(); let query = vector.prepare_query(raw_query); - let neighbors = searcher.search_to_insert(vector, graph, &query).await; - let inserted = vector.insert(&query).await; - searcher - .insert_from_search_results(vector, graph, &mut rng, inserted, neighbors) - .await; + searcher.insert(vector, graph, &query, &mut rng).await; } }) } diff --git a/iris-mpc-py/README.md b/iris-mpc-py/README.md index e80736956..aea78cecb 100644 --- a/iris-mpc-py/README.md +++ b/iris-mpc-py/README.md @@ -23,7 +23,7 @@ Once successfully installed, the native rust module `iris_mpc_py` can be importe ```python from iris_mpc_py import PyHawkSearcher, PyPlaintextStore, PyGraphStore, PyIrisCode -hnsw = PyHawkSearcher.new_uniform(32, 32) # M, ef +hnsw = PyHawkSearcher(32, 64, 32) # M, ef_constr, ef_search vector = 
PyPlaintextStore() graph = PyGraphStore() @@ -61,7 +61,7 @@ graph = PyGraphStore.read_from_bin("graph.dat") Second, to construct an HNSW index dynamically from streamed database entries: ```python -hnsw = PyHawkSearcher.new_uniform(32, 32) +hnsw = PyHawkSearcher(32, 64, 32) vector = PyPlaintextStore() graph = PyGraphStore() hnsw.fill_from_ndjson_file("large_vector_database.ndjson", vector, graph, 10000) diff --git a/iris-mpc-py/examples-py/test_integration.py b/iris-mpc-py/examples-py/test_integration.py index 945069af4..d22bad8ee 100644 --- a/iris-mpc-py/examples-py/test_integration.py +++ b/iris-mpc-py/examples-py/test_integration.py @@ -12,7 +12,7 @@ vector_init.write_to_ndjson("vector.ndjson") print("Generating HNSW graphs for 10k imported iris codes...") -hnsw = PyHawkSearcher.new_uniform(32, 32) +hnsw = PyHawkSearcher(32, 64, 32) vector1 = PyPlaintextStore() graph1 = PyGraphStore() hnsw.fill_from_ndjson_file("vector.ndjson", vector1, graph1, 10000) diff --git a/iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs b/iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs index 05fb346ee..1d154346a 100644 --- a/iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs +++ b/iris-mpc-py/src/py_hnsw/pyclasses/hawk_searcher.rs @@ -1,5 +1,8 @@ use super::{graph_store::PyGraphStore, iris_code::PyIrisCode, plaintext_store::PyPlaintextStore}; -use hawk_pack::hnsw_db::{HawkSearcher, Params}; +use hawk_pack::{ + hawk_searcher::{HawkerParams, N_PARAM_LAYERS}, + HawkSearcher, +}; use iris_mpc_cpu::py_bindings; use pyo3::{exceptions::PyIOError, prelude::*}; @@ -17,13 +20,36 @@ impl PyHawkSearcher { #[staticmethod] pub fn new_standard(M: usize, ef_constr: usize, ef_search: usize) -> Self { - let params = Params::new_standard(ef_constr, ef_search, M); + let params = HawkerParams::new(ef_constr, ef_search, M); Self(HawkSearcher { params }) } #[staticmethod] pub fn new_uniform(M: usize, ef: usize) -> Self { - let params = Params::new_uniform(ef, M); + let params = 
HawkerParams::new_uniform(ef, M); + Self(HawkSearcher { params }) + } + + /// Construct `HawkSearcher` with fully general parameters, specifying the + /// values of various parameters used during construction and search at + /// different levels of the graph hierarchy. + #[staticmethod] + pub fn new_general( + M: [usize; N_PARAM_LAYERS], + M_max: [usize; N_PARAM_LAYERS], + ef_constr_search: [usize; N_PARAM_LAYERS], + ef_constr_insert: [usize; N_PARAM_LAYERS], + ef_search: [usize; N_PARAM_LAYERS], + layer_probability: f64, + ) -> Self { + let params = HawkerParams { + M, + M_max, + ef_constr_search, + ef_constr_insert, + ef_search, + layer_probability, + }; Self(HawkSearcher { params }) } From 05e4d3f53d91cc46c8d2f9c49aa684662a45a5f6 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 11 Dec 2024 01:58:10 -0800 Subject: [PATCH 081/170] fix: also ignore supermatchers for in-memory (#782) --- iris-mpc-gpu/src/server/actor.rs | 57 +++++++++++++++----------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 3ceb6318f..16889ca56 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -68,6 +68,7 @@ impl ServerActorHandle { const DB_CHUNK_SIZE: usize = 1 << 14; const KDF_SALT: &str = "111a1a93518f670e9bb0c2c68888e2beb9406d4c4ed571dc77b801e676ae3091"; // Random 32 byte salt +const SUPERMATCH_THRESHOLD: usize = 4_000; pub struct ServerActor { job_queue: mpsc::Receiver, @@ -695,30 +696,6 @@ impl ServerActor { // Truncate the results to the batch size host_results.iter_mut().for_each(|x| x.truncate(batch_size)); - // Evaluate the results across devices - // Format: merged_results[query_index] - let mut merged_results = - get_merged_results(&host_results, self.device_manager.device_count()); - - // List the indices of the queries that did not match. 
- let insertion_list = merged_results - .iter() - .enumerate() - .filter(|&(_idx, &num)| num == NON_MATCH_ID) - .map(|(idx, _num)| idx) - .collect::>(); - - // Spread the insertions across devices. - let insertion_list = distribute_insertions(&insertion_list, &self.current_db_sizes); - - // Calculate the new indices for the inserted queries - let mut matches = calculate_insertion_indices( - &mut merged_results, - &insertion_list, - &self.current_db_sizes, - batch_size, - ); - // Fetch and truncate the match counters let match_counters_devices = self .distance_comparator @@ -757,13 +734,31 @@ impl ServerActor { } } - // Check for supermatchers for v1 compatibility and mark them as non-unique - const SUPERMATCH_THRESHOLD: usize = 4_000; - for i in 0..batch_size { - if match_counters[i] > SUPERMATCH_THRESHOLD { - matches[i] = true; - } - } + // Evaluate the results across devices + // Format: merged_results[query_index] + let mut merged_results = + get_merged_results(&host_results, self.device_manager.device_count()); + + // List the indices of the queries that did not match. + let insertion_list = merged_results + .iter() + .enumerate() + .filter(|&(idx, &num)| { + num == NON_MATCH_ID && match_counters[idx] <= SUPERMATCH_THRESHOLD + }) + .map(|(idx, _num)| idx) + .collect::>(); + + // Spread the insertions across devices. 
+ let insertion_list = distribute_insertions(&insertion_list, &self.current_db_sizes); + + // Calculate the new indices for the inserted queries + let matches = calculate_insertion_indices( + &mut merged_results, + &insertion_list, + &self.current_db_sizes, + batch_size, + ); // Fetch the partial matches let (partial_match_ids_left, partial_match_ids_right) = if self.return_partial_results { From ca5558f522ff834a96673442e06c925c0ce9044e Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 11 Dec 2024 11:10:40 +0100 Subject: [PATCH 082/170] Add log statement for printing the batch query in order (#783) --- iris-mpc/src/bin/server.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 693a9d0ca..cf60621eb 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -449,6 +449,8 @@ async fn receive_batch( batch_query.query_right.mask.extend(mask_shares_right); } + tracing::info!("batch signups ids in order: {:?}", batch_query.request_ids); + // Preprocess query shares here already to avoid blocking the actor batch_query.query_left_preprocessed = BatchQueryEntriesPreprocessed::from(batch_query.query_left.clone()); From 7693ae91b5f17bb770f384649656dfc53dbb7e08 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 11 Dec 2024 11:10:53 +0100 Subject: [PATCH 083/170] Change importer to work with binary (#779) * add timing around get_object and parse csv * reduce terminationGracePeriodSeconds * bump image * convert to binary * treat id as u32 * change folder name * deal only with specified folder * use 8k * set parallelism to 8 * run 2k with 16 par * use 8 par * set to 2k + 32 par * switch to bytes * rebase * bump image * test config * test config * generate chunk names based on timestamp file * use 1k chunk in stage 0 * use 512 chunk in stage 0 * fix test * remove left overs * fmt * use IRIS_CODE_LENGTH and MASK_CODE_LENGTH instead of const --- 
.../workflows/temp-branch-build-and-push.yaml | 47 ++++ deploy/stage/common-values-iris-mpc.yaml | 4 +- .../stage/smpcv2-0-stage/values-iris-mpc.yaml | 6 + .../stage/smpcv2-1-stage/values-iris-mpc.yaml | 6 + .../stage/smpcv2-2-stage/values-iris-mpc.yaml | 6 + iris-mpc-common/src/config/mod.rs | 3 + iris-mpc-store/src/lib.rs | 40 ++++ iris-mpc-store/src/s3_importer.rs | 212 +++++++++--------- iris-mpc/src/bin/server.rs | 31 ++- 9 files changed, 242 insertions(+), 113 deletions(-) create mode 100644 .github/workflows/temp-branch-build-and-push.yaml diff --git a/.github/workflows/temp-branch-build-and-push.yaml b/.github/workflows/temp-branch-build-and-push.yaml new file mode 100644 index 000000000..461118538 --- /dev/null +++ b/.github/workflows/temp-branch-build-and-push.yaml @@ -0,0 +1,47 @@ +name: Branch - Build and push docker image + +on: + push: + branches: + - "ertugrul/change-to-binary" + +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + cancel-in-progress: true + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + docker: + runs-on: + labels: ubuntu-22.04-64core + permissions: + packages: write + contents: read + attestations: write + id-token: write + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build and Push + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + tags: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} + platforms: linux/amd64 + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 58c5a22b2..cdaa36dab 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.3" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.4" environment: stage replicaCount: 1 @@ -82,4 +82,4 @@ preStop: sleepPeriod: 10 # terminationGracePeriodSeconds specifies the grace time between SIGTERM and SIGKILL -terminationGracePeriodSeconds: 180 # 3x SMPC__PROCESSING_TIMEOUT_SECS +terminationGracePeriodSeconds: 20 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 58dd3cf9d..79f2885a9 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -74,6 +74,12 @@ env: - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-stage-eu-north-1" + - name: SMPC__DB_CHUNKS_FOLDER_NAME + value: "binary_output_2k" + + - name: SMPC__LOAD_CHUNKS_PARALLELISM + value: "32" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index d5c66effd..902c69a54 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -74,6 +74,12 @@ env: - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-stage-eu-north-1" + - name: SMPC__DB_CHUNKS_FOLDER_NAME + value: "binary_output_2k" + + - name: SMPC__LOAD_CHUNKS_PARALLELISM + value: "32" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 
9d57157b6..2f497bb7c 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -74,6 +74,12 @@ env: - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-stage-eu-north-1" + - name: SMPC__DB_CHUNKS_FOLDER_NAME + value: "binary_output_2k" + + - name: SMPC__LOAD_CHUNKS_PARALLELISM + value: "32" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/iris-mpc-common/src/config/mod.rs b/iris-mpc-common/src/config/mod.rs index f1bf0e2ae..666884068 100644 --- a/iris-mpc-common/src/config/mod.rs +++ b/iris-mpc-common/src/config/mod.rs @@ -102,6 +102,9 @@ pub struct Config { /// updated during the DB export to S3 #[serde(default = "default_db_load_safety_overlap_seconds")] pub db_load_safety_overlap_seconds: i64, + + #[serde(default)] + pub db_chunks_folder_name: String, } fn default_load_chunks_parallelism() -> usize { diff --git a/iris-mpc-store/src/lib.rs b/iris-mpc-store/src/lib.rs index 32e62705e..ee3792448 100644 --- a/iris-mpc-store/src/lib.rs +++ b/iris-mpc-store/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(int_roundings)] + mod s3_importer; use bytemuck::cast_slice; @@ -10,6 +12,7 @@ use iris_mpc_common::{ config::Config, galois_engine::degree4::{GaloisRingIrisCodeShare, GaloisRingTrimmedMaskCodeShare}, iris_db::iris::IrisCode, + IRIS_CODE_LENGTH, MASK_CODE_LENGTH, }; use rand::{rngs::StdRng, Rng, SeedableRng}; pub use s3_importer::{fetch_and_parse_chunks, last_snapshot_timestamp, ObjectStore, S3Store}; @@ -74,6 +77,43 @@ impl StoredIris { pub fn id(&self) -> i64 { self.id } + + pub fn from_bytes(bytes: &[u8]) -> Result { + let mut cursor = 0; + + // Helper closure to extract a slice of a given size + let extract_slice = + |bytes: &[u8], cursor: &mut usize, size: usize| -> Result, eyre::Error> { + if *cursor + size > bytes.len() { + return Err(eyre!("Exceeded total bytes while extracting slice",)); + } + let slice = &bytes[*cursor..*cursor + size]; + *cursor += size; + 
Ok(slice.to_vec()) + }; + + // Parse `id` (i64) + let id_bytes = extract_slice(bytes, &mut cursor, 4)?; + let id = u32::from_be_bytes( + id_bytes + .try_into() + .map_err(|_| eyre!("Failed to convert id bytes to i64"))?, + ) as i64; + + // parse codes and masks + let left_code = extract_slice(bytes, &mut cursor, IRIS_CODE_LENGTH * size_of::())?; + let left_mask = extract_slice(bytes, &mut cursor, MASK_CODE_LENGTH * size_of::())?; + let right_code = extract_slice(bytes, &mut cursor, IRIS_CODE_LENGTH * size_of::())?; + let right_mask = extract_slice(bytes, &mut cursor, MASK_CODE_LENGTH * size_of::())?; + + Ok(StoredIris { + id, + left_code, + left_mask, + right_code, + right_mask, + }) + } } #[derive(Clone)] diff --git a/iris-mpc-store/src/s3_importer.rs b/iris-mpc-store/src/s3_importer.rs index 0ec5f4ab8..896b4af64 100644 --- a/iris-mpc-store/src/s3_importer.rs +++ b/iris-mpc-store/src/s3_importer.rs @@ -4,20 +4,17 @@ use aws_sdk_s3::Client; use bytes::Bytes; use futures::{stream, Stream, StreamExt}; use iris_mpc_common::{IRIS_CODE_LENGTH, MASK_CODE_LENGTH}; -use rayon::{iter::ParallelIterator, prelude::ParallelBridge}; -use serde::Deserialize; -use std::{io::Cursor, mem, pin::Pin, sync::Arc}; +use std::{mem, pin::Pin, sync::Arc, time::Instant}; use tokio::task; const SINGLE_ELEMENT_SIZE: usize = IRIS_CODE_LENGTH * mem::size_of::() * 2 + MASK_CODE_LENGTH * mem::size_of::() * 2 + mem::size_of::(); // 75 KB -const CSV_BUFFER_CAPACITY: usize = SINGLE_ELEMENT_SIZE * 10; #[async_trait] pub trait ObjectStore: Send + Sync + 'static { async fn get_object(&self, key: &str) -> eyre::Result; - async fn list_objects(&self) -> eyre::Result>; + async fn list_objects(&self, prefix: &str) -> eyre::Result>; } pub struct S3Store { @@ -46,12 +43,16 @@ impl ObjectStore for S3Store { Ok(data.into_bytes()) } - async fn list_objects(&self) -> eyre::Result> { + async fn list_objects(&self, prefix: &str) -> eyre::Result> { let mut objects = Vec::new(); let mut continuation_token = None; 
loop { - let mut request = self.client.list_objects_v2().bucket(&self.bucket); + let mut request = self + .client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(prefix); if let Some(token) = continuation_token { request = request.continuation_token(token); @@ -76,101 +77,98 @@ impl ObjectStore for S3Store { } } -#[derive(Debug, Deserialize)] -struct CsvIrisRecord { - id: String, - left_code: String, - left_mask: String, - right_code: String, - right_mask: String, +#[derive(Debug)] +pub struct LastSnapshotDetails { + pub timestamp: i64, + pub last_serial_id: i64, + pub chunk_size: i64, } -fn hex_to_bytes(hex: &str, byte_len: usize) -> eyre::Result> { - if hex.is_empty() { - return Ok(vec![]); +impl LastSnapshotDetails { + // Parse last snapshot from s3 file name. + // It is in {unixTime}_{batchSize}_{lastSerialId} format. + pub fn new_from_str(last_snapshot_str: &str) -> Option { + let parts: Vec<&str> = last_snapshot_str.split('_').collect(); + match parts.len() { + 3 => Some(Self { + timestamp: parts[0].parse().unwrap(), + chunk_size: parts[1].parse().unwrap(), + last_serial_id: parts[2].parse().unwrap(), + }), + _ => { + tracing::warn!("Invalid export timestamp file name: {}", last_snapshot_str); + None + } + } } - let mut bytes = vec![0; byte_len]; - hex::decode_to_slice(hex, &mut bytes)?; - Ok(bytes) } -pub async fn last_snapshot_timestamp(store: &impl ObjectStore) -> eyre::Result { +pub async fn last_snapshot_timestamp( + store: &impl ObjectStore, + prefix_name: String, +) -> eyre::Result { + tracing::info!("Looking for last snapshot time in prefix: {}", prefix_name); + let timestamps_path = format!("{}/timestamps/", prefix_name); store - .list_objects() + .list_objects(timestamps_path.as_str()) .await? 
.into_iter() - .filter(|f| f.starts_with("output/") && f.ends_with(".timestamp")) - .filter_map(|f| { - f.replace(".timestamp", "") - .replace("output/", "") - .parse::() - .ok() + .filter_map(|f| match f.split('/').last() { + Some(file_name) => LastSnapshotDetails::new_from_str(file_name), + _ => None, }) - .max() + .max_by_key(|s| s.timestamp) .ok_or_else(|| eyre::eyre!("No snapshot found")) } pub async fn fetch_and_parse_chunks( store: &impl ObjectStore, concurrency: usize, + prefix_name: String, + last_snapshot_details: LastSnapshotDetails, ) -> Pin> + Send + '_>> { - let chunks = store.list_objects().await.unwrap(); - stream::iter(chunks) - .filter_map(|chunk| async move { - if chunk.ends_with(".csv") { - tracing::info!("Processing chunk: {}", chunk); - Some(chunk) - } else { - None - } - }) + tracing::info!("Generating chunk files using: {:?}", last_snapshot_details); + let chunks: Vec = (1..=last_snapshot_details.last_serial_id) + .step_by(last_snapshot_details.chunk_size as usize) + .map(|num| format!("{}/{}.bin", prefix_name, num)) + .collect(); + tracing::info!("Generated {} chunk names", chunks.len()); + + let result_stream = stream::iter(chunks) .map(move |chunk| async move { + let mut now = Instant::now(); let result = store.get_object(&chunk).await?; - task::spawn_blocking(move || { - let cursor = Cursor::new(result); - let reader = csv::ReaderBuilder::new() - .has_headers(true) - .buffer_capacity(CSV_BUFFER_CAPACITY) - .from_reader(cursor); - - let records: Vec> = reader - .into_deserialize() - .par_bridge() - .map(|r: Result| { - let raw = r.map_err(|e| eyre::eyre!("CSV parse error: {}", e))?; - - Ok(StoredIris { - id: raw.id.parse()?, - left_code: hex_to_bytes( - &raw.left_code, - IRIS_CODE_LENGTH * mem::size_of::(), - )?, - left_mask: hex_to_bytes( - &raw.left_mask, - MASK_CODE_LENGTH * mem::size_of::(), - )?, - right_code: hex_to_bytes( - &raw.right_code, - IRIS_CODE_LENGTH * mem::size_of::(), - )?, - right_mask: hex_to_bytes( - &raw.right_mask, 
- MASK_CODE_LENGTH * mem::size_of::(), - )?, - }) - }) - .collect(); + let get_object_time = now.elapsed(); + tracing::info!("Got chunk object: {} in {:?}", chunk, get_object_time,); + + now = Instant::now(); + let task = task::spawn_blocking(move || { + let n_records = result.len().div_floor(SINGLE_ELEMENT_SIZE); + + let mut records = Vec::with_capacity(n_records); + for i in 0..n_records { + let start = i * SINGLE_ELEMENT_SIZE; + let end = (i + 1) * SINGLE_ELEMENT_SIZE; + let chunk = &result[start..end]; + let iris = StoredIris::from_bytes(chunk); + records.push(iris); + } Ok::<_, eyre::Error>(stream::iter(records)) }) - .await? + .await?; + let parse_time = now.elapsed(); + tracing::info!("Parsed chunk: {} in {:?}", chunk, parse_time,); + task }) .buffer_unordered(concurrency) .flat_map(|result| match result { Ok(stream) => stream.boxed(), Err(e) => stream::once(async move { Err(e) }).boxed(), }) - .boxed() + .boxed(); + + result_stream } #[cfg(test)] @@ -189,27 +187,20 @@ mod tests { Self::default() } + pub fn add_timestamp_file(&mut self, key: &str) { + self.objects.insert(key.to_string(), Vec::new()); + } + pub fn add_test_data(&mut self, key: &str, records: Vec) { - let mut csv = Vec::new(); - { - let mut writer = csv::Writer::from_writer(&mut csv); - writer - .write_record(["id", "left_code", "left_mask", "right_code", "right_mask"]) - .unwrap(); - - for record in records { - writer - .write_record(&[ - record.id.to_string(), - hex::encode(record.left_code), - hex::encode(record.left_mask), - hex::encode(record.right_code), - hex::encode(record.right_mask), - ]) - .unwrap(); - } + let mut result = Vec::new(); + for record in records { + result.extend_from_slice(&(record.id as u32).to_be_bytes()); + result.extend_from_slice(&record.left_code); + result.extend_from_slice(&record.left_mask); + result.extend_from_slice(&record.right_code); + result.extend_from_slice(&record.right_mask); } - self.objects.insert(key.to_string(), csv); + 
self.objects.insert(key.to_string(), result); } } @@ -223,7 +214,7 @@ mod tests { .ok_or_else(|| eyre::eyre!("Object not found: {}", key)) } - async fn list_objects(&self) -> eyre::Result> { + async fn list_objects(&self, _: &str) -> eyre::Result> { Ok(self.objects.keys().cloned().collect()) } } @@ -245,6 +236,21 @@ mod tests { } } + #[tokio::test] + async fn test_last_snapshot_timestamp() { + let mut store = MockStore::new(); + store.add_timestamp_file("out/timestamps/123_100_954"); + store.add_timestamp_file("out/timestamps/124_100_958"); + store.add_timestamp_file("out/timestamps/125_100_958"); + + let last_snapshot = last_snapshot_timestamp(&store, "out".to_string()) + .await + .unwrap(); + assert_eq!(last_snapshot.timestamp, 125); + assert_eq!(last_snapshot.last_serial_id, 958); + assert_eq!(last_snapshot.chunk_size, 100); + } + #[tokio::test] async fn test_fetch_and_parse_chunks() { const MOCK_ENTRIES: usize = 107; @@ -255,17 +261,19 @@ mod tests { let start_serial_id = i * MOCK_CHUNK_SIZE + 1; let end_serial_id = min((i + 1) * MOCK_CHUNK_SIZE, MOCK_ENTRIES); store.add_test_data( - &format!("{start_serial_id}.csv"), + &format!("out/{start_serial_id}.bin"), (start_serial_id..=end_serial_id).map(dummy_entry).collect(), ); } - assert_eq!( - store.list_objects().await.unwrap().len(), - MOCK_ENTRIES.div_ceil(MOCK_CHUNK_SIZE) - ); - - let mut chunks = fetch_and_parse_chunks(&store, 1).await; + assert_eq!(store.list_objects("").await.unwrap().len(), n_chunks); + let last_snapshot_details = LastSnapshotDetails { + timestamp: 0, + last_serial_id: MOCK_ENTRIES as i64, + chunk_size: MOCK_CHUNK_SIZE as i64, + }; + let mut chunks = + fetch_and_parse_chunks(&store, 1, "out".to_string(), last_snapshot_details).await; let mut count = 0; let mut ids: HashSet = HashSet::from_iter(1..MOCK_ENTRIES); while let Some(chunk) = chunks.next().await { diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index cf60621eb..6b7208ccf 100644 --- a/iris-mpc/src/bin/server.rs 
+++ b/iris-mpc/src/bin/server.rs @@ -905,6 +905,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { let load_chunks_parallelism = config.load_chunks_parallelism; let db_chunks_bucket_name = config.db_chunks_bucket_name.clone(); + let db_chunks_folder_name = config.db_chunks_folder_name.clone(); let (tx, rx) = oneshot::channel(); background_tasks.spawn_blocking(move || { @@ -989,15 +990,27 @@ async fn server_main(config: Config) -> eyre::Result<()> { true => { tracing::info!("S3 importer enabled. Fetching from s3 + db"); // First fetch last snapshot from S3 - let last_snapshot_timestamp = - last_snapshot_timestamp(&s3_store).await?; - let min_last_modified_at = - last_snapshot_timestamp - config.db_load_safety_overlap_seconds; - let stream_s3 = - fetch_and_parse_chunks(&s3_store, load_chunks_parallelism) - .await - .map(|result| result.map(IrisSource::S3)) - .boxed(); + let last_snapshot_details = last_snapshot_timestamp( + &s3_store, + db_chunks_folder_name.clone(), + ) + .await?; + let min_last_modified_at = last_snapshot_details.timestamp + - config.db_load_safety_overlap_seconds; + tracing::info!( + "Last snapshot timestamp: {}, min_last_modified_at: {}", + last_snapshot_details.timestamp, + min_last_modified_at + ); + let stream_s3 = fetch_and_parse_chunks( + &s3_store, + load_chunks_parallelism, + db_chunks_folder_name, + last_snapshot_details, + ) + .await + .map(|result| result.map(IrisSource::S3)) + .boxed(); let stream_db = store .stream_irises_par(Some(min_last_modified_at), parallelism) From b80c8b064d2ad0eb80086eba05d6d872959b1341 Mon Sep 17 00:00:00 2001 From: iliailia Date: Wed, 11 Dec 2024 11:30:43 +0100 Subject: [PATCH 084/170] Generate and read benchmark data from files (#778) --- iris-mpc-cpu/.gitignore | 1 + iris-mpc-cpu/Cargo.toml | 6 +- iris-mpc-cpu/benches/hnsw.rs | 26 ++++-- iris-mpc-cpu/bin/generate_benchmark_data.rs | 26 ++++++ iris-mpc-cpu/src/hawkers/galois_store.rs | 88 +++++++++++++++++++-- 
iris-mpc-cpu/src/hawkers/plaintext_store.rs | 50 ++++++++++-- 6 files changed, 178 insertions(+), 19 deletions(-) create mode 100644 iris-mpc-cpu/.gitignore create mode 100644 iris-mpc-cpu/bin/generate_benchmark_data.rs diff --git a/iris-mpc-cpu/.gitignore b/iris-mpc-cpu/.gitignore new file mode 100644 index 000000000..249cda967 --- /dev/null +++ b/iris-mpc-cpu/.gitignore @@ -0,0 +1 @@ +/data \ No newline at end of file diff --git a/iris-mpc-cpu/Cargo.toml b/iris-mpc-cpu/Cargo.toml index 05292c77d..6940012ac 100644 --- a/iris-mpc-cpu/Cargo.toml +++ b/iris-mpc-cpu/Cargo.toml @@ -53,4 +53,8 @@ name = "hnsw-ex" [[bin]] name = "local_hnsw" -path = "bin/local_hnsw.rs" \ No newline at end of file +path = "bin/local_hnsw.rs" + +[[bin]] +name = "generate_benchmark_data" +path = "bin/generate_benchmark_data.rs" \ No newline at end of file diff --git a/iris-mpc-cpu/benches/hnsw.rs b/iris-mpc-cpu/benches/hnsw.rs index 48cea2651..62be58814 100644 --- a/iris-mpc-cpu/benches/hnsw.rs +++ b/iris-mpc-cpu/benches/hnsw.rs @@ -162,23 +162,39 @@ fn bench_gr_primitives(c: &mut Criterion) { }); } +/// To run this benchmark, you need to generate the data first by running the +/// following commands: +/// +/// cargo run --release --bin generate_benchmark_data fn bench_gr_ready_made_hnsw(c: &mut Criterion) { let mut group = c.benchmark_group("gr_ready_made_hnsw"); group.sample_size(10); - for database_size in [1, 10, 100, 1000, 10000, 100000] { + for database_size in [1, 10, 100, 1000, 10_000, 100_000] { let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() .build() .unwrap(); - let (_, secret_searcher) = rt.block_on(async move { + let secret_searcher = rt.block_on(async move { let mut rng = AesRng::seed_from_u64(0_u64); - LocalNetAby3NgStoreProtocol::lazy_random_setup_with_grpc(&mut rng, database_size, false) - .await - .unwrap() + LocalNetAby3NgStoreProtocol::lazy_setup_from_files_with_grpc( + "./data/store.ndjson", + &format!("./data/graph_{}.dat", database_size), + &mut 
rng, + database_size, + false, + ) + .await }); + if let Err(e) = secret_searcher { + eprintln!("bench_gr_ready_made_hnsw failed. {e:?}"); + rt.shutdown_timeout(std::time::Duration::from_secs(5)); + return; + } + let (_, secret_searcher) = secret_searcher.unwrap(); + group.bench_function( BenchmarkId::new("gr-big-hnsw-insertions", database_size), |b| { diff --git a/iris-mpc-cpu/bin/generate_benchmark_data.rs b/iris-mpc-cpu/bin/generate_benchmark_data.rs new file mode 100644 index 000000000..f89c285b6 --- /dev/null +++ b/iris-mpc-cpu/bin/generate_benchmark_data.rs @@ -0,0 +1,26 @@ +use aes_prng::AesRng; +use iris_mpc_cpu::{ + hawkers::plaintext_store::PlaintextStore, + py_bindings::{io::write_bin, plaintext_store::to_ndjson_file}, +}; +use rand::SeedableRng; +use std::error::Error; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // create a folder ./iris-mpc-cpu/data if it is non-existent + let crate_root = env!("CARGO_MANIFEST_DIR"); + std::fs::create_dir_all(format!("{crate_root}/data"))?; + let mut rng = AesRng::seed_from_u64(0_u64); + println!("Generating plaintext store with 100_000 irises"); + let mut store = PlaintextStore::create_random_store(&mut rng, 100_000).await?; + println!("Writing store to file"); + to_ndjson_file(&store, &format!("{crate_root}/data/store.ndjson"))?; + + for graph_size in [1, 10, 100, 1000, 10_000, 100_000] { + println!("Generating graph with {} vertices", graph_size); + let graph = store.create_graph(&mut rng, graph_size).await?; + write_bin(&graph, &format!("{crate_root}/data/graph_{graph_size}.dat"))?; + } + Ok(()) +} diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs index 520570821..9fc15c281 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -12,6 +12,7 @@ use crate::{ batch_signed_lift_vec, compare_threshold_and_open, cross_compare, galois_ring_pairwise_distance, galois_ring_to_rep3, }, + py_bindings::{io::read_bin, 
plaintext_store::from_ndjson_file}, shares::{ ring_impl::RingElement, share::{DistanceShare, Share}, @@ -23,7 +24,7 @@ use hawk_pack::{ graph_store::{graph_mem::Layer, GraphMem}, GraphStore, HawkSearcher, VectorStore, }; -use iris_mpc_common::iris_db::{db::IrisDB, iris::IrisCode}; +use iris_mpc_common::iris_db::db::IrisDB; use rand::{CryptoRng, RngCore, SeedableRng}; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, fmt::Debug, sync::Arc, vec}; @@ -113,15 +114,15 @@ pub fn setup_local_player_preloaded_db( pub async fn setup_local_aby3_players_with_preloaded_db( rng: &mut R, - database: Vec, + plain_store: &PlaintextStore, network_t: NetworkType, ) -> eyre::Result> { let identities = generate_local_identities(); let mut shared_irises = vec![vec![]; identities.len()]; - for iris in database { - let all_shares = generate_galois_iris_shares(rng, iris); + for iris in plain_store.points.iter() { + let all_shares = generate_galois_iris_shares(rng, iris.data.0.clone()); for (i, shares) in all_shares.iter().enumerate() { shared_irises[i].push(shares.clone()); } @@ -359,6 +360,80 @@ impl LocalNetAby3NgStoreProtocol { } impl LocalNetAby3NgStoreProtocol { + /// Generates 3 pairs of vector stores and graphs from a plaintext + /// vector store and graph read from disk, which are returned as well. + /// The network type is specified by the user. + /// A recompute flag is used to determine whether to recompute the distances + /// from stored shares. If recompute is set to false, the distances are + /// naively converted from plaintext. + pub async fn lazy_setup_from_files( + plainstore_file: &str, + plaingraph_file: &str, + rng: &mut R, + database_size: usize, + network_t: NetworkType, + recompute_distances: bool, + ) -> eyre::Result<( + (PlaintextStore, GraphMem), + Vec<(Self, GraphMem)>, + )> { + if database_size > 100_000 { + return Err(eyre::eyre!("Database size too large, max. 
100,000")); + } + let generation_comment = "Please, generate benchmark data with cargo run --release --bin \ + generate_benchmark_data."; + let plaintext_vector_store = from_ndjson_file(plainstore_file, Some(database_size)) + .map_err(|e| eyre::eyre!("Cannot find store: {e}. {generation_comment}"))?; + let plaintext_graph_store: GraphMem = read_bin(plaingraph_file) + .map_err(|e| eyre::eyre!("Cannot find graph: {e}. {generation_comment}"))?; + + let protocol_stores = + setup_local_aby3_players_with_preloaded_db(rng, &plaintext_vector_store, network_t) + .await?; + + let mut jobs = JoinSet::new(); + for store in protocol_stores.iter() { + let mut store = store.clone(); + let plaintext_graph_store = plaintext_graph_store.clone(); + jobs.spawn(async move { + ( + store.clone(), + store + .graph_from_plain(&plaintext_graph_store, recompute_distances) + .await, + ) + }); + } + let mut secret_shared_stores = jobs.join_all().await; + secret_shared_stores.sort_by_key(|(store, _)| store.get_owner_index()); + let plaintext = (plaintext_vector_store, plaintext_graph_store); + Ok((plaintext, secret_shared_stores)) + } + + /// Generates 3 pairs of vector stores and graphs from a plaintext + /// vector store and graph read from disk, which are returned as well. + /// Networking is based on gRPC. + pub async fn lazy_setup_from_files_with_grpc( + plainstore_file: &str, + plaingraph_file: &str, + rng: &mut R, + database_size: usize, + recompute_distances: bool, + ) -> eyre::Result<( + (PlaintextStore, GraphMem), + Vec<(Self, GraphMem)>, + )> { + Self::lazy_setup_from_files( + plainstore_file, + plaingraph_file, + rng, + database_size, + NetworkType::GrpcChannel, + recompute_distances, + ) + .await + } + /// Generates 3 pairs of vector stores and graphs from a random plaintext /// vector store and graph, which are returned as well. /// The network type is specified by the user. 
@@ -374,11 +449,12 @@ impl LocalNetAby3NgStoreProtocol { (PlaintextStore, GraphMem), Vec<(Self, GraphMem)>, )> { - let (cleartext_database, plaintext_vector_store, plaintext_graph_store) = + let (plaintext_vector_store, plaintext_graph_store) = PlaintextStore::create_random(rng, database_size).await?; let protocol_stores = - setup_local_aby3_players_with_preloaded_db(rng, cleartext_database, network_t).await?; + setup_local_aby3_players_with_preloaded_db(rng, &plaintext_vector_store, network_t) + .await?; let mut jobs = JoinSet::new(); for store in protocol_stores.iter() { diff --git a/iris-mpc-cpu/src/hawkers/plaintext_store.rs b/iris-mpc-cpu/src/hawkers/plaintext_store.rs index 2e10301e5..80a462a82 100644 --- a/iris-mpc-cpu/src/hawkers/plaintext_store.rs +++ b/iris-mpc-cpu/src/hawkers/plaintext_store.rs @@ -141,7 +141,7 @@ impl PlaintextStore { pub async fn create_random( rng: &mut R, database_size: usize, - ) -> eyre::Result<(Vec, Self, GraphMem)> { + ) -> eyre::Result<(Self, GraphMem)> { // makes sure the searcher produces same graph structure by having the same rng let mut rng_searcher1 = AesRng::from_rng(rng.clone())?; let cleartext_database = IrisDB::new_random_rng(database_size, rng).db; @@ -162,11 +162,47 @@ impl PlaintextStore { .await; } - Ok(( - cleartext_database, - plaintext_vector_store, - plaintext_graph_store, - )) + Ok((plaintext_vector_store, plaintext_graph_store)) + } + + pub async fn create_random_store( + rng: &mut R, + database_size: usize, + ) -> eyre::Result { + let cleartext_database = IrisDB::new_random_rng(database_size, rng).db; + + let mut plaintext_vector_store = PlaintextStore::default(); + + for raw_query in cleartext_database.iter() { + let query = plaintext_vector_store.prepare_query(raw_query.clone()); + let _ = plaintext_vector_store.insert(&query).await; + } + + Ok(plaintext_vector_store) + } + + pub async fn create_graph( + &mut self, + rng: &mut R, + graph_size: usize, + ) -> eyre::Result> { + let mut rng_searcher1 = 
AesRng::from_rng(rng.clone())?; + + let mut plaintext_graph_store = GraphMem::new(); + let searcher = HawkSearcher::default(); + + for i in 0..graph_size { + searcher + .insert( + self, + &mut plaintext_graph_store, + &i.into(), + &mut rng_searcher1, + ) + .await; + } + + Ok(plaintext_graph_store) } } @@ -257,7 +293,7 @@ mod tests { let mut rng = AesRng::seed_from_u64(0_u64); let database_size = 1; let searcher = HawkSearcher::default(); - let (_, mut ptxt_vector, mut ptxt_graph) = + let (mut ptxt_vector, mut ptxt_graph) = PlaintextStore::create_random(&mut rng, database_size) .await .unwrap(); From b4d43791985689c489fc0487e16b285c4ef945c6 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 11 Dec 2024 12:46:55 +0100 Subject: [PATCH 085/170] release v0.12.4 to prod (#784) * release v0.12.4 to prod * enable importer --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 8 +++++++- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 8 +++++++- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 8 +++++++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index a5f399261..19bb4927f 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.3" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.4" environment: prod replicaCount: 1 diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index c4c0d67c7..ff5701b48 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -78,11 +78,17 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" + - name: SMPC__DB_CHUNKS_FOLDER_NAME + value: 
"binary_output_2k" + + - name: SMPC__LOAD_CHUNKS_PARALLELISM + value: "32" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index ef0519f29..3bf1744a1 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -78,11 +78,17 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" + - name: SMPC__DB_CHUNKS_FOLDER_NAME + value: "binary_output_2k" + + - name: SMPC__LOAD_CHUNKS_PARALLELISM + value: "32" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 66a954204..c47b8bc5b 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -78,11 +78,17 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" + - name: SMPC__DB_CHUNKS_FOLDER_NAME + value: "binary_output_2k" + + - name: SMPC__LOAD_CHUNKS_PARALLELISM + value: "32" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 356f6389ef95e467651fc9b04fafe9d0b1e2e018 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 11 Dec 2024 13:55:28 +0100 Subject: [PATCH 086/170] disable importer in prod for debug (#787) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index ff5701b48..1ba1d01d6 100644 --- 
a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 3bf1744a1..82467e0f6 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index c47b8bc5b..b2016fc61 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" From 3a738e7faa269d1823a7b786fe4b1fd27691086b Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 11 Dec 2024 16:16:59 +0100 Subject: [PATCH 087/170] fix: avoid incrementing memory size for record overrides from db (#789) * change max db size to 1M in stage * do not increment memory size if the record is duplicate * bump stage img --- deploy/stage/common-values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- iris-mpc-gpu/src/dot/share_db.rs | 2 +- iris-mpc-gpu/src/server/actor.rs | 3 +++ iris-mpc/src/bin/server.rs | 6 ++++++ 7 files changed, 14 insertions(+), 5 deletions(-) 
diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index cdaa36dab..6570427c0 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.5" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 79f2885a9..e393ad3e3 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "800000" - name: SMPC__MAX_DB_SIZE - value: "1100000" + value: "1000000" - name: SMPC__MAX_BATCH_SIZE value: "64" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 902c69a54..1138c123f 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "800000" - name: SMPC__MAX_DB_SIZE - value: "1100000" + value: "1000000" - name: SMPC__MAX_BATCH_SIZE value: "64" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 2f497bb7c..c1dfd7c32 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "800000" - name: SMPC__MAX_DB_SIZE - value: "1100000" + value: "1000000" - name: SMPC__MAX_BATCH_SIZE value: "64" diff --git a/iris-mpc-gpu/src/dot/share_db.rs b/iris-mpc-gpu/src/dot/share_db.rs index d1169a761..bd0faf29c 100644 --- a/iris-mpc-gpu/src/dot/share_db.rs +++ b/iris-mpc-gpu/src/dot/share_db.rs @@ -281,7 +281,7 @@ impl ShareDB { n_shards: usize, code_length: usize, ) { - assert!(record.len() == code_length); + assert_eq!(record.len(), code_length); let a0_host = record .iter() diff --git a/iris-mpc-gpu/src/server/actor.rs 
b/iris-mpc-gpu/src/server/actor.rs index 16889ca56..65b5952d4 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -461,6 +461,9 @@ impl ServerActor { self.device_manager.device_count(), MASK_CODE_LENGTH, ); + } + + pub fn increment_db_size(&mut self, index: usize) { self.current_db_sizes[index % self.device_manager.device_count()] += 1; } diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 6b7208ccf..bb2abacef 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -1087,6 +1087,12 @@ async fn server_main(config: Config) -> eyre::Result<()> { iris.right_code(), iris.right_mask(), ); + + // if the serial id hasn't been loaded before, count is as unique record + if all_serial_ids.contains(&(iris.index() as i64)) { + actor.increment_db_size(iris.index() - 1); + } + time_loading_into_memory += now_load_summary.elapsed(); now_load_summary = Instant::now(); From 2f93afc0e57777aeb21753401b5100031bfd0189 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 11 Dec 2024 16:57:35 +0100 Subject: [PATCH 088/170] release v0.12.5 to prod (#790) * release v0.12.5 to prod * bump prod heartbeat timeout and max db size (#788) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 8 ++++---- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 8 ++++---- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 19bb4927f..ecbc9d3a3 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.5" environment: prod replicaCount: 1 diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 1ba1d01d6..2f4e25a4a 100644 --- 
a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -60,7 +60,7 @@ env: value: "2" - name: SMPC__HEARTBEAT_INITIAL_RETRIES - value: "30" + value: "65" - name: SMPC__PATH value: "/data/" @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" @@ -96,7 +96,7 @@ env: value: "0" - name: SMPC__MAX_DB_SIZE - value: "10000000" + value: "20000000" - name: SMPC__FAKE_DB_SIZE value: "0" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 82467e0f6..ba0e0be23 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -60,7 +60,7 @@ env: value: "2" - name: SMPC__HEARTBEAT_INITIAL_RETRIES - value: "30" + value: "65" - name: SMPC__PATH value: "/data/" @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" @@ -96,7 +96,7 @@ env: value: "0" - name: SMPC__MAX_DB_SIZE - value: "10000000" + value: "20000000" - name: SMPC__FAKE_DB_SIZE value: "0" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index b2016fc61..396984abf 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -60,7 +60,7 @@ env: value: "2" - name: SMPC__HEARTBEAT_INITIAL_RETRIES 
- value: "30" + value: "65" - name: SMPC__PATH value: "/data/" @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" @@ -96,7 +96,7 @@ env: value: "0" - name: SMPC__MAX_DB_SIZE - value: "10000000" + value: "20000000" - name: SMPC__FAKE_DB_SIZE value: "0" From 6d6afa2fd86e99d4156b0b39887766a2543faffa Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Wed, 11 Dec 2024 08:50:03 -0800 Subject: [PATCH 089/170] [POP-1990] add e2e values for iris-mpc (#785) * add e2e values for iris-mpc * use tpl values --- deploy/e2e/iris-mpc-0.yaml.tpl | 239 +++++++++++++++++++++++++++++++++ deploy/e2e/iris-mpc-1.yaml.tpl | 239 +++++++++++++++++++++++++++++++++ deploy/e2e/iris-mpc-2.yaml.tpl | 238 ++++++++++++++++++++++++++++++++ 3 files changed, 716 insertions(+) create mode 100644 deploy/e2e/iris-mpc-0.yaml.tpl create mode 100644 deploy/e2e/iris-mpc-1.yaml.tpl create mode 100644 deploy/e2e/iris-mpc-2.yaml.tpl diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl new file mode 100644 index 000000000..cbf2f1a7e --- /dev/null +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -0,0 +1,239 @@ +iris-mpc-0: + fullNameOverride: "iris-mpc-0" + image: "ghcr.io/worldcoin/iris-mpc:v0.12.2" + + environment: e2e + replicaCount: 1 + + strategy: + type: Recreate + + datadog: + enabled: false + + ports: + - containerPort: 3000 + name: health + protocol: TCP + + livenessProbe: + httpGet: + path: /health + port: health + + readinessProbe: + periodSeconds: 30 + httpGet: + path: /ready + port: health + + startupProbe: + initialDelaySeconds: 60 + failureThreshold: 40 + periodSeconds: 30 + httpGet: + path: /ready + port: health + + resources: + limits: + cpu: 31 + 
memory: 600Gi + nvidia.com/gpu: 1 + vpc.amazonaws.com/efa: 1 + requests: + cpu: 30 + memory: 55Gi + nvidia.com/gpu: 1 + vpc.amazonaws.com/efa: 1 + + imagePullSecrets: + - name: github-secret + + nodeSelector: + kubernetes.io/arch: amd64 + + hostNetwork: true + + podSecurityContext: + runAsUser: 65534 + runAsGroup: 65534 + + tolerations: + - key: "dedicated" + operator: "Equal" + value: "gpuGroup" + effect: "NoSchedule" + + keelPolling: + # -- Specifies whether keel should poll for container updates + enabled: true + + libsDir: + enabled: true + path: "/libs" + size: 2Gi + files: + - path: "/usr/local/cuda-12.2/targets/x86_64-linux/lib/libcublasLt.so.12.2.5.6" + file: "libcublasLt.so.12.2.5.6" + - path: "/usr/local/cuda-12.2/targets/x86_64-linux/lib/libcublas.so.12.2.5.6" + file: "libcublas.so.12.2.5.6" + + preStop: + # preStop.sleepPeriod specifies the time spent in Terminating state before SIGTERM is sent + sleepPeriod: 10 + + # terminationGracePeriodSeconds specifies the grace time between SIGTERM and SIGKILL + terminationGracePeriodSeconds: 180 # 3x SMPC__PROCESSING_TIMEOUT_SECS + + env: + - name: RUST_LOG + value: "info" + + - name: RUST_BACKTRACE + value: "full" + + - name: NCCL_SOCKET_IFNAME + value: "eth0" + + - name: NCCL_COMM_ID + value: "iris-mpc-node.1.e2e.smpcv2.worldcoin.dev:4000" + + - name: SMPC__ENVIRONMENT + value: "e2e" + + - name: SMPC__SERVICE__SERVICE_NAME + value: "smpcv2-server-e2e" + + - name: SMPC__DATABASE__URL + valueFrom: + secretKeyRef: + key: DATABASE_AURORA_URL + name: application + + - name: SMPC__DATABASE__MIGRATE + value: "true" + + - name: SMPC__DATABASE__CREATE + value: "true" + + - name: SMPC__DATABASE__LOAD_PARALLELISM + value: "8" + + - name: SMPC__AWS__REGION + value: "eu-north-1" + + - name: SMPC__REQUESTS_QUEUE_URL + value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" + + - name: SMPC__RESULTS_TOPIC_ARN + value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-results" + + - name: SMPC__PROCESSING_TIMEOUT_SECS + 
value: "60" + + - name: SMPC__PATH + value: "/data/" + + - name: SMPC__KMS_KEY_ARNS + value: [ + "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000", + "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001", + "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002" + ] + + - name: SMPC__PARTY_ID + value: "0" + + - name: SMPC__PUBLIC_KEY_BASE_URL + value: "https://pki-smpcv2-stage.worldcoin.org" + + - name: SMPC__ENABLE_S3_IMPORTER + value: "false" + + - name: SMPC__SHARES_BUCKET_NAME + value: "wf-smpcv2-stage-sns-requests" + + - name: SMPC__CLEAR_DB_BEFORE_INIT + value: "true" + + - name: SMPC__INIT_DB_SIZE + value: "80000" + + - name: SMPC__MAX_DB_SIZE + value: "110000" + + - name: SMPC__MAX_BATCH_SIZE + value: "64" + + - name: SMPC__SERVICE__METRICS__HOST + valueFrom: + fieldRef: + fieldPath: status.hostIP + + - name: SMPC__SERVICE__METRICS__PORT + value: "8125" + + - name: SMPC__SERVICE__METRICS__QUEUE_SIZE + value: "5000" + + - name: SMPC__SERVICE__METRICS__BUFFER_SIZE + value: "256" + + - name: SMPC__SERVICE__METRICS__PREFIX + value: "smpcv2-e2e-0" + + - name: SMPC__RETURN_PARTIAL_RESULTS + value: "true" + + - name: SMPC__NODE_HOSTNAMES + value: '["iris-mpc-node.1.e2e.smpcv2.worldcoin.dev","iris-mpc-node.2.e2e.smpcv2.worldcoin.dev","iris-mpc-node.3.e2e.smpcv2.worldcoin.dev"]' + + - name: SMPC__IMAGE_NAME + value: $(IMAGE_NAME) + + initContainer: + enabled: true + image: "amazon/aws-cli:2.17.62" + name: "iris-mpc-copy-cuda-libs" + env: + - name: PARTY_ID + value: "1" + - name: MY_NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + configMap: + init.sh: | + #!/usr/bin/env bash + + # Set up environment variables + HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name "$PARTY_ID".e2e.smpcv2.worldcoin.dev --query "HostedZones[].Id" --output text) + + # Generate the JSON content in memory + BATCH_JSON=$(cat < Date: Thu, 12 Dec 2024 08:40:26 +0100 Subject: [PATCH 090/170] 
try 192 parallelism on s3 import (#791) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 2f4e25a4a..b68f0dc4e 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "192" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index ba0e0be23..cb9a91b0b 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "192" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 396984abf..7512d136c 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "192" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 9f4b46e616de84b2460610fd4a3d0ae5ba4d20f9 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Thu, 12 Dec 2024 09:08:22 +0100 Subject: [PATCH 091/170] try 32 parallelism on s3 import (#792) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml 
b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index b68f0dc4e..5a65612ff 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "192" + value: "32" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index cb9a91b0b..e3382df00 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "192" + value: "32" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 7512d136c..e6b104b90 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "192" + value: "32" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 283a9d618c3b855e5da2093d5290bfd5bce6e62e Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Thu, 12 Dec 2024 00:47:31 -0800 Subject: [PATCH 092/170] fixed image name (#793) --- deploy/e2e/iris-mpc-0.yaml.tpl | 2 +- deploy/e2e/iris-mpc-1.yaml.tpl | 2 +- deploy/e2e/iris-mpc-2.yaml.tpl | 76 +++++++++++++++++----------------- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index cbf2f1a7e..dc9de1c9d 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -190,7 +190,7 @@ iris-mpc-0: value: '["iris-mpc-node.1.e2e.smpcv2.worldcoin.dev","iris-mpc-node.2.e2e.smpcv2.worldcoin.dev","iris-mpc-node.3.e2e.smpcv2.worldcoin.dev"]' - name: SMPC__IMAGE_NAME - value: $(IMAGE_NAME) + value: 
"ghcr.io/worldcoin/iris-mpc:v0.12.2" initContainer: enabled: true diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index ed7a18ded..91ea16e6e 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -190,7 +190,7 @@ iris-mpc-1: value: '["iris-mpc-node.1.e2e.smpcv2.worldcoin.dev","iris-mpc-node.2.e2e.smpcv2.worldcoin.dev","iris-mpc-node.3.e2e.smpcv2.worldcoin.dev"]' - name: SMPC__IMAGE_NAME - value: $(IMAGE_NAME) + value: "ghcr.io/worldcoin/iris-mpc:v0.12.2" initContainer: enabled: true diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 424a59509..ac433aa1f 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -85,112 +85,112 @@ iris-mpc-2: # terminationGracePeriodSeconds specifies the grace time between SIGTERM and SIGKILL terminationGracePeriodSeconds: 180 # 3x SMPC__PROCESSING_TIMEOUT_SECS - + env: - name: RUST_LOG value: "info" - + - name: RUST_BACKTRACE value: "full" - + - name: NCCL_SOCKET_IFNAME value: "eth0" - + - name: NCCL_COMM_ID value: "iris-mpc-node.1.e2e.smpcv2.worldcoin.dev:4000" - + - name: SMPC__ENVIRONMENT value: "e2e" - + - name: SMPC__SERVICE__SERVICE_NAME value: "smpcv2-server-e2e" - + - name: SMPC__DATABASE__URL valueFrom: secretKeyRef: key: DATABASE_AURORA_URL name: application - + - name: SMPC__DATABASE__MIGRATE value: "true" - + - name: SMPC__DATABASE__CREATE value: "true" - + - name: SMPC__DATABASE__LOAD_PARALLELISM value: "8" - + - name: SMPC__AWS__REGION value: "eu-north-1" - + - name: SMPC__REQUESTS_QUEUE_URL value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" - + - name: SMPC__RESULTS_TOPIC_ARN value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-results" - + - name: SMPC__PROCESSING_TIMEOUT_SECS value: "60" - + - name: SMPC__PATH value: "/data/" - + - name: SMPC__KMS_KEY_ARNS valueFrom: secretKeyRef: key: KMS_KEYS name: application - + - name: SMPC__PARTY_ID value: "2" - + - name: SMPC__PUBLIC_KEY_BASE_URL 
value: "https://pki-smpcv2-stage.worldcoin.org" - + - name: SMPC__ENABLE_S3_IMPORTER value: "false" - + - name: SMPC__SHARES_BUCKET_NAME value: "wf-smpcv2-stage-sns-requests" - + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" - + - name: SMPC__INIT_DB_SIZE value: "80000" - + - name: SMPC__MAX_DB_SIZE value: "110000" - + - name: SMPC__MAX_BATCH_SIZE value: "64" - + - name: SMPC__SERVICE__METRICS__HOST valueFrom: fieldRef: fieldPath: status.hostIP - + - name: SMPC__SERVICE__METRICS__PORT value: "8125" - + - name: SMPC__SERVICE__METRICS__QUEUE_SIZE value: "5000" - + - name: SMPC__SERVICE__METRICS__BUFFER_SIZE value: "256" - + - name: SMPC__SERVICE__METRICS__PREFIX value: "smpcv2-e2e-2" - + - name: SMPC__RETURN_PARTIAL_RESULTS value: "true" - + - name: SMPC__NODE_HOSTNAMES value: '["iris-mpc-node.1.e2e.smpcv2.worldcoin.dev","iris-mpc-node.2.e2e.smpcv2.worldcoin.dev","iris-mpc-node.3.e2e.smpcv2.worldcoin.dev"]' - + - name: SMPC__IMAGE_NAME - value: $(IMAGE_NAME) - + value: "ghcr.io/worldcoin/iris-mpc:v0.12.2" + initContainer: enabled: true image: "amazon/aws-cli:2.17.62" @@ -205,10 +205,10 @@ iris-mpc-2: configMap: init.sh: | #!/usr/bin/env bash - + # Set up environment variables HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name "$PARTY_ID".e2e.smpcv2.worldcoin.dev --query "HostedZones[].Id" --output text) - + # Generate the JSON content in memory BATCH_JSON=$(cat < Date: Thu, 12 Dec 2024 01:09:51 -0800 Subject: [PATCH 093/170] scale down (#794) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index ecbc9d3a3..62b5aef62 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.12.5" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From cc127358ad655c8db2ef8a60681bd5f25585eca9 Mon Sep 17 
00:00:00 2001 From: Carlo Mazzaferro Date: Thu, 12 Dec 2024 01:33:57 -0800 Subject: [PATCH 094/170] scale back up (#795) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 62b5aef62..ecbc9d3a3 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.12.5" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From e3cd63c0a19d9dff891c396b8d55e614ae9307d6 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Thu, 12 Dec 2024 13:35:37 +0100 Subject: [PATCH 095/170] fix array in values --- deploy/e2e/iris-mpc-0.yaml.tpl | 7 +------ deploy/e2e/iris-mpc-1.yaml.tpl | 6 +----- deploy/e2e/iris-mpc-2.yaml.tpl | 5 +---- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index dc9de1c9d..c6d3d9017 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -136,12 +136,7 @@ iris-mpc-0: value: "/data/" - name: SMPC__KMS_KEY_ARNS - value: [ - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000", - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001", - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002" - ] - + value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "0" diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 91ea16e6e..d3bd5c169 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -136,11 +136,7 @@ iris-mpc-1: value: "/data/" - name: 
SMPC__KMS_KEY_ARNS - value: [ - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000", - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001", - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002" - ] + value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "1" diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index ac433aa1f..3c0b70716 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -136,10 +136,7 @@ iris-mpc-2: value: "/data/" - name: SMPC__KMS_KEY_ARNS - valueFrom: - secretKeyRef: - key: KMS_KEYS - name: application + value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "2" From 67869eacc105f13cee72ab35691309547a6e36d2 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Thu, 12 Dec 2024 04:36:16 -0800 Subject: [PATCH 096/170] fix array in values (#796) --- deploy/e2e/iris-mpc-0.yaml.tpl | 7 +------ deploy/e2e/iris-mpc-1.yaml.tpl | 6 +----- deploy/e2e/iris-mpc-2.yaml.tpl | 5 +---- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index dc9de1c9d..c6d3d9017 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -136,12 +136,7 @@ iris-mpc-0: value: "/data/" - name: SMPC__KMS_KEY_ARNS - value: [ - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000", - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001", - 
"arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002" - ] - + value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "0" diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 91ea16e6e..d3bd5c169 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -136,11 +136,7 @@ iris-mpc-1: value: "/data/" - name: SMPC__KMS_KEY_ARNS - value: [ - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000", - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001", - "arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002" - ] + value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "1" diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index ac433aa1f..3c0b70716 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -136,10 +136,7 @@ iris-mpc-2: value: "/data/" - name: SMPC__KMS_KEY_ARNS - valueFrom: - secretKeyRef: - key: KMS_KEYS - name: application + value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "2" From 091352082199d52ba80d42ae8ab7eade26ab42dc Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 13 Dec 2024 08:46:56 +0100 Subject: [PATCH 097/170] change memory limits --- deploy/e2e/iris-mpc-0.yaml.tpl | 
2 +- deploy/e2e/iris-mpc-1.yaml.tpl | 2 +- deploy/e2e/iris-mpc-2.yaml.tpl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index c6d3d9017..be777b964 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -38,7 +38,7 @@ iris-mpc-0: resources: limits: cpu: 31 - memory: 600Gi + memory: 60Gi nvidia.com/gpu: 1 vpc.amazonaws.com/efa: 1 requests: diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index d3bd5c169..bea355ae1 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -43,7 +43,7 @@ iris-mpc-1: vpc.amazonaws.com/efa: 1 requests: cpu: 30 - memory: 60Gi + memory: 55Gi nvidia.com/gpu: 1 vpc.amazonaws.com/efa: 1 diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 3c0b70716..2bdf81311 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -38,7 +38,7 @@ iris-mpc-2: resources: limits: cpu: 31 - memory: 600Gi + memory: 60Gi nvidia.com/gpu: 1 vpc.amazonaws.com/efa: 1 requests: From 899bde9faf3ad2ae28ebae8e2b90d9086791eb6c Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 13 Dec 2024 01:22:43 -0800 Subject: [PATCH 098/170] host network false (#798) --- deploy/e2e/iris-mpc-0.yaml.tpl | 2 +- deploy/e2e/iris-mpc-1.yaml.tpl | 2 +- deploy/e2e/iris-mpc-2.yaml.tpl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index be777b964..dcc39efa2 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -53,7 +53,7 @@ iris-mpc-0: nodeSelector: kubernetes.io/arch: amd64 - hostNetwork: true + hostNetwork: false podSecurityContext: runAsUser: 65534 diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index bea355ae1..7838389d8 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -53,7 +53,7 @@ 
iris-mpc-1: nodeSelector: kubernetes.io/arch: amd64 - hostNetwork: true + hostNetwork: false podSecurityContext: runAsUser: 65534 diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 2bdf81311..5e3ce5b7e 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -53,7 +53,7 @@ iris-mpc-2: nodeSelector: kubernetes.io/arch: amd64 - hostNetwork: true + hostNetwork: false podSecurityContext: runAsUser: 65534 From c62abe1184587321fb219e9a10177f8145c374ab Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Fri, 13 Dec 2024 02:43:22 -0800 Subject: [PATCH 099/170] filter out supermatchers on partial results (#799) * filter out supermatchers on partial results * cap to batch_size --- iris-mpc-gpu/src/dot/distance_comparator.rs | 2 +- iris-mpc-gpu/src/server/actor.rs | 89 +++++++++++++++------ 2 files changed, 65 insertions(+), 26 deletions(-) diff --git a/iris-mpc-gpu/src/dot/distance_comparator.rs b/iris-mpc-gpu/src/dot/distance_comparator.rs index 6c459c717..455a9c70a 100644 --- a/iris-mpc-gpu/src/dot/distance_comparator.rs +++ b/iris-mpc-gpu/src/dot/distance_comparator.rs @@ -276,7 +276,7 @@ impl DistanceComparator { pub fn fetch_all_match_ids( &self, - match_counters: Vec>, + match_counters: &[Vec], matches: &[CudaSlice], ) -> Vec> { let mut results = vec![]; diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 65b5952d4..df5c15be8 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -720,7 +720,7 @@ impl ServerActor { // Transfer all match ids let match_ids = self.distance_comparator.fetch_all_match_ids( - match_counters_devices, + &match_counters_devices, &self.distance_comparator.all_matches, ); @@ -737,6 +737,65 @@ impl ServerActor { } } + // Fetch the partial matches + let ( + partial_match_ids_left, + partial_match_counters_left, + partial_match_ids_right, + partial_match_counters_right, + ) = if self.return_partial_results { + // Transfer the 
partial results to the host + let partial_match_counters_left = self + .distance_comparator + .fetch_match_counters(&self.distance_comparator.match_counters_left) + .into_iter() + .map(|x| x[..batch_size].to_vec()) + .collect::>(); + let partial_match_counters_right = self + .distance_comparator + .fetch_match_counters(&self.distance_comparator.match_counters_right) + .into_iter() + .map(|x| x[..batch_size].to_vec()) + .collect::>(); + + let partial_results_left = self.distance_comparator.fetch_all_match_ids( + &partial_match_counters_left, + &self.distance_comparator.partial_results_left, + ); + let partial_results_right = self.distance_comparator.fetch_all_match_ids( + &partial_match_counters_right, + &self.distance_comparator.partial_results_right, + ); + ( + partial_results_left, + partial_match_counters_left, + partial_results_right, + partial_match_counters_right, + ) + } else { + (vec![], vec![], vec![], vec![]) + }; + + let partial_match_counters_left = partial_match_counters_left.iter().fold( + vec![0usize; batch_size], + |mut acc, counters| { + for (i, &value) in counters.iter().enumerate() { + acc[i] += value as usize; + } + acc + }, + ); + + let partial_match_counters_right = partial_match_counters_right.iter().fold( + vec![0usize; batch_size], + |mut acc, counters| { + for (i, &value) in counters.iter().enumerate() { + acc[i] += value as usize; + } + acc + }, + ); + // Evaluate the results across devices // Format: merged_results[query_index] let mut merged_results = @@ -747,7 +806,10 @@ impl ServerActor { .iter() .enumerate() .filter(|&(idx, &num)| { - num == NON_MATCH_ID && match_counters[idx] <= SUPERMATCH_THRESHOLD + num == NON_MATCH_ID + // Filter-out supermatchers on both sides (TODO: remove this in the future) + && partial_match_counters_left[idx] <= SUPERMATCH_THRESHOLD + && partial_match_counters_right[idx] <= SUPERMATCH_THRESHOLD }) .map(|(idx, _num)| idx) .collect::>(); @@ -763,29 +825,6 @@ impl ServerActor { batch_size, ); - // Fetch the 
partial matches - let (partial_match_ids_left, partial_match_ids_right) = if self.return_partial_results { - // Transfer the partial results to the host - let partial_match_counters_left = self - .distance_comparator - .fetch_match_counters(&self.distance_comparator.match_counters_left); - let partial_match_counters_right = self - .distance_comparator - .fetch_match_counters(&self.distance_comparator.match_counters_right); - - let partial_results_left = self.distance_comparator.fetch_all_match_ids( - partial_match_counters_left, - &self.distance_comparator.partial_results_left, - ); - let partial_results_right = self.distance_comparator.fetch_all_match_ids( - partial_match_counters_right, - &self.distance_comparator.partial_results_right, - ); - (partial_results_left, partial_results_right) - } else { - (vec![], vec![]) - }; - // Check for batch matches let matched_batch_request_ids = match_ids .iter() From 8293d606681b1340c1ea2127ce876033b0cefc2c Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Fri, 13 Dec 2024 15:11:11 +0100 Subject: [PATCH 100/170] release v0.12.6 (#800) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index ecbc9d3a3..ca4344083 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.5" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.6" environment: prod replicaCount: 1 diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 6570427c0..741a025fd 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.5" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.6" environment: stage replicaCount: 1 From 
b3de9c84b67a82661f5efcd42b8c63f4814787c2 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 13 Dec 2024 15:19:31 +0100 Subject: [PATCH 101/170] change pod sec context --- deploy/e2e/iris-mpc-0.yaml.tpl | 6 +----- deploy/e2e/iris-mpc-1.yaml.tpl | 6 +----- deploy/e2e/iris-mpc-2.yaml.tpl | 6 +----- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index dcc39efa2..d1675576a 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -55,10 +55,6 @@ iris-mpc-0: hostNetwork: false - podSecurityContext: - runAsUser: 65534 - runAsGroup: 65534 - tolerations: - key: "dedicated" operator: "Equal" @@ -190,7 +186,7 @@ iris-mpc-0: initContainer: enabled: true image: "amazon/aws-cli:2.17.62" - name: "iris-mpc-copy-cuda-libs" + name: "iris-mpc-0-copy-cuda-libs" env: - name: PARTY_ID value: "1" diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 7838389d8..533179439 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -55,10 +55,6 @@ iris-mpc-1: hostNetwork: false - podSecurityContext: - runAsUser: 65534 - runAsGroup: 65534 - tolerations: - key: "dedicated" operator: "Equal" @@ -191,7 +187,7 @@ iris-mpc-1: initContainer: enabled: true image: "amazon/aws-cli:2.17.62" - name: "iris-mpc-copy-cuda-libs" + name: "iris-mpc-1-copy-cuda-libs" env: - name: PARTY_ID value: "2" diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 5e3ce5b7e..b90c7f979 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -55,10 +55,6 @@ iris-mpc-2: hostNetwork: false - podSecurityContext: - runAsUser: 65534 - runAsGroup: 65534 - tolerations: - key: "dedicated" operator: "Equal" @@ -191,7 +187,7 @@ iris-mpc-2: initContainer: enabled: true image: "amazon/aws-cli:2.17.62" - name: "iris-mpc-copy-cuda-libs" + name: "iris-mpc-2-copy-cuda-libs" env: - name: PARTY_ID value: "3" From 
7fde3f62d16079c32522e1a88a1f2da3dea3a21e Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 13 Dec 2024 08:34:53 -0800 Subject: [PATCH 102/170] specific name for configmap (#802) --- deploy/e2e/iris-mpc-0.yaml.tpl | 1 + deploy/e2e/iris-mpc-1.yaml.tpl | 1 + deploy/e2e/iris-mpc-2.yaml.tpl | 1 + 3 files changed, 3 insertions(+) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index d1675576a..a83f5c5f7 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -195,6 +195,7 @@ iris-mpc-0: fieldRef: fieldPath: status.hostIP configMap: + name: "iris-mpc-0-init" init.sh: | #!/usr/bin/env bash diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 533179439..2365a06cd 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -196,6 +196,7 @@ iris-mpc-1: fieldRef: fieldPath: status.hostIP configMap: + name: "iris-mpc-1-init" init.sh: | #!/usr/bin/env bash diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index b90c7f979..8729bc489 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -196,6 +196,7 @@ iris-mpc-2: fieldRef: fieldPath: status.hostIP configMap: + name: "iris-mpc-2-init" init.sh: | #!/usr/bin/env bash From 530d3226a79b58fcafc3b255c1381f0a7b864039 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 16 Dec 2024 02:37:53 +0100 Subject: [PATCH 103/170] test smaller chunk loader in prod (#805) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 12 +++++++++--- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 12 +++++++++--- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 12 +++++++++--- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index ca4344083..db22d373b 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ 
b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.6" +image: "ghcr.io/worldcoin/iris-mpc:4e250f9b023012dff9580d0f5bd5943345d42848" environment: prod replicaCount: 1 diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 5a65612ff..b6b06bdf3 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "80" + value: "4" - name: SMPC__AWS__REGION value: "eu-north-1" @@ -84,11 +84,17 @@ env: value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_200_partitioned" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" + + - name: SMPC__DB_CHUNKS_PARTITION_SIZE + value: "1000000" + - name: SMPC__LOAD_CHUNKS_S3_CLIENTS + value: "8" + - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index e3382df00..99063a30f 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "80" + value: "4" - name: SMPC__AWS__REGION value: "eu-north-1" @@ -84,10 +84,16 @@ env: value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_200_partitioned" + - name: SMPC__DB_CHUNKS_PARTITION_SIZE + value: "1000000" + + - name: SMPC__LOAD_CHUNKS_S3_CLIENTS + value: "8" + - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index e6b104b90..a43dde73e 100644 
--- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "80" + value: "4" - name: SMPC__AWS__REGION value: "eu-north-1" @@ -84,10 +84,16 @@ env: value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_200_partitioned" + - name: SMPC__DB_CHUNKS_PARTITION_SIZE + value: "1000000" + + - name: SMPC__LOAD_CHUNKS_S3_CLIENTS + value: "8" + - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 63986a7512a7e6a91f846ff43b95eceaecc79c81 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 16 Dec 2024 03:08:08 +0100 Subject: [PATCH 104/170] try 128 s3 load parallelism in prod (#806) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index b6b06bdf3..2ee85a022 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_200_partitioned" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "128" - name: SMPC__DB_CHUNKS_PARTITION_SIZE value: "1000000" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 99063a30f..b17471eae 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -93,7 +93,7 @@ env: value: "8" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "128" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml 
b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index a43dde73e..0e4abfbc7 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -93,7 +93,7 @@ env: value: "8" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "128" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 4a952a4ab5529bf1dd958dd88b8aef290bc6e0e7 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 16 Dec 2024 03:32:59 +0100 Subject: [PATCH 105/170] try 192 s3 load parallelism in prod (#807) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 2ee85a022..5841e4f67 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_200_partitioned" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "128" + value: "192" - name: SMPC__DB_CHUNKS_PARTITION_SIZE value: "1000000" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index b17471eae..4efd7c935 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -93,7 +93,7 @@ env: value: "8" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "128" + value: "192" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 0e4abfbc7..01791ea7d 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -93,7 +93,7 @@ env: value: "8" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "128" + value: "192" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 
36fb18d69f7e1e6ce51c29118999bdcc45193eb2 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 16 Dec 2024 04:03:49 +0100 Subject: [PATCH 106/170] revert load time tests in prod (#808) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 10 ++-------- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 10 ++-------- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 10 ++-------- 4 files changed, 7 insertions(+), 25 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index db22d373b..ca4344083 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:4e250f9b023012dff9580d0f5bd5943345d42848" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.6" environment: prod replicaCount: 1 diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 5841e4f67..c98d0ee0c 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -84,17 +84,11 @@ env: value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_200_partitioned" + value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "192" + value: "32" - - name: SMPC__DB_CHUNKS_PARTITION_SIZE - value: "1000000" - - - name: SMPC__LOAD_CHUNKS_S3_CLIENTS - value: "8" - - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 4efd7c935..defbd9218 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -84,16 +84,10 @@ env: value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_200_partitioned" + value: "binary_output_2k" - - name: 
SMPC__DB_CHUNKS_PARTITION_SIZE - value: "1000000" - - - name: SMPC__LOAD_CHUNKS_S3_CLIENTS - value: "8" - - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "192" + value: "32" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 01791ea7d..06d61fcea 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -84,16 +84,10 @@ env: value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_200_partitioned" - - - name: SMPC__DB_CHUNKS_PARTITION_SIZE - value: "1000000" - - - name: SMPC__LOAD_CHUNKS_S3_CLIENTS - value: "8" + value: "binary_output_2k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "192" + value: "32" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 95058a7676c0c90587e4a6b901bb1077064c1cc1 Mon Sep 17 00:00:00 2001 From: iliailia Date: Mon, 16 Dec 2024 09:22:42 +0100 Subject: [PATCH 107/170] Handle empty batches in eval_distance (#797) --- iris-mpc-cpu/src/hawkers/galois_store.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/iris-mpc-cpu/src/hawkers/galois_store.rs b/iris-mpc-cpu/src/hawkers/galois_store.rs index 9fc15c281..c960801f8 100644 --- a/iris-mpc-cpu/src/hawkers/galois_store.rs +++ b/iris-mpc-cpu/src/hawkers/galois_store.rs @@ -200,6 +200,9 @@ impl LocalNetAby3NgStoreProtocol { &mut self, distances: Vec>, ) -> eyre::Result>> { + if distances.is_empty() { + return Ok(vec![]); + } let mut player_session = self.get_owner_session(); let distances = batch_signed_lift_vec(&mut player_session, distances).await?; Ok(distances @@ -215,6 +218,9 @@ impl LocalNetAby3NgStoreProtocol { &mut self, pairs: Vec<(GaloisRingSharedIris, GaloisRingSharedIris)>, ) -> Vec> { + if pairs.is_empty() { + return vec![]; + } let mut player_session = self.get_owner_session(); let ds_and_ts = galois_ring_pairwise_distance(&mut 
player_session, &pairs) .await @@ -250,6 +256,9 @@ impl VectorStore for LocalNetAby3NgStoreProtocol { query: &Self::QueryRef, vectors: &[Self::VectorRef], ) -> Vec { + if vectors.is_empty() { + return vec![]; + } let pairs = vectors .iter() .map(|vector_id| { From 6a3e63fdfd2e7295d6388ca35182bc5d7bacd0e8 Mon Sep 17 00:00:00 2001 From: iliailia Date: Mon, 16 Dec 2024 09:42:27 +0100 Subject: [PATCH 108/170] Optimize rounds in transposition (#801) --- iris-mpc-cpu/src/protocol/binary.rs | 30 +++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/iris-mpc-cpu/src/protocol/binary.rs b/iris-mpc-cpu/src/protocol/binary.rs index cb7525525..49ddccd88 100644 --- a/iris-mpc-cpu/src/protocol/binary.rs +++ b/iris-mpc-cpu/src/protocol/binary.rs @@ -143,16 +143,26 @@ pub(crate) async fn transposed_pack_and( session: &mut Session, x1: Vec>, x2: Vec>, -) -> Result>, Error> -where - Standard: Distribution, -{ - // TODO(Dragos) this could probably be parallelized even more. 
- let mut res = Vec::with_capacity(x1.len()); - for (x1, x2) in x1.iter().zip(x2.iter()) { - let shares_a = and_many_send(session, x1.as_slice(), x2.as_slice()).await?; - let shares_b = and_many_receive(session).await?; - res.push(VecShare::from_ab(shares_a, shares_b)) +) -> Result>, Error> { + if x1.len() != x2.len() { + return Err(eyre!("Inputs have different length")); + } + let chunk_sizes = x1.iter().map(VecShare::len).collect::>(); + let chunk_sizes2 = x2.iter().map(VecShare::len).collect::>(); + if chunk_sizes != chunk_sizes2 { + return Err(eyre!("VecShare lengths are not equal")); + } + + let x1 = VecShare::flatten(x1); + let x2 = VecShare::flatten(x2); + let mut shares_a = and_many_send(session, x1.as_slice(), x2.as_slice()).await?; + let mut shares_b = and_many_receive(session).await?; + + let mut res = Vec::with_capacity(chunk_sizes.len()); + for l in chunk_sizes { + let a = shares_a.drain(..l).collect(); + let b = shares_b.drain(..l).collect(); + res.push(VecShare::from_ab(a, b)); } Ok(res) } From 64353b0ada8b0b9ec5dca1c1a633885c5a81a626 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Mon, 16 Dec 2024 02:15:52 -0800 Subject: [PATCH 109/170] increase batch size (#809) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index c98d0ee0c..fa7abacd7 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "64" + value: "256" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index defbd9218..6ebbf6549 100644 --- 
a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "64" + value: "256" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 06d61fcea..0ef1586e1 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -85,7 +85,7 @@ env: - name: SMPC__DB_CHUNKS_FOLDER_NAME value: "binary_output_2k" - + - name: SMPC__LOAD_CHUNKS_PARALLELISM value: "32" @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "64" + value: "256" - name: SMPC__SERVICE__METRICS__HOST valueFrom: From 27d03f3b08eec1583138988220526f78f48c909a Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Mon, 16 Dec 2024 02:54:34 -0800 Subject: [PATCH 110/170] increase timeout (#810) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index fa7abacd7..658615fe7 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -54,7 +54,7 @@ env: name: application - name: SMPC__PROCESSING_TIMEOUT_SECS - value: "120" + value: "300" - name: SMPC__HEARTBEAT_INTERVAL_SECS value: "2" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 6ebbf6549..59b4b46f1 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -54,7 +54,7 @@ env: name: application - name: SMPC__PROCESSING_TIMEOUT_SECS - value: "120" + value: "300" - name: SMPC__HEARTBEAT_INTERVAL_SECS value: "2" diff --git 
a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 0ef1586e1..698f15216 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -54,7 +54,7 @@ env: name: application - name: SMPC__PROCESSING_TIMEOUT_SECS - value: "120" + value: "300" - name: SMPC__HEARTBEAT_INTERVAL_SECS value: "2" From fc8f03ff3e26a6368640b0405d09afa85c74d15e Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Mon, 16 Dec 2024 04:26:06 -0800 Subject: [PATCH 111/170] dbg: increase chunk size (#811) --- iris-mpc-gpu/src/server/actor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index df5c15be8..57b18bcbc 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -66,7 +66,7 @@ impl ServerActorHandle { } } -const DB_CHUNK_SIZE: usize = 1 << 14; +const DB_CHUNK_SIZE: usize = 1 << 15; const KDF_SALT: &str = "111a1a93518f670e9bb0c2c68888e2beb9406d4c4ed571dc77b801e676ae3091"; // Random 32 byte salt const SUPERMATCH_THRESHOLD: usize = 4_000; From 00fc4f5e37c0f67516211415c9640f5dffc86001 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 16 Dec 2024 04:30:53 -0800 Subject: [PATCH 112/170] template variables to avoid name clashes (#812) --- deploy/e2e/iris-mpc-0.yaml.tpl | 22 +++++++++++----------- deploy/e2e/iris-mpc-1.yaml.tpl | 22 +++++++++++----------- deploy/e2e/iris-mpc-2.yaml.tpl | 20 ++++++++++---------- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index a83f5c5f7..3c3a76f5d 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -1,8 +1,8 @@ iris-mpc-0: - fullNameOverride: "iris-mpc-0" - image: "ghcr.io/worldcoin/iris-mpc:v0.12.2" + fullnameOverride: "iris-mpc-0" + image: "ghcr.io/worldcoin/iris-mpc:$IRIS_MPC_IMAGE_TAG" - environment: e2e + environment: 
$ENV replicaCount: 1 strategy: @@ -93,13 +93,13 @@ iris-mpc-0: value: "eth0" - name: NCCL_COMM_ID - value: "iris-mpc-node.1.e2e.smpcv2.worldcoin.dev:4000" + value: "iris-mpc-node.1.$ENV.smpcv2.worldcoin.dev:4000" - name: SMPC__ENVIRONMENT - value: "e2e" + value: "$ENV" - name: SMPC__SERVICE__SERVICE_NAME - value: "smpcv2-server-e2e" + value: "smpcv2-server-$ENV" - name: SMPC__DATABASE__URL valueFrom: @@ -172,16 +172,16 @@ iris-mpc-0: value: "256" - name: SMPC__SERVICE__METRICS__PREFIX - value: "smpcv2-e2e-0" + value: "smpcv2-$ENV-0" - name: SMPC__RETURN_PARTIAL_RESULTS value: "true" - name: SMPC__NODE_HOSTNAMES - value: '["iris-mpc-node.1.e2e.smpcv2.worldcoin.dev","iris-mpc-node.2.e2e.smpcv2.worldcoin.dev","iris-mpc-node.3.e2e.smpcv2.worldcoin.dev"]' + value: '["iris-mpc-node.1.$ENV.smpcv2.worldcoin.dev","iris-mpc-node.2.$ENV.smpcv2.worldcoin.dev","iris-mpc-node.3.$ENV.smpcv2.worldcoin.dev"]' - name: SMPC__IMAGE_NAME - value: "ghcr.io/worldcoin/iris-mpc:v0.12.2" + value: "ghcr.io/worldcoin/iris-mpc:$IRIS_MPC_IMAGE_TAG" initContainer: enabled: true @@ -200,7 +200,7 @@ iris-mpc-0: #!/usr/bin/env bash # Set up environment variables - HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name "$PARTY_ID".e2e.smpcv2.worldcoin.dev --query "HostedZones[].Id" --output text) + HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name "$PARTY_ID".$ENV.smpcv2.worldcoin.dev --query "HostedZones[].Id" --output text) # Generate the JSON content in memory BATCH_JSON=$(cat < Date: Mon, 16 Dec 2024 05:06:18 -0800 Subject: [PATCH 113/170] release v0.12.7 (#813) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index ca4344083..a987ec017 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.6" +image: 
"ghcr.io/worldcoin/iris-mpc:v0.12.7" environment: prod replicaCount: 1 From 599346b1464cefd42c3f619da38df6d019ddf656 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 16 Dec 2024 05:20:51 -0800 Subject: [PATCH 114/170] use correct key for tolerations (#814) --- deploy/e2e/iris-mpc-0.yaml.tpl | 4 ++-- deploy/e2e/iris-mpc-1.yaml.tpl | 4 ++-- deploy/e2e/iris-mpc-2.yaml.tpl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index 3c3a76f5d..ec69f9f53 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -56,9 +56,9 @@ iris-mpc-0: hostNetwork: false tolerations: - - key: "dedicated" + - key: "gpuGroup" operator: "Equal" - value: "gpuGroup" + value: "dedicated" effect: "NoSchedule" keelPolling: diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index f94a6bdc0..303a0a89a 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -56,9 +56,9 @@ iris-mpc-1: hostNetwork: false tolerations: - - key: "dedicated" + - key: "gpuGroup" operator: "Equal" - value: "gpuGroup" + value: "dedicated" effect: "NoSchedule" keelPolling: diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 79944202f..03340baec 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -56,9 +56,9 @@ iris-mpc-2: hostNetwork: false tolerations: - - key: "dedicated" + - key: "gpuGroup" operator: "Equal" - value: "gpuGroup" + value: "dedicated" effect: "NoSchedule" keelPolling: From 871f0955c325cbd5be69d8eceddf53f2dd25c4e0 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Mon, 16 Dec 2024 05:36:00 -0800 Subject: [PATCH 115/170] remove efa requestas (#815) --- deploy/e2e/iris-mpc-0.yaml.tpl | 3 +-- deploy/e2e/iris-mpc-1.yaml.tpl | 3 +-- deploy/e2e/iris-mpc-2.yaml.tpl | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl 
b/deploy/e2e/iris-mpc-0.yaml.tpl index ec69f9f53..9d46b5d1e 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -40,12 +40,11 @@ iris-mpc-0: cpu: 31 memory: 60Gi nvidia.com/gpu: 1 - vpc.amazonaws.com/efa: 1 + requests: cpu: 30 memory: 55Gi nvidia.com/gpu: 1 - vpc.amazonaws.com/efa: 1 imagePullSecrets: - name: github-secret diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 303a0a89a..38322ccda 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -40,12 +40,11 @@ iris-mpc-1: cpu: 31 memory: 60Gi nvidia.com/gpu: 1 - vpc.amazonaws.com/efa: 1 + requests: cpu: 30 memory: 55Gi nvidia.com/gpu: 1 - vpc.amazonaws.com/efa: 1 imagePullSecrets: - name: github-secret diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 03340baec..b2a970c0d 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -40,12 +40,11 @@ iris-mpc-2: cpu: 31 memory: 60Gi nvidia.com/gpu: 1 - vpc.amazonaws.com/efa: 1 + requests: cpu: 30 memory: 55Gi nvidia.com/gpu: 1 - vpc.amazonaws.com/efa: 1 imagePullSecrets: - name: github-secret From 8e33106a60f58e93bc9f15ab6953d762f1f91c37 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Mon, 16 Dec 2024 06:04:10 -0800 Subject: [PATCH 116/170] dbg: reduce max db (#816) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 658615fe7..9a0d32f25 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -96,7 +96,7 @@ env: value: "0" - name: SMPC__MAX_DB_SIZE - value: "20000000" + value: "10000000" - name: SMPC__FAKE_DB_SIZE value: "0" diff --git 
a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 59b4b46f1..ecf6a5bf4 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -96,7 +96,7 @@ env: value: "0" - name: SMPC__MAX_DB_SIZE - value: "20000000" + value: "10000000" - name: SMPC__FAKE_DB_SIZE value: "0" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 698f15216..bab1814f4 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -96,7 +96,7 @@ env: value: "0" - name: SMPC__MAX_DB_SIZE - value: "20000000" + value: "10000000" - name: SMPC__FAKE_DB_SIZE value: "0" From 90fc996a667e4f62145141a2884bf75111908d42 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Tue, 17 Dec 2024 01:26:07 -0800 Subject: [PATCH 117/170] debug: sync after dot (#818) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- iris-mpc-gpu/src/server/actor.rs | 5 +++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 9a0d32f25..1316b6847 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "256" + value: "64" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index ecf6a5bf4..a570777d9 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "256" + value: "64" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git 
a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index bab1814f4..756e620b9 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "256" + value: "64" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 57b18bcbc..5f599ed48 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -1166,6 +1166,11 @@ impl ServerActor { ); }); + // DEBUG + for device in self.device_manager.devices() { + device.synchronize().unwrap(); + } + // wait for the exchange result buffers to be ready self.device_manager .await_event(request_streams, ¤t_exchange_event); From 4dbe6c221a2782e0e263ffb7c45157bbe4f176f4 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Tue, 17 Dec 2024 02:25:32 -0800 Subject: [PATCH 118/170] debug: wait for dot event (#820) --- iris-mpc-gpu/src/server/actor.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 5f599ed48..4d2c86524 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -1166,11 +1166,6 @@ impl ServerActor { ); }); - // DEBUG - for device in self.device_manager.devices() { - device.synchronize().unwrap(); - } - // wait for the exchange result buffers to be ready self.device_manager .await_event(request_streams, ¤t_exchange_event); @@ -1196,6 +1191,9 @@ impl ServerActor { self.device_manager .record_event(request_streams, &next_dot_event); + self.device_manager + .await_event(request_streams, &next_dot_event); + record_stream_time!( &self.device_manager, request_streams, From ecf25054672373f9a94b3a36ea1f38a2e7048c33 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Tue, 17 Dec 2024 03:16:52 -0800 Subject: [PATCH 119/170] release 12.9 
(#821) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index a987ec017..f682d4b83 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.7" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.9" environment: prod replicaCount: 1 From a5fc6b4dc9d9dea7e3cf781b0e6defab9e661172 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:41:14 +0100 Subject: [PATCH 120/170] chore: Deploy `async-importer` test to stage (#822) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 741a025fd..6557ad04a 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.6" +image: "ghcr.io/worldcoin/iris-mpc:async-importer" environment: stage replicaCount: 1 From b86bc2893d2e4e457ae3108699f2f6c10001f2d5 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Tue, 17 Dec 2024 04:52:07 -0800 Subject: [PATCH 121/170] turn off exporter (#823) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 1316b6847..b7a7c128e 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: 
SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index a570777d9..5b5281e52 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 756e620b9..79e6ff693 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" From 926608ed43a44f43c7c7faaa76bd477913d2570d Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Tue, 17 Dec 2024 10:10:04 -0800 Subject: [PATCH 122/170] enable importer (#825) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index b7a7c128e..1316b6847 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml 
b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 5b5281e52..a570777d9 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 79e6ff693..756e620b9 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -78,7 +78,7 @@ env: value: "https://pki-smpc.worldcoin.org" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" From 12d9e2c51a66381aa87bbdac193c41990b0c9ee6 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Tue, 17 Dec 2024 10:12:49 -0800 Subject: [PATCH 123/170] run as root (#826) --- deploy/e2e/iris-mpc-0.yaml.tpl | 6 ++++++ deploy/e2e/iris-mpc-1.yaml.tpl | 5 +++++ deploy/e2e/iris-mpc-2.yaml.tpl | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index 9d46b5d1e..347892f48 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -35,6 +35,12 @@ iris-mpc-0: path: /ready port: health + podSecurityContext: + runAsNonRoot: false + seccompProfile: + type: RuntimeDefault + + resources: limits: cpu: 31 diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 38322ccda..f34041054 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -35,6 +35,11 @@ iris-mpc-1: path: /ready port: health + podSecurityContext: + runAsNonRoot: false + seccompProfile: + type: RuntimeDefault + resources: limits: cpu: 31 diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl 
b/deploy/e2e/iris-mpc-2.yaml.tpl index b2a970c0d..fb26bb831 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -35,6 +35,11 @@ iris-mpc-2: path: /ready port: health + podSecurityContext: + runAsNonRoot: false + seccompProfile: + type: RuntimeDefault + resources: limits: cpu: 31 From 29bee9d7ea0d02cc367374b710bd2b9a5ea42071 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Tue, 17 Dec 2024 13:06:22 -0800 Subject: [PATCH 124/170] increase failure threshold (#827) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index f682d4b83..463f7d55b 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -29,7 +29,7 @@ readinessProbe: startupProbe: initialDelaySeconds: 900 - failureThreshold: 40 + failureThreshold: 50 periodSeconds: 30 httpGet: path: /ready From 4f7ba95945a0537a4e3756559f1edae90007de15 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 18 Dec 2024 00:17:06 -0800 Subject: [PATCH 125/170] prefetch the next chunk to gpu mem manually (#824) * prefetch the next chunk to gpu mem manually --- iris-mpc-gpu/src/dot/share_db.rs | 84 +++++++++++++++++---- iris-mpc-gpu/src/helpers/mod.rs | 28 ++++++- iris-mpc-gpu/src/helpers/query_processor.rs | 19 +++-- iris-mpc-gpu/src/server/actor.rs | 77 +++++++++++++++---- 4 files changed, 172 insertions(+), 36 deletions(-) diff --git a/iris-mpc-gpu/src/dot/share_db.rs b/iris-mpc-gpu/src/dot/share_db.rs index bd0faf29c..c7e8eeb0f 100644 --- a/iris-mpc-gpu/src/dot/share_db.rs +++ b/iris-mpc-gpu/src/dot/share_db.rs @@ -20,8 +20,8 @@ use cudarc::{ CudaBlas, }, driver::{ - result::{self, malloc_async, malloc_managed}, - sys::{CUdeviceptr, CUmemAttach_flags}, + result::{self, malloc_async}, + sys::CUdeviceptr, CudaFunction, CudaSlice, CudaStream, CudaView, DevicePtr, DeviceSlice, LaunchAsync, }, nccl, @@ -114,6 
+114,12 @@ pub struct SlicedProcessedDatabase { pub code_sums_gr: CudaVec2DSlicerU32, } +#[derive(Clone)] +pub struct DBChunkBuffers { + pub limb_0: Vec>, + pub limb_1: Vec>, +} + pub struct ShareDB { peer_id: usize, is_remote: bool, @@ -237,22 +243,17 @@ impl ShareDB { .devices() .iter() .map(|device| unsafe { + let mut host_mem0: *mut c_void = std::ptr::null_mut(); + let mut host_mem1: *mut c_void = std::ptr::null_mut(); + let _ = cudarc::driver::sys::lib() + .cuMemAllocHost_v2(&mut host_mem0, max_size * self.code_length); + let _ = cudarc::driver::sys::lib() + .cuMemAllocHost_v2(&mut host_mem1, max_size * self.code_length); ( StreamAwareCudaSlice::from(device.alloc(max_size).unwrap()), ( StreamAwareCudaSlice::from(device.alloc(max_size).unwrap()), - ( - malloc_managed( - max_size * self.code_length, - CUmemAttach_flags::CU_MEM_ATTACH_GLOBAL, - ) - .unwrap(), - malloc_managed( - max_size * self.code_length, - CUmemAttach_flags::CU_MEM_ATTACH_GLOBAL, - ) - .unwrap(), - ), + (host_mem0 as u64, host_mem1 as u64), ), ) }) @@ -450,6 +451,61 @@ impl ShareDB { } } + pub fn alloc_db_chunk_buffer(&self, max_chunk_size: usize) -> DBChunkBuffers { + let mut limb_0 = vec![]; + let mut limb_1 = vec![]; + for device in self.device_manager.devices() { + unsafe { + limb_0.push(device.alloc(max_chunk_size * self.code_length).unwrap()); + limb_1.push(device.alloc(max_chunk_size * self.code_length).unwrap()); + } + } + DBChunkBuffers { limb_0, limb_1 } + } + + pub fn prefetch_db_chunk( + &self, + db: &SlicedProcessedDatabase, + buffers: &DBChunkBuffers, + chunk_sizes: &[usize], + offset: &[usize], + db_sizes: &[usize], + streams: &[CudaStream], + ) { + for idx in 0..self.device_manager.device_count() { + let device = self.device_manager.device(idx); + device.bind_to_thread().unwrap(); + + if offset[idx] >= db_sizes[idx] || offset[idx] + chunk_sizes[idx] > db_sizes[idx] { + continue; + } + + unsafe { + cudarc::driver::sys::lib() + .cuMemcpyHtoDAsync_v2( + 
*buffers.limb_0[idx].device_ptr(), + (db.code_gr.limb_0[idx] as usize + offset[idx] * self.code_length) + as *mut _, + chunk_sizes[idx] * self.code_length, + streams[idx].stream, + ) + .result() + .unwrap(); + + cudarc::driver::sys::lib() + .cuMemcpyHtoDAsync_v2( + *buffers.limb_1[idx].device_ptr(), + (db.code_gr.limb_1[idx] as usize + offset[idx] * self.code_length) + as *mut _, + chunk_sizes[idx] * self.code_length, + streams[idx].stream, + ) + .result() + .unwrap(); + } + } + } + pub fn dot( &mut self, queries: &CudaVec2DSlicer, diff --git a/iris-mpc-gpu/src/helpers/mod.rs b/iris-mpc-gpu/src/helpers/mod.rs index 149fc10cf..e4bd114ee 100644 --- a/iris-mpc-gpu/src/helpers/mod.rs +++ b/iris-mpc-gpu/src/helpers/mod.rs @@ -1,7 +1,7 @@ use crate::threshold_ring::protocol::ChunkShare; use cudarc::driver::{ result::{self, memcpy_dtoh_async, memcpy_htod_async, stream}, - sys::{CUdeviceptr, CUstream, CUstream_st}, + sys::{lib, CUdeviceptr, CUstream, CUstream_st}, CudaDevice, CudaSlice, CudaStream, DevicePtr, DevicePtrMut, DeviceRepr, DriverError, LaunchConfig, }; @@ -104,6 +104,32 @@ pub unsafe fn dtod_at_offset( } } +/// Copy a slice from device to host with respective offsets. 
+/// # Safety +/// +/// The caller must ensure that the `dst` and `src` pointers are valid +/// with the respective offsets +pub unsafe fn dtoh_at_offset( + dst: u64, + dst_offset: usize, + src: CUdeviceptr, + src_offset: usize, + len: usize, + stream_ptr: CUstream, +) { + unsafe { + lib() + .cuMemcpyDtoHAsync_v2( + (dst + dst_offset as u64) as *mut _, + (src + src_offset as u64) as CUdeviceptr, + len, + stream_ptr, + ) + .result() + .unwrap(); + } +} + pub fn dtoh_on_stream_sync>( input: &U, device: &Arc, diff --git a/iris-mpc-gpu/src/helpers/query_processor.rs b/iris-mpc-gpu/src/helpers/query_processor.rs index a02a3b4bd..de27e4e5d 100644 --- a/iris-mpc-gpu/src/helpers/query_processor.rs +++ b/iris-mpc-gpu/src/helpers/query_processor.rs @@ -1,6 +1,6 @@ use crate::{ dot::{ - share_db::{ShareDB, SlicedProcessedDatabase}, + share_db::{DBChunkBuffers, ShareDB, SlicedProcessedDatabase}, IRIS_CODE_LENGTH, MASK_CODE_LENGTH, }, helpers::device_manager::DeviceManager, @@ -82,6 +82,15 @@ impl From<&CudaVec2DSlicer> for CudaVec2DSlicerRawPointer { } } +impl From<&DBChunkBuffers> for CudaVec2DSlicerRawPointer { + fn from(buffers: &DBChunkBuffers) -> Self { + CudaVec2DSlicerRawPointer { + limb_0: buffers.limb_0.iter().map(|s| *s.device_ptr()).collect(), + limb_1: buffers.limb_1.iter().map(|s| *s.device_ptr()).collect(), + } + } +} + pub struct CudaVec2DSlicer { pub limb_0: Vec>, pub limb_1: Vec>, @@ -193,8 +202,8 @@ impl DeviceCompactQuery { &self, code_engine: &mut ShareDB, mask_engine: &mut ShareDB, - sliced_code_db: &SlicedProcessedDatabase, - sliced_mask_db: &SlicedProcessedDatabase, + sliced_code_db: &CudaVec2DSlicerRawPointer, + sliced_mask_db: &CudaVec2DSlicerRawPointer, database_sizes: &[usize], offset: usize, streams: &[CudaStream], @@ -202,7 +211,7 @@ impl DeviceCompactQuery { ) { code_engine.dot( &self.code_query, - &sliced_code_db.code_gr, + sliced_code_db, database_sizes, offset, streams, @@ -210,7 +219,7 @@ impl DeviceCompactQuery { ); mask_engine.dot( 
&self.mask_query, - &sliced_mask_db.code_gr, + sliced_mask_db, database_sizes, offset, streams, diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 4d2c86524..95c5aa289 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -2,14 +2,16 @@ use super::{BatchQuery, Eye, ServerJob, ServerJobResult}; use crate::{ dot::{ distance_comparator::DistanceComparator, - share_db::{preprocess_query, ShareDB, SlicedProcessedDatabase}, + share_db::{preprocess_query, DBChunkBuffers, ShareDB, SlicedProcessedDatabase}, IRIS_CODE_LENGTH, MASK_CODE_LENGTH, ROTATIONS, }, helpers::{ self, comm::NcclComm, device_manager::DeviceManager, - query_processor::{CompactQuery, DeviceCompactQuery, DeviceCompactSums}, + query_processor::{ + CompactQuery, CudaVec2DSlicerRawPointer, DeviceCompactQuery, DeviceCompactSums, + }, }, threshold_ring::protocol::{ChunkShare, Circuits}, }; @@ -103,6 +105,8 @@ pub struct ServerActor { max_db_size: usize, return_partial_results: bool, disable_persistence: bool, + code_chunk_buffers: Vec, + mask_chunk_buffers: Vec, } const NON_MATCH_ID: u32 = u32::MAX; @@ -317,9 +321,11 @@ impl ServerActor { let batch_match_list_right = distance_comparator.prepare_db_match_list(n_queries); let query_db_size = vec![n_queries; device_manager.device_count()]; - let current_db_sizes = vec![0; device_manager.device_count()]; + let code_chunk_buffers = vec![codes_engine.alloc_db_chunk_buffer(DB_CHUNK_SIZE); 2]; + let mask_chunk_buffers = vec![masks_engine.alloc_db_chunk_buffer(DB_CHUNK_SIZE); 2]; + for dev in device_manager.devices() { dev.synchronize().unwrap(); } @@ -355,6 +361,8 @@ impl ServerActor { max_db_size, return_partial_results, disable_persistence, + code_chunk_buffers, + mask_chunk_buffers, }) } @@ -1111,6 +1119,30 @@ impl ServerActor { let mut current_phase2_event = self.device_manager.create_events(); let mut next_phase2_event = self.device_manager.create_events(); + let chunk_sizes = |chunk_idx: usize| 
{ + self.current_db_sizes + .iter() + .map(|s| (s - DB_CHUNK_SIZE * chunk_idx).clamp(1, DB_CHUNK_SIZE)) + .collect::>() + }; + + self.codes_engine.prefetch_db_chunk( + code_db_slices, + &self.code_chunk_buffers[0], + &chunk_sizes(0), + &vec![0; self.device_manager.device_count()], + &self.current_db_sizes, + &self.streams[0], + ); + self.masks_engine.prefetch_db_chunk( + mask_db_slices, + &self.mask_chunk_buffers[0], + &chunk_sizes(0), + &vec![0; self.device_manager.device_count()], + &self.current_db_sizes, + &self.streams[0], + ); + // ---- START DATABASE DEDUP ---- tracing::info!(party_id = self.party_id, "Start DB deduplication"); let ignore_device_results: Vec = @@ -1118,14 +1150,12 @@ impl ServerActor { let mut db_chunk_idx = 0; loop { let request_streams = &self.streams[db_chunk_idx % 2]; + let next_request_streams = &self.streams[(db_chunk_idx + 1) % 2]; let request_cublas_handles = &self.cublas_handles[db_chunk_idx % 2]; let offset = db_chunk_idx * DB_CHUNK_SIZE; - let chunk_size = self - .current_db_sizes - .iter() - .map(|s| (s - DB_CHUNK_SIZE * db_chunk_idx).clamp(1, DB_CHUNK_SIZE)) - .collect::>(); + let chunk_size = chunk_sizes(db_chunk_idx); + let next_chunk_size = chunk_sizes(db_chunk_idx + 1); // We need to pad the chunk size for two reasons: // 1. 
Chunk size needs to be a multiple of 4, because the underlying @@ -1149,6 +1179,24 @@ impl ServerActor { .record_event(request_streams, ¤t_phase2_event); } + // Prefetch next chunk + self.codes_engine.prefetch_db_chunk( + code_db_slices, + &self.code_chunk_buffers[(db_chunk_idx + 1) % 2], + &next_chunk_size, + &chunk_size.iter().map(|s| offset + s).collect::>(), + &self.current_db_sizes, + next_request_streams, + ); + self.masks_engine.prefetch_db_chunk( + mask_db_slices, + &self.mask_chunk_buffers[(db_chunk_idx + 1) % 2], + &next_chunk_size, + &chunk_size.iter().map(|s| offset + s).collect::>(), + &self.current_db_sizes, + next_request_streams, + ); + self.device_manager .await_event(request_streams, ¤t_dot_event); @@ -1157,10 +1205,10 @@ impl ServerActor { compact_device_queries.dot_products_against_db( &mut self.codes_engine, &mut self.masks_engine, - code_db_slices, - mask_db_slices, + &CudaVec2DSlicerRawPointer::from(&self.code_chunk_buffers[db_chunk_idx % 2]), + &CudaVec2DSlicerRawPointer::from(&self.mask_chunk_buffers[db_chunk_idx % 2]), &dot_chunk_size, - offset, + 0, request_streams, request_cublas_handles, ); @@ -1191,9 +1239,6 @@ impl ServerActor { self.device_manager .record_event(request_streams, &next_dot_event); - self.device_manager - .await_event(request_streams, &next_dot_event); - record_stream_time!( &self.device_manager, request_streams, @@ -1621,7 +1666,7 @@ fn write_db_at_index( ), ] { unsafe { - helpers::dtod_at_offset( + helpers::dtoh_at_offset( db.code_gr.limb_0[device_index], dst_index * code_length, *query.limb_0[device_index].device_ptr(), @@ -1630,7 +1675,7 @@ fn write_db_at_index( streams[device_index].stream, ); - helpers::dtod_at_offset( + helpers::dtoh_at_offset( db.code_gr.limb_1[device_index], dst_index * code_length, *query.limb_1[device_index].device_ptr(), From 9e669f32cd93dae62060582232a96baaa8e05604 Mon Sep 17 00:00:00 2001 From: danielle-tfh Date: Wed, 18 Dec 2024 09:25:40 +0100 Subject: [PATCH 126/170] Add testing 
pre-fetching of chunk to gpu mem manually --- deploy/stage/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 6557ad04a..3f5042ecd 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:async-importer" +image: "ghcr.io/worldcoin/iris-mpc:v0.12.10" environment: stage replicaCount: 1 From fc5c699d95de80c2812f4f7f6dc7740d4d3dfdc7 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Wed, 18 Dec 2024 02:46:43 -0800 Subject: [PATCH 127/170] increase DB load paralleism (#830) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 1316b6847..7f1a3a831 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "4" + value: "80" - name: SMPC__AWS__REGION value: "eu-north-1" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index a570777d9..39c1b72ac 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "4" + value: "80" - name: SMPC__AWS__REGION value: "eu-north-1" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 756e620b9..8733f6891 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: 
"true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "4" + value: "80" - name: SMPC__AWS__REGION value: "eu-north-1" From 4ea5a67f6215ab75db1bbde6512709a5cdf5b924 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 18 Dec 2024 04:24:26 -0800 Subject: [PATCH 128/170] async read chunk (#803) * async read chunk * dynamic chunksize * log throughput --- iris-mpc-store/src/s3_importer.rs | 86 ++++++++++++++++++------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/iris-mpc-store/src/s3_importer.rs b/iris-mpc-store/src/s3_importer.rs index 896b4af64..e4ae76028 100644 --- a/iris-mpc-store/src/s3_importer.rs +++ b/iris-mpc-store/src/s3_importer.rs @@ -1,11 +1,18 @@ use crate::StoredIris; use async_trait::async_trait; -use aws_sdk_s3::Client; -use bytes::Bytes; +use aws_sdk_s3::{primitives::ByteStream, Client}; use futures::{stream, Stream, StreamExt}; use iris_mpc_common::{IRIS_CODE_LENGTH, MASK_CODE_LENGTH}; -use std::{mem, pin::Pin, sync::Arc, time::Instant}; -use tokio::task; +use std::{ + mem, + pin::Pin, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Instant, +}; +use tokio::io::AsyncReadExt; const SINGLE_ELEMENT_SIZE: usize = IRIS_CODE_LENGTH * mem::size_of::() * 2 + MASK_CODE_LENGTH * mem::size_of::() * 2 @@ -13,7 +20,7 @@ const SINGLE_ELEMENT_SIZE: usize = IRIS_CODE_LENGTH * mem::size_of::() * 2 #[async_trait] pub trait ObjectStore: Send + Sync + 'static { - async fn get_object(&self, key: &str) -> eyre::Result; + async fn get_object(&self, key: &str) -> eyre::Result; async fn list_objects(&self, prefix: &str) -> eyre::Result>; } @@ -30,8 +37,8 @@ impl S3Store { #[async_trait] impl ObjectStore for S3Store { - async fn get_object(&self, key: &str) -> eyre::Result { - let result = self + async fn get_object(&self, key: &str) -> eyre::Result { + let res = self .client .get_object() .bucket(&self.bucket) @@ -39,8 +46,7 @@ impl ObjectStore for S3Store { .send() .await?; - let data = result.body.collect().await?; - 
Ok(data.into_bytes()) + Ok(res.body) } async fn list_objects(&self, prefix: &str) -> eyre::Result> { @@ -133,33 +139,32 @@ pub async fn fetch_and_parse_chunks( .map(|num| format!("{}/{}.bin", prefix_name, num)) .collect(); tracing::info!("Generated {} chunk names", chunks.len()); + let total_bytes = Arc::new(AtomicUsize::new(0)); + let now = Instant::now(); let result_stream = stream::iter(chunks) - .map(move |chunk| async move { - let mut now = Instant::now(); - let result = store.get_object(&chunk).await?; - let get_object_time = now.elapsed(); - tracing::info!("Got chunk object: {} in {:?}", chunk, get_object_time,); - - now = Instant::now(); - let task = task::spawn_blocking(move || { - let n_records = result.len().div_floor(SINGLE_ELEMENT_SIZE); - - let mut records = Vec::with_capacity(n_records); - for i in 0..n_records { - let start = i * SINGLE_ELEMENT_SIZE; - let end = (i + 1) * SINGLE_ELEMENT_SIZE; - let chunk = &result[start..end]; - let iris = StoredIris::from_bytes(chunk); - records.push(iris); + .map({ + let total_bytes_clone = total_bytes.clone(); + move |chunk| { + let counter = total_bytes_clone.clone(); + async move { + let mut object_stream = store.get_object(&chunk).await?.into_async_read(); + let mut records = Vec::with_capacity(last_snapshot_details.chunk_size as usize); + let mut buf = vec![0u8; SINGLE_ELEMENT_SIZE]; + loop { + match object_stream.read_exact(&mut buf).await { + Ok(_) => { + let iris = StoredIris::from_bytes(&buf); + records.push(iris); + counter.fetch_add(SINGLE_ELEMENT_SIZE, Ordering::Relaxed); + } + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break, + Err(e) => return Err(e.into()), + } + } + Ok::<_, eyre::Error>(stream::iter(records)) } - - Ok::<_, eyre::Error>(stream::iter(records)) - }) - .await?; - let parse_time = now.elapsed(); - tracing::info!("Parsed chunk: {} in {:?}", chunk, parse_time,); - task + } }) .buffer_unordered(concurrency) .flat_map(|result| match result { @@ -168,12 +173,18 @@ pub async 
fn fetch_and_parse_chunks( }) .boxed(); + tracing::info!( + "Overall download throughput: {:.2} Gbps", + total_bytes.load(Ordering::Relaxed) as f32 * 8.0 / 1e9 / now.elapsed().as_secs_f32() + ); + result_stream } #[cfg(test)] mod tests { use super::*; + use aws_sdk_s3::primitives::SdkBody; use rand::Rng; use std::{cmp::min, collections::HashSet}; @@ -206,12 +217,13 @@ mod tests { #[async_trait] impl ObjectStore for MockStore { - async fn get_object(&self, key: &str) -> eyre::Result { - self.objects + async fn get_object(&self, key: &str) -> eyre::Result { + let bytes = self + .objects .get(key) .cloned() - .map(Bytes::from) - .ok_or_else(|| eyre::eyre!("Object not found: {}", key)) + .ok_or_else(|| eyre::eyre!("Object not found: {}", key))?; + Ok(ByteStream::from(SdkBody::from(bytes))) } async fn list_objects(&self, _: &str) -> eyre::Result> { From 7cbeb3dbba14e0dc12f58ae1934d790e273a8231 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:41:39 +0100 Subject: [PATCH 129/170] chore: Release 0.13 (#831) - async chunk reads - parallelism of db fetches --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 463f7d55b..c43c58de4 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.9" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.0" environment: prod replicaCount: 1 From 9205ce5f2eb5391266a401e391e206b3524ca1e9 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:44:54 +0100 Subject: [PATCH 130/170] chore: Use the 16k bucket (#832) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 
2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 7f1a3a831..f2edcc443 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "iris-mpc-db-exporter-store-node-0-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM value: "32" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 39c1b72ac..ad82dc5c1 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "iris-mpc-db-exporter-store-node-1-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM value: "32" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 8733f6891..8b2f06d7b 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "iris-mpc-db-exporter-store-node-2-prod-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM value: "32" From 4294bde4e1c65a167ad760aebf97a542f135e7bc Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Wed, 18 Dec 2024 04:58:26 -0800 Subject: [PATCH 131/170] use k8s internal DNS (#833) --- deploy/e2e/iris-mpc-0.yaml.tpl | 32 ++------------------------------ deploy/e2e/iris-mpc-1.yaml.tpl | 32 ++------------------------------ deploy/e2e/iris-mpc-2.yaml.tpl | 32 ++------------------------------ 3 files changed, 6 insertions(+), 90 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl 
index 347892f48..b100a56e2 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -98,7 +98,7 @@ iris-mpc-0: value: "eth0" - name: NCCL_COMM_ID - value: "iris-mpc-node.1.$ENV.smpcv2.worldcoin.dev:4000" + value: "iris-mpc-0.svc.cluster.local:4000" - name: SMPC__ENVIRONMENT value: "$ENV" @@ -183,7 +183,7 @@ iris-mpc-0: value: "true" - name: SMPC__NODE_HOSTNAMES - value: '["iris-mpc-node.1.$ENV.smpcv2.worldcoin.dev","iris-mpc-node.2.$ENV.smpcv2.worldcoin.dev","iris-mpc-node.3.$ENV.smpcv2.worldcoin.dev"]' + value: '["iris-mpc-0.svc.cluster.local","iris-mpc-1.svc.cluster.local","iris-mpc-2.svc.cluster.local"]' - name: SMPC__IMAGE_NAME value: "ghcr.io/worldcoin/iris-mpc:$IRIS_MPC_IMAGE_TAG" @@ -203,34 +203,6 @@ iris-mpc-0: name: "iris-mpc-0-init" init.sh: | #!/usr/bin/env bash - - # Set up environment variables - HOSTED_ZONE_ID=$(aws route53 list-hosted-zones-by-name --dns-name "$PARTY_ID".$ENV.smpcv2.worldcoin.dev --query "HostedZones[].Id" --output text) - - # Generate the JSON content in memory - BATCH_JSON=$(cat < Date: Wed, 18 Dec 2024 14:02:08 +0100 Subject: [PATCH 132/170] chore: Increase batch size to 256 (#834) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index f2edcc443..940ecb0b3 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "64" + value: "256" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index ad82dc5c1..281524eac 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ 
b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "64" + value: "256" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 8b2f06d7b..827e83f40 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "64" + value: "256" - name: SMPC__SERVICE__METRICS__HOST valueFrom: From a1efc5db1c35df18a6559eaa1c0129745aeaefdc Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 18 Dec 2024 06:56:52 -0800 Subject: [PATCH 133/170] fix: log throughput correctly (#835) --- iris-mpc-store/src/s3_importer.rs | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/iris-mpc-store/src/s3_importer.rs b/iris-mpc-store/src/s3_importer.rs index e4ae76028..d402576e8 100644 --- a/iris-mpc-store/src/s3_importer.rs +++ b/iris-mpc-store/src/s3_importer.rs @@ -171,13 +171,23 @@ pub async fn fetch_and_parse_chunks( Ok(stream) => stream.boxed(), Err(e) => stream::once(async move { Err(e) }).boxed(), }) + .inspect({ + let counter = Arc::new(AtomicUsize::new(0)); + move |_| { + if counter.fetch_add(1, Ordering::Relaxed) % 1000 == 0 { + let elapsed = now.elapsed().as_secs_f32(); + if elapsed > 0.0 { + let bytes = total_bytes.load(Ordering::Relaxed); + tracing::info!( + "Current download throughput: {:.2} Gbps", + bytes as f32 * 8.0 / 1e9 / elapsed + ); + } + } + } + }) .boxed(); - tracing::info!( - "Overall download throughput: {:.2} Gbps", - total_bytes.load(Ordering::Relaxed) as f32 * 8.0 / 1e9 / now.elapsed().as_secs_f32() - ); - result_stream } From 8ed5547b7b709a8de92d5b675c0bdcc9d7b60364 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 18 Dec 2024 16:06:15 +0100 Subject: [PATCH 134/170] Update stage to not have fake DB (#836) --- 
deploy/stage/common-values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 3f5042ecd..984d0a7bd 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.12.10" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.1" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index e393ad3e3..b751e630a 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "800000" + value: "0" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 1138c123f..43110a4ef 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "800000" + value: "0" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index c1dfd7c32..2a12108db 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "800000" + value: "0" - name: SMPC__MAX_DB_SIZE value: "1000000" From e148ebab8a7a2c0ba3204575af1083918cdaa717 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:16:31 +0100 
Subject: [PATCH 135/170] chore: Release v0.13.2 (#837) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index c43c58de4..d43ada5a7 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.0" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.2" environment: prod replicaCount: 1 From e39d44173577d4842d7b0ec288f2cfcce9145d16 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 18 Dec 2024 17:11:41 +0100 Subject: [PATCH 136/170] Deploy initial DB size for stage (#838) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 984d0a7bd..f99b86d94 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.1" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.2" environment: stage replicaCount: 1 diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index b751e630a..0e6f4745e 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "0" + value: "1000" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 43110a4ef..11743380f 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ 
b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "0" + value: "1000" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 2a12108db..32010193d 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "0" + value: "1000" - name: SMPC__MAX_DB_SIZE value: "1000000" From cd13f5947f84581b78fa364c32690d0342c584c9 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 18 Dec 2024 17:29:03 +0100 Subject: [PATCH 137/170] Disable s3 exporter on stage (#839) --- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 0e6f4745e..668ca3b37 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-stage-eu-north-1" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 11743380f..2b5ff1e4e 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-stage-eu-north-1" diff --git 
a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 32010193d..33ebb55fa 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -69,7 +69,7 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "true" + value: "false" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-stage-eu-north-1" From c35c1ef16fd71855da45430855dd21455e33d3d6 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 18 Dec 2024 09:26:30 -0800 Subject: [PATCH 138/170] reduce batch size (#840) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 940ecb0b3..f2edcc443 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "256" + value: "64" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 281524eac..ad82dc5c1 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "256" + value: "64" - name: SMPC__SERVICE__METRICS__HOST valueFrom: diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 827e83f40..8b2f06d7b 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -102,7 +102,7 @@ env: value: "0" - name: SMPC__MAX_BATCH_SIZE - value: "256" + value: "64" - name: 
SMPC__SERVICE__METRICS__HOST valueFrom: From 6fddb6ece4bc1e5dd4ca0ff96e3944c58ec565fd Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 18 Dec 2024 14:47:43 -0800 Subject: [PATCH 139/170] reduce logs (#841) --- iris-mpc-store/src/s3_importer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iris-mpc-store/src/s3_importer.rs b/iris-mpc-store/src/s3_importer.rs index d402576e8..d7c1760b4 100644 --- a/iris-mpc-store/src/s3_importer.rs +++ b/iris-mpc-store/src/s3_importer.rs @@ -174,7 +174,7 @@ pub async fn fetch_and_parse_chunks( .inspect({ let counter = Arc::new(AtomicUsize::new(0)); move |_| { - if counter.fetch_add(1, Ordering::Relaxed) % 1000 == 0 { + if counter.fetch_add(1, Ordering::Relaxed) % 1_000_000 == 0 { let elapsed = now.elapsed().as_secs_f32(); if elapsed > 0.0 { let bytes = total_bytes.load(Ordering::Relaxed); From fa2b511f4303ef8b8d6adce9b5c2b255a46d54df Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Wed, 18 Dec 2024 14:48:07 -0800 Subject: [PATCH 140/170] measure prefetch time (#829) --- iris-mpc-gpu/src/server/actor.rs | 72 +++++++++++++++++++------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 95c5aa289..cfb58f96f 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -1126,21 +1126,29 @@ impl ServerActor { .collect::>() }; - self.codes_engine.prefetch_db_chunk( - code_db_slices, - &self.code_chunk_buffers[0], - &chunk_sizes(0), - &vec![0; self.device_manager.device_count()], - &self.current_db_sizes, - &self.streams[0], - ); - self.masks_engine.prefetch_db_chunk( - mask_db_slices, - &self.mask_chunk_buffers[0], - &chunk_sizes(0), - &vec![0; self.device_manager.device_count()], - &self.current_db_sizes, + record_stream_time!( + &self.device_manager, &self.streams[0], + events, + "prefetch_db_chunk", + { + self.codes_engine.prefetch_db_chunk( + code_db_slices, + &self.code_chunk_buffers[0], + 
&chunk_sizes(0), + &vec![0; self.device_manager.device_count()], + &self.current_db_sizes, + &self.streams[0], + ); + self.masks_engine.prefetch_db_chunk( + mask_db_slices, + &self.mask_chunk_buffers[0], + &chunk_sizes(0), + &vec![0; self.device_manager.device_count()], + &self.current_db_sizes, + &self.streams[0], + ); + } ); // ---- START DATABASE DEDUP ---- @@ -1180,21 +1188,29 @@ impl ServerActor { } // Prefetch next chunk - self.codes_engine.prefetch_db_chunk( - code_db_slices, - &self.code_chunk_buffers[(db_chunk_idx + 1) % 2], - &next_chunk_size, - &chunk_size.iter().map(|s| offset + s).collect::>(), - &self.current_db_sizes, - next_request_streams, - ); - self.masks_engine.prefetch_db_chunk( - mask_db_slices, - &self.mask_chunk_buffers[(db_chunk_idx + 1) % 2], - &next_chunk_size, - &chunk_size.iter().map(|s| offset + s).collect::>(), - &self.current_db_sizes, + record_stream_time!( + &self.device_manager, next_request_streams, + events, + "prefetch_db_chunk", + { + self.codes_engine.prefetch_db_chunk( + code_db_slices, + &self.code_chunk_buffers[(db_chunk_idx + 1) % 2], + &next_chunk_size, + &chunk_size.iter().map(|s| offset + s).collect::>(), + &self.current_db_sizes, + next_request_streams, + ); + self.masks_engine.prefetch_db_chunk( + mask_db_slices, + &self.mask_chunk_buffers[(db_chunk_idx + 1) % 2], + &next_chunk_size, + &chunk_size.iter().map(|s| offset + s).collect::>(), + &self.current_db_sizes, + next_request_streams, + ); + } ); self.device_manager From 91d7741d21c23459225d42babf464c737bf43b2e Mon Sep 17 00:00:00 2001 From: iliailia Date: Thu, 19 Dec 2024 15:34:55 +0100 Subject: [PATCH 141/170] Send less in cross_mul (#817) --- iris-mpc-cpu/src/protocol/ops.rs | 46 ++++++++------------------------ 1 file changed, 11 insertions(+), 35 deletions(-) diff --git a/iris-mpc-cpu/src/protocol/ops.rs b/iris-mpc-cpu/src/protocol/ops.rs index d76c50d25..510bf7fad 100644 --- a/iris-mpc-cpu/src/protocol/ops.rs +++ b/iris-mpc-cpu/src/protocol/ops.rs @@ -118,7 
+118,7 @@ pub async fn batch_signed_lift_vec( Ok(batch_signed_lift(session, pre_lift).await?.inner()) } -/// Computes [D1 * T2; D2 * T1] +/// Computes D2 * T1 - T2 * D1 /// Assumes that the input shares are originally 16-bit and lifted to u32. pub(crate) async fn cross_mul( session: &mut Session, @@ -126,15 +126,8 @@ pub(crate) async fn cross_mul( t1: Share, d2: Share, t2: Share, -) -> eyre::Result<(Share, Share)> { - // Compute d1 * t2; t2 * d1 - let mut exchanged_shares_a = Vec::with_capacity(2); - let pairs = [(d1, t2), (d2, t1)]; - for pair in pairs.iter() { - let (x, y) = pair; - let res = session.prf_as_mut().gen_zero_share() + x * y; - exchanged_shares_a.push(res); - } +) -> eyre::Result> { + let res_a = session.prf_as_mut().gen_zero_share() + &d2 * &t1 - &t2 * &d1; let network = session.network(); let next_role = session.identity(&session.own_role()?.next(3))?; @@ -142,7 +135,7 @@ pub(crate) async fn cross_mul( network .send( - NetworkValue::VecRing32(exchanged_shares_a.clone()).to_network(), + NetworkValue::RingElement32(res_a).to_network(), next_role, &session.session_id(), ) @@ -150,24 +143,11 @@ pub(crate) async fn cross_mul( let serialized_reply = network.receive(prev_role, &session.session_id()).await; let res_b = match NetworkValue::from_network(serialized_reply) { - Ok(NetworkValue::VecRing32(element)) => element, - _ => return Err(eyre!("Could not deserialize VecRing16")), + Ok(NetworkValue::RingElement32(element)) => element, + _ => return Err(eyre!("Could not deserialize RingElement32")), }; - if exchanged_shares_a.len() != res_b.len() { - return Err(eyre!( - "Expected a VecRing32 with length {:?} but received with length: {:?}", - exchanged_shares_a.len(), - res_b.len() - )); - } - // vec![D1 * T2; T2 * D1] - let mut res = Vec::with_capacity(2); - for (a_share, b_share) in exchanged_shares_a.into_iter().zip(res_b) { - res.push(Share::new(a_share, b_share)); - } - - Ok((res[0].clone(), res[1].clone())) + Ok(Share::new(res_a, res_b)) } /// 
Computes (d2*t1 - d1*t2) > 0. @@ -187,8 +167,7 @@ pub async fn cross_compare( d2: Share, t2: Share, ) -> eyre::Result { - let (d1t2, d2t1) = cross_mul(session, d1, t1, d2, t2).await?; - let diff = d2t1 - d1t2; + let diff = cross_mul(session, d1, t1, d2, t2).await?; // Compute bit <- MSB(D2 * T1 - D1 * T2) let bit = single_extract_msb_u32::<32>(session, diff).await?; // Open bit @@ -510,16 +489,13 @@ mod tests { ) .await .unwrap(); - ( - open_single(&player_session, out_shared.0).await.unwrap(), - open_single(&player_session, out_shared.1).await.unwrap(), - ) + + open_single(&player_session, out_shared).await.unwrap() }); } // check first party output is equal to the expected result. let t = jobs.join_next().await.unwrap().unwrap(); - assert_eq!(t.0, RingElement(4)); - assert_eq!(t.1, RingElement(6)); + assert_eq!(t, RingElement(2)); } async fn open_additive(session: &Session, x: Vec>) -> eyre::Result> { From b3f6ac048cae40c61764736dd0abe592b0ef6053 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Thu, 19 Dec 2024 16:54:12 +0100 Subject: [PATCH 142/170] Remove selecting last_modified_at from DB (#843) --- iris-mpc-store/src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/iris-mpc-store/src/lib.rs b/iris-mpc-store/src/lib.rs index ee3792448..b1c13e923 100644 --- a/iris-mpc-store/src/lib.rs +++ b/iris-mpc-store/src/lib.rs @@ -221,7 +221,8 @@ impl Store { let partition_stream = match min_last_modified_at { Some(min_last_modified_at) => sqlx::query_as::<_, StoredIris>( - "SELECT * FROM irises WHERE id BETWEEN $1 AND $2 AND last_modified_at >= $3", + "SELECT id, left_code, left_mask, right_code, right_mask FROM irises WHERE id \ + BETWEEN $1 AND $2 AND last_modified_at >= $3", ) .bind(start_id as i64) .bind(end_id as i64) @@ -229,7 +230,8 @@ impl Store { .fetch(&self.pool) .map_err(Into::into), None => sqlx::query_as::<_, StoredIris>( - "SELECT * FROM irises WHERE id BETWEEN $1 AND $2", + "SELECT id, left_code, left_mask, 
right_code, right_mask FROM irises WHERE id \ + BETWEEN $1 AND $2", ) .bind(start_id as i64) .bind(end_id as i64) From e6422825d2fc4992fb5d66413c6e756774caf6c4 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Thu, 19 Dec 2024 17:51:04 +0100 Subject: [PATCH 143/170] Update stage version to include last modified fix (#845) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index f99b86d94..70493f1b3 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.3" environment: stage replicaCount: 1 From 36bae71f3774a093ea5908de30f706c66bc2e5ff Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:59:27 +0100 Subject: [PATCH 144/170] chore: Run the range download test in stage (#848) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 70493f1b3..00be2f848 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.3" +image: "ghcr.io/worldcoin/iris-mpc:load-in-ranges-test" environment: stage replicaCount: 1 From e4a7d7adbf9c13bd2a0c1c9769f6b251984bcebc Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:52:04 +0100 Subject: [PATCH 145/170] chore: Enable importer in stage and disable reshare (#849) * chore: Enable importer in stage * disable reshare server * revert junk * cleanup --- deploy/stage/common-values-reshare-server.yaml | 4 ++-- deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml | 4 
++-- deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml | 4 ++-- deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/deploy/stage/common-values-reshare-server.yaml b/deploy/stage/common-values-reshare-server.yaml index 49b22eecf..fb6e6257b 100644 --- a/deploy/stage/common-values-reshare-server.yaml +++ b/deploy/stage/common-values-reshare-server.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.10.4" environment: stage -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate @@ -138,4 +138,4 @@ nginxSidecar: access_log /dev/stdout basic; } - } \ No newline at end of file + } diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index 668ca3b37..dd27a8c76 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -69,13 +69,13 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-0-stage-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM value: "32" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 2b5ff1e4e..6a2ea49d0 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -69,13 +69,13 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-1-stage-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM value: "32" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml 
b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 33ebb55fa..52b4c1926 100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -69,13 +69,13 @@ env: value: "wf-smpcv2-stage-sns-requests" - name: SMPC__ENABLE_S3_IMPORTER - value: "false" + value: "true" - name: SMPC__DB_CHUNKS_BUCKET_NAME value: "iris-mpc-db-exporter-store-node-2-stage-eu-north-1" - name: SMPC__DB_CHUNKS_FOLDER_NAME - value: "binary_output_2k" + value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM value: "32" From f0497f8d8ead30a98678480ca90c4e1bf27d14e0 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Mon, 23 Dec 2024 08:21:46 +0100 Subject: [PATCH 146/170] Add retries to fetching iris shares (#847) --- iris-mpc/src/bin/server.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index bb2abacef..efd1aa590 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -1,6 +1,7 @@ #![allow(clippy::needless_range_loop)] -use aws_sdk_s3::Client as S3Client; +use aws_config::retry::RetryConfig; +use aws_sdk_s3::{config::Builder as S3ConfigBuilder, Client as S3Client}; use aws_sdk_sns::{types::MessageAttributeValue, Client as SNSClient}; use aws_sdk_sqs::{config::Region, Client}; use axum::{response::IntoResponse, routing::get, Router}; @@ -675,7 +676,13 @@ async fn server_main(config: Config) -> eyre::Result<()> { let shared_config = aws_config::from_env().region(region_provider).load().await; let sqs_client = Client::new(&shared_config); let sns_client = SNSClient::new(&shared_config); - let s3_client = Arc::new(S3Client::new(&shared_config)); + + // Increase S3 retries to 5 + let retry_config = RetryConfig::standard().with_max_attempts(5); + let s3_config = S3ConfigBuilder::from(&shared_config) + .retry_config(retry_config) + .build(); + let s3_client = Arc::new(S3Client::from_conf(s3_config)); let 
s3_client_clone = Arc::clone(&s3_client); let shares_encryption_key_pair = match SharesEncryptionKeyPairs::from_storage(config.clone()).await { From b1ea94e6a635380fd3cc8540e7a1efda68830d75 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 23 Dec 2024 10:36:26 +0100 Subject: [PATCH 147/170] scale down prod for upgrade proto (#851) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index d43ada5a7..2881e7a34 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.13.2" environment: prod -replicaCount: 1 +replicaCount: 0 strategy: type: Recreate From dd8f4a758bc937dba6d033af4241c1f8b9c0305c Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Mon, 23 Dec 2024 10:39:50 +0100 Subject: [PATCH 148/170] Update SMPC iris version (#850) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 2881e7a34..bc9c94aa7 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.2" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.4" environment: prod replicaCount: 0 diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 00be2f848..13756b5a7 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:load-in-ranges-test" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.4" environment: stage replicaCount: 1 From 694f6635aebdba67b49f27325fca21df05c4edcf Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Mon, 23 Dec 2024 
11:06:00 +0100 Subject: [PATCH 149/170] scale up prod after upgrade proto (#852) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index bc9c94aa7..f7ec491f5 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,7 +1,7 @@ image: "ghcr.io/worldcoin/iris-mpc:v0.13.4" environment: prod -replicaCount: 0 +replicaCount: 1 strategy: type: Recreate From 2de96583b862c5aa508af24800770b2e9a4aa6ea Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Tue, 24 Dec 2024 10:17:16 +0100 Subject: [PATCH 150/170] trigger restarting node1 prod for test (#853) --- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index ad82dc5c1..9ca00f773 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -133,6 +133,9 @@ env: - name: SMPC__IMAGE_NAME value: $(IMAGE_NAME) + - name: DUMMY_TRIGGER_RESTART + value: "true" + initContainer: enabled: true image: "amazon/aws-cli:2.17.62" From bddb82e66983526df5c5082dfdcbe5bec5d2d3ec Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Tue, 24 Dec 2024 03:10:30 -0800 Subject: [PATCH 151/170] load chunk in smaller ranges (#846) * load chunk in smaller ranges * load stage db with 800k entries * bump startup probe * test new image * change stage image * avoid prod restart --------- Co-authored-by: Ertugrul Aypek --- .../workflows/temp-branch-build-and-push.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 4 +- .../stage/smpcv2-0-stage/values-iris-mpc.yaml | 2 +- .../stage/smpcv2-1-stage/values-iris-mpc.yaml | 2 +- .../stage/smpcv2-2-stage/values-iris-mpc.yaml | 2 +- iris-mpc-store/src/s3_importer.rs | 120 +++++++++++------- 6 files changed, 78 insertions(+), 54 
deletions(-) diff --git a/.github/workflows/temp-branch-build-and-push.yaml b/.github/workflows/temp-branch-build-and-push.yaml index 461118538..c4d88f920 100644 --- a/.github/workflows/temp-branch-build-and-push.yaml +++ b/.github/workflows/temp-branch-build-and-push.yaml @@ -3,7 +3,7 @@ name: Branch - Build and push docker image on: push: branches: - - "ertugrul/change-to-binary" + - "ps/feat/load-in-ranges" concurrency: group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 13756b5a7..15447d9d9 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.5" environment: stage replicaCount: 1 @@ -27,7 +27,7 @@ readinessProbe: startupProbe: initialDelaySeconds: 60 - failureThreshold: 40 + failureThreshold: 60 periodSeconds: 30 httpGet: path: /ready diff --git a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml index dd27a8c76..111e913cc 100644 --- a/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-0-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "1000" + value: "800000" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml index 6a2ea49d0..f568b95e2 100644 --- a/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-1-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "1000" + value: "800000" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml index 52b4c1926..c9a114dbd 
100644 --- a/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml +++ b/deploy/stage/smpcv2-2-stage/values-iris-mpc.yaml @@ -84,7 +84,7 @@ env: value: "true" - name: SMPC__INIT_DB_SIZE - value: "1000" + value: "800000" - name: SMPC__MAX_DB_SIZE value: "1000000" diff --git a/iris-mpc-store/src/s3_importer.rs b/iris-mpc-store/src/s3_importer.rs index d7c1760b4..75693fafe 100644 --- a/iris-mpc-store/src/s3_importer.rs +++ b/iris-mpc-store/src/s3_importer.rs @@ -18,9 +18,11 @@ const SINGLE_ELEMENT_SIZE: usize = IRIS_CODE_LENGTH * mem::size_of::() * 2 + MASK_CODE_LENGTH * mem::size_of::() * 2 + mem::size_of::(); // 75 KB +const MAX_RANGE_SIZE: usize = 200; // Download chunks in sub-chunks of 200 elements = 15 MB + #[async_trait] pub trait ObjectStore: Send + Sync + 'static { - async fn get_object(&self, key: &str) -> eyre::Result; + async fn get_object(&self, key: &str, range: (usize, usize)) -> eyre::Result; async fn list_objects(&self, prefix: &str) -> eyre::Result>; } @@ -37,12 +39,13 @@ impl S3Store { #[async_trait] impl ObjectStore for S3Store { - async fn get_object(&self, key: &str) -> eyre::Result { + async fn get_object(&self, key: &str, range: (usize, usize)) -> eyre::Result { let res = self .client .get_object() .bucket(&self.bucket) .key(key) + .range(format!("bytes={}-{}", range.0, range.1 - 1)) .send() .await?; @@ -134,59 +137,74 @@ pub async fn fetch_and_parse_chunks( last_snapshot_details: LastSnapshotDetails, ) -> Pin> + Send + '_>> { tracing::info!("Generating chunk files using: {:?}", last_snapshot_details); - let chunks: Vec = (1..=last_snapshot_details.last_serial_id) - .step_by(last_snapshot_details.chunk_size as usize) - .map(|num| format!("{}/{}.bin", prefix_name, num)) - .collect(); - tracing::info!("Generated {} chunk names", chunks.len()); + let range_size = if last_snapshot_details.chunk_size as usize > MAX_RANGE_SIZE { + MAX_RANGE_SIZE + } else { + last_snapshot_details.chunk_size as usize + }; let total_bytes = Arc::new(AtomicUsize::new(0)); 
let now = Instant::now(); - let result_stream = stream::iter(chunks) - .map({ - let total_bytes_clone = total_bytes.clone(); - move |chunk| { - let counter = total_bytes_clone.clone(); - async move { - let mut object_stream = store.get_object(&chunk).await?.into_async_read(); - let mut records = Vec::with_capacity(last_snapshot_details.chunk_size as usize); - let mut buf = vec![0u8; SINGLE_ELEMENT_SIZE]; - loop { - match object_stream.read_exact(&mut buf).await { - Ok(_) => { - let iris = StoredIris::from_bytes(&buf); - records.push(iris); - counter.fetch_add(SINGLE_ELEMENT_SIZE, Ordering::Relaxed); + let result_stream = + stream::iter((1..=last_snapshot_details.last_serial_id).step_by(range_size)) + .map({ + let total_bytes_clone = total_bytes.clone(); + move |chunk| { + let counter = total_bytes_clone.clone(); + let prefix_name = prefix_name.clone(); + async move { + let chunk_id = (chunk / last_snapshot_details.chunk_size) + * last_snapshot_details.chunk_size + + 1; + let offset_within_chunk = (chunk - chunk_id) as usize; + let mut object_stream = store + .get_object( + &format!("{}/{}.bin", prefix_name, chunk_id), + ( + offset_within_chunk * SINGLE_ELEMENT_SIZE, + (offset_within_chunk + range_size) * SINGLE_ELEMENT_SIZE, + ), + ) + .await? 
+ .into_async_read(); + let mut records = Vec::with_capacity(range_size); + let mut buf = vec![0u8; SINGLE_ELEMENT_SIZE]; + loop { + match object_stream.read_exact(&mut buf).await { + Ok(_) => { + let iris = StoredIris::from_bytes(&buf); + records.push(iris); + counter.fetch_add(SINGLE_ELEMENT_SIZE, Ordering::Relaxed); + } + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break, + Err(e) => return Err(e.into()), } - Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e.into()), } + Ok::<_, eyre::Error>(stream::iter(records)) } - Ok::<_, eyre::Error>(stream::iter(records)) } - } - }) - .buffer_unordered(concurrency) - .flat_map(|result| match result { - Ok(stream) => stream.boxed(), - Err(e) => stream::once(async move { Err(e) }).boxed(), - }) - .inspect({ - let counter = Arc::new(AtomicUsize::new(0)); - move |_| { - if counter.fetch_add(1, Ordering::Relaxed) % 1_000_000 == 0 { - let elapsed = now.elapsed().as_secs_f32(); - if elapsed > 0.0 { - let bytes = total_bytes.load(Ordering::Relaxed); - tracing::info!( - "Current download throughput: {:.2} Gbps", - bytes as f32 * 8.0 / 1e9 / elapsed - ); + }) + .buffer_unordered(concurrency) + .flat_map(|result| match result { + Ok(stream) => stream.boxed(), + Err(e) => stream::once(async move { Err(e) }).boxed(), + }) + .inspect({ + let counter = Arc::new(AtomicUsize::new(0)); + move |_| { + if counter.fetch_add(1, Ordering::Relaxed) % 1_000_000 == 0 { + let elapsed = now.elapsed().as_secs_f32(); + if elapsed > 0.0 { + let bytes = total_bytes.load(Ordering::Relaxed); + tracing::info!( + "Current download throughput: {:.2} Gbps", + bytes as f32 * 8.0 / 1e9 / elapsed + ); + } } } - } - }) - .boxed(); + }) + .boxed(); result_stream } @@ -227,13 +245,19 @@ mod tests { #[async_trait] impl ObjectStore for MockStore { - async fn get_object(&self, key: &str) -> eyre::Result { + async fn get_object(&self, key: &str, range: (usize, usize)) -> eyre::Result { let bytes = self .objects 
.get(key) .cloned() .ok_or_else(|| eyre::eyre!("Object not found: {}", key))?; - Ok(ByteStream::from(SdkBody::from(bytes))) + + // Handle the range parameter by slicing the bytes + let start = range.0; + let end = range.1.min(bytes.len()); + let sliced_bytes = bytes[start..end].to_vec(); + + Ok(ByteStream::from(SdkBody::from(sliced_bytes))) } async fn list_objects(&self, _: &str) -> eyre::Result> { From e9f5196c15f24b1122c72b70f032da160c312864 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Tue, 24 Dec 2024 12:25:34 +0100 Subject: [PATCH 152/170] release v0.13.5 to prod (#854) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index f7ec491f5..746cdc946 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.4" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.5" environment: prod replicaCount: 1 diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 9ca00f773..ad82dc5c1 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -133,9 +133,6 @@ env: - name: SMPC__IMAGE_NAME value: $(IMAGE_NAME) - - name: DUMMY_TRIGGER_RESTART - value: "true" - initContainer: enabled: true image: "amazon/aws-cli:2.17.62" From 1aad7084e09d1175ca75065456c1c09c9cd44fc1 Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 25 Dec 2024 09:47:46 +0100 Subject: [PATCH 153/170] try 64 import parallelism in prod (#855) try 64 parallelism in prod --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index f2edcc443..7a9a98262 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index ad82dc5c1..0aa43b3e9 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index 8b2f06d7b..ff7546fe5 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "32" + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From d7f21964375fa6f735849ec39fd8d54c948e1afa Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 25 Dec 2024 10:21:18 +0100 Subject: [PATCH 154/170] try 128 load parallelism in prod (#856) --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 2 +- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 7a9a98262..066e5bf72 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + 
value: "128" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 0aa43b3e9..3d8259208 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "128" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index ff7546fe5..bb7094dd7 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -87,7 +87,7 @@ env: value: "binary_output_16k" - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "64" + value: "128" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 68d965450482cf21db1b979238e42b22f348e9de Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Wed, 25 Dec 2024 20:31:56 +0100 Subject: [PATCH 155/170] 64 s3 load parallelism + 8 aurora load parallelism in prod (#857) stick to 64 load parallelism in prod --- deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml | 6 +++--- deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml | 6 +++--- deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml index 066e5bf72..a22402550 100644 --- a/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-0-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "80" + value: "8" - name: SMPC__AWS__REGION value: "eu-north-1" @@ -86,8 +86,8 @@ env: - name: SMPC__DB_CHUNKS_FOLDER_NAME value: "binary_output_16k" - - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "128" + - name: SMPC__DATABASE__LOAD_PARALLELISM + value: "64" - name: 
SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml index 3d8259208..e4aae1f37 100644 --- a/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-1-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "80" + value: "8" - name: SMPC__AWS__REGION value: "eu-north-1" @@ -86,8 +86,8 @@ env: - name: SMPC__DB_CHUNKS_FOLDER_NAME value: "binary_output_16k" - - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "128" + - name: SMPC__DATABASE__LOAD_PARALLELISM + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" diff --git a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml index bb7094dd7..6f35d9131 100644 --- a/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml +++ b/deploy/prod/smpcv2-2-prod/values-iris-mpc.yaml @@ -36,7 +36,7 @@ env: value: "true" - name: SMPC__DATABASE__LOAD_PARALLELISM - value: "80" + value: "8" - name: SMPC__AWS__REGION value: "eu-north-1" @@ -86,8 +86,8 @@ env: - name: SMPC__DB_CHUNKS_FOLDER_NAME value: "binary_output_16k" - - name: SMPC__LOAD_CHUNKS_PARALLELISM - value: "128" + - name: SMPC__DATABASE__LOAD_PARALLELISM + value: "64" - name: SMPC__CLEAR_DB_BEFORE_INIT value: "true" From 4d69c9d7f15689f41aae3a6ef1541da6ddde75eb Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 27 Dec 2024 01:39:49 -0800 Subject: [PATCH 156/170] generate keys on startup for e2es (#858) * generate keys on startup for e2es * typos --- deploy/e2e/iris-mpc-0.yaml.tpl | 7 +++++-- deploy/e2e/iris-mpc-1.yaml.tpl | 6 +++++- deploy/e2e/iris-mpc-2.yaml.tpl | 6 +++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index b100a56e2..5e1abb843 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -40,7 +40,6 @@ iris-mpc-0: seccompProfile: type: 
RuntimeDefault - resources: limits: cpu: 31 @@ -190,7 +189,7 @@ iris-mpc-0: initContainer: enabled: true - image: "amazon/aws-cli:2.17.62" + image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image name: "iris-mpc-0-copy-cuda-libs" env: - name: PARTY_ID @@ -203,6 +202,10 @@ iris-mpc-0: name: "iris-mpc-0-init" init.sh: | #!/usr/bin/env bash + apt-update && apt install -y awscli cd /libs aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . + + key-manager --node-id 0 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys + diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index f9e834bf6..a7f441059 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -190,7 +190,7 @@ iris-mpc-1: initContainer: enabled: true - image: "amazon/aws-cli:2.17.62" + image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image name: "iris-mpc-1-copy-cuda-libs" env: - name: PARTY_ID @@ -203,6 +203,10 @@ iris-mpc-1: name: "iris-mpc-1-init" init.sh: | #!/usr/bin/env bash + apt-update && apt install -y awscli cd /libs aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
+ + key-manager --node-id 1 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys + diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 01345730f..d06064df1 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -190,7 +190,7 @@ iris-mpc-2: initContainer: enabled: true - image: "amazon/aws-cli:2.17.62" + image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image name: "iris-mpc-2-copy-cuda-libs" env: - name: PARTY_ID @@ -203,6 +203,10 @@ iris-mpc-2: name: "iris-mpc-2-init" init.sh: | #!/usr/bin/env bash + apt-update && apt install -y awscli cd /libs aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . + + key-manager --node-id 2 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys + From e6997569056acbe0f94c2b77672d271383b86aff Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 27 Dec 2024 03:57:29 -0800 Subject: [PATCH 157/170] fix apt command (#859) * fix apt command * fix s3 bucket url --- Dockerfile.nocuda | 5 +++-- deploy/e2e/iris-mpc-0.yaml.tpl | 4 ++-- deploy/e2e/iris-mpc-1.yaml.tpl | 2 +- deploy/e2e/iris-mpc-2.yaml.tpl | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Dockerfile.nocuda b/Dockerfile.nocuda index 22585d42d..13326ab73 100644 --- a/Dockerfile.nocuda +++ b/Dockerfile.nocuda @@ -29,12 +29,12 @@ FROM --platform=linux/amd64 build-image as build-app WORKDIR /src/gpu-iris-mpc COPY . . 
-RUN cargo build --release --target x86_64-unknown-linux-gnu --bin seed-v1-dbs --bin upgrade-server --bin upgrade-client --bin upgrade-checker --bin reshare-server +RUN cargo build --release --target x86_64-unknown-linux-gnu --bin seed-v1-dbs --bin upgrade-server --bin upgrade-client --bin upgrade-checker --bin reshare-server --bin key-manager FROM --platform=linux/amd64 ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y ca-certificates +RUN apt-get update && apt-get install -y ca-certificates awscli COPY certs /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -43,6 +43,7 @@ COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/ COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-client /bin/upgrade-client COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/upgrade-checker /bin/upgrade-checker COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/reshare-server /bin/reshare-server +COPY --from=build-app /src/gpu-iris-mpc/target/x86_64-unknown-linux-gnu/release/key-manager /bin/key-manager USER 65534 ENTRYPOINT ["/bin/upgrade-server"] diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index 5e1abb843..40c53bde5 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -141,7 +141,7 @@ iris-mpc-0: value: "0" - name: SMPC__PUBLIC_KEY_BASE_URL - value: "https://pki-smpcv2-stage.worldcoin.org" + value: "http://wf-$ENV-stage-public-keys.s3.localhost.localstack.cloud:4566" - name: SMPC__ENABLE_S3_IMPORTER value: "false" @@ -202,7 +202,7 @@ iris-mpc-0: name: "iris-mpc-0-init" init.sh: | #!/usr/bin/env bash - apt-update && apt install -y awscli + apt update && apt install -y awscli cd /libs aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index a7f441059..ceeb86636 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -142,7 +142,7 @@ iris-mpc-1: value: "1" - name: SMPC__PUBLIC_KEY_BASE_URL - value: "https://pki-smpcv2-stage.worldcoin.org" + value: "http://wf-$ENV-stage-public-keys.s3.localhost.localstack.cloud:4566" - name: SMPC__ENABLE_S3_IMPORTER value: "false" diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index d06064df1..30f37b93c 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -142,7 +142,7 @@ iris-mpc-2: value: "2" - name: SMPC__PUBLIC_KEY_BASE_URL - value: "https://pki-smpcv2-stage.worldcoin.org" + value: "http://wf-$ENV-stage-public-keys.s3.localhost.localstack.cloud:4566" - name: SMPC__ENABLE_S3_IMPORTER value: "false" @@ -203,7 +203,7 @@ iris-mpc-2: name: "iris-mpc-2-init" init.sh: | #!/usr/bin/env bash - apt-update && apt install -y awscli + apt update && apt install -y awscli cd /libs aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
From 2440208e2be794e324533123fc6e2eb409d303f5 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 27 Dec 2024 08:41:54 -0800 Subject: [PATCH 158/170] configurable region for key-manager (#862) * configurable region for key-manager * remove leftover --- deploy/e2e/iris-mpc-0.yaml.tpl | 9 ++++++--- deploy/e2e/iris-mpc-1.yaml.tpl | 8 +++++--- deploy/e2e/iris-mpc-2.yaml.tpl | 8 +++++--- iris-mpc-common/src/bin/README.md | 24 ++++++++++++++++++++++++ iris-mpc-common/src/bin/key_manager.rs | 20 ++++++++++++++------ 5 files changed, 54 insertions(+), 15 deletions(-) create mode 100644 iris-mpc-common/src/bin/README.md diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index 40c53bde5..92888e28c 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -141,7 +141,7 @@ iris-mpc-0: value: "0" - name: SMPC__PUBLIC_KEY_BASE_URL - value: "http://wf-$ENV-stage-public-keys.s3.localhost.localstack.cloud:4566" + value: "http://wf-$ENV-public-keys.s3.localhost.localstack.cloud:4566" - name: SMPC__ENABLE_S3_IMPORTER value: "false" @@ -202,10 +202,13 @@ iris-mpc-0: name: "iris-mpc-0-init" init.sh: | #!/usr/bin/env bash - apt update && apt install -y awscli + set -e + cd /libs + aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- key-manager --node-id 0 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys + AWS_ENDPOINT="http://localstack:4566" key-manager --node-id 0 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys --region $AWS_REGION + diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index ceeb86636..84a5a5516 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -142,7 +142,7 @@ iris-mpc-1: value: "1" - name: SMPC__PUBLIC_KEY_BASE_URL - value: "http://wf-$ENV-stage-public-keys.s3.localhost.localstack.cloud:4566" + value: "http://wf-$ENV-public-keys.s3.localhost.localstack.cloud:4566" - name: SMPC__ENABLE_S3_IMPORTER value: "false" @@ -203,10 +203,12 @@ iris-mpc-1: name: "iris-mpc-1-init" init.sh: | #!/usr/bin/env bash - apt-update && apt install -y awscli + set -e + cd /libs + aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . - key-manager --node-id 1 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys + key-manager --node-id 1 --env $ENV rotate --public-key-bucket-name wf-$ENV-public-keys diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 30f37b93c..c58cd76ec 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -142,7 +142,7 @@ iris-mpc-2: value: "2" - name: SMPC__PUBLIC_KEY_BASE_URL - value: "http://wf-$ENV-stage-public-keys.s3.localhost.localstack.cloud:4566" + value: "http://wf-$ENV-public-keys.s3.localhost.localstack.cloud:4566" - name: SMPC__ENABLE_S3_IMPORTER value: "false" @@ -203,10 +203,12 @@ iris-mpc-2: name: "iris-mpc-2-init" init.sh: | #!/usr/bin/env bash - apt update && apt install -y awscli + set -e + cd /libs + aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- key-manager --node-id 2 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys + key-manager --node-id 2 --env $ENV rotate --public-key-bucket-name wf-$ENV-public-keys diff --git a/iris-mpc-common/src/bin/README.md b/iris-mpc-common/src/bin/README.md new file mode 100644 index 000000000..a6ef7c67a --- /dev/null +++ b/iris-mpc-common/src/bin/README.md @@ -0,0 +1,24 @@ +# Key Manager CLI + +The Key Manager CLI is a command line interface to rotate public and private keys used to encode shares. +The initial private key is generated using `smpc-setup`, and it is empty. + +Key manager must be run from each of the participant accounts at least once before initiating the protocol. + +Keys can be rotated at any time using the `rotate` command. + +## Usage + +```bash +>>> key-manager --node-id 2 --env prod rotate --public-key-bucket-name wf-env-stage-public-keys +``` + +This will: + +1. Update the public key in the bucket `wf-env-stage-public-keys` for node 2. +2. Generate a new private key and store aws secrets manager under the secret name: `prod/iris-mpc/ecdh-private-key-2` + +This key will be immediately valid, though the previous key will retain a validity of 24 hours (dictated by the cloudfront caching behavior, +and by application logic that checks against AWSCURRENT and AWSPREVIOUS version of the secret). 
+ + diff --git a/iris-mpc-common/src/bin/key_manager.rs b/iris-mpc-common/src/bin/key_manager.rs index a11398756..afc0cc1e6 100644 --- a/iris-mpc-common/src/bin/key_manager.rs +++ b/iris-mpc-common/src/bin/key_manager.rs @@ -15,9 +15,7 @@ use sodiumoxide::crypto::box_::{curve25519xsalsa20poly1305, PublicKey, SecretKey const PUBLIC_KEY_S3_BUCKET_NAME: &str = "wf-smpcv2-stage-public-keys"; const PUBLIC_KEY_S3_KEY_NAME_PREFIX: &str = "public-key"; -const REGION: &str = "eu-north-1"; -/// A fictional versioning CLI #[derive(Debug, Parser)] // requires `derive` feature #[command(name = "key-manager")] #[command(about = "Key manager CLI", long_about = None)] @@ -32,6 +30,9 @@ struct KeyManagerCli { #[arg(short, long, env, default_value = "stage")] env: String, + + #[arg(short, long, env, default_value = "eu-north-1")] + region: String, } #[derive(Debug, Subcommand)] @@ -67,8 +68,9 @@ async fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); let args = KeyManagerCli::parse(); + let region = args.region; - let region_provider = S3Region::new(REGION); + let region_provider = S3Region::new(region.clone()); let shared_config = aws_config::from_env().region(region_provider).load().await; let bucket_key_name = format!("{}-{}", PUBLIC_KEY_S3_KEY_NAME_PREFIX, args.node_id); @@ -101,6 +103,7 @@ async fn main() -> eyre::Result<()> { b64_pub_key, &bucket_key_name, public_key_bucket_name, + region.clone(), ) .await?; } @@ -115,6 +118,7 @@ async fn validate_keys( b64_pub_key: Option, bucket_key_name: &str, public_key_bucket_name: Option, + region: String, ) -> eyre::Result<()> { let sm_client = SecretsManagerClient::new(sdk_config); @@ -133,7 +137,7 @@ async fn validate_keys( } else { // Otherwise, get the latest one from S3 using HTTPS let user_pubkey_string = - download_key_from_s3(bucket_name.as_str(), bucket_key_name).await?; + download_key_from_s3(bucket_name.as_str(), bucket_key_name, region.clone()).await?; let user_pubkey = 
STANDARD.decode(user_pubkey_string.as_bytes()).unwrap(); match PublicKey::from_slice(&user_pubkey) { Some(key) => key, @@ -231,9 +235,13 @@ async fn rotate_keys( Ok(()) } -async fn download_key_from_s3(bucket: &str, key: &str) -> Result { +async fn download_key_from_s3( + bucket: &str, + key: &str, + region: String, +) -> Result { print!("Downloading key from S3 bucket: {} key: {}", bucket, key); - let s3_url = format!("https://{}.s3.{}.amazonaws.com/{}", bucket, REGION, key); + let s3_url = format!("https://{}.s3.{}.amazonaws.com/{}", bucket, region, key); let client = Client::new(); let response = client.get(&s3_url).send().await?.text().await?; Ok(response) From 79147068f7b1d758b2342166501c3054c0a88ff5 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 27 Dec 2024 09:17:08 -0800 Subject: [PATCH 159/170] fix localstack variables (#863) * fix localstack variables * typo --- deploy/e2e/iris-mpc-0.yaml.tpl | 9 +++++++-- deploy/e2e/iris-mpc-1.yaml.tpl | 9 +++++++-- deploy/e2e/iris-mpc-2.yaml.tpl | 9 +++++++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index 92888e28c..ea652b8fc 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -121,7 +121,10 @@ iris-mpc-0: value: "8" - name: SMPC__AWS__REGION - value: "eu-north-1" + value: "$AWS_REGION" + + - name: SMPC__AWS__ENDPOINT + value: "http://localstack:4566" - name: SMPC__REQUESTS_QUEUE_URL value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" @@ -192,6 +195,8 @@ iris-mpc-0: image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image name: "iris-mpc-0-copy-cuda-libs" env: + - name: AWS_REGION + value: "$AWS_REGION" - name: PARTY_ID value: "1" - name: MY_NODE_IP @@ -209,6 +214,6 @@ iris-mpc-0: aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- AWS_ENDPOINT="http://localstack:4566" key-manager --node-id 0 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys --region $AWS_REGION + AWS_ENDPOINT_URL="http://localstack:4566" key-manager --node-id 0 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys --region $AWS_REGION diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 84a5a5516..3d4ae3e73 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -121,7 +121,10 @@ iris-mpc-1: value: "8" - name: SMPC__AWS__REGION - value: "eu-north-1" + value: "$AWS_REGION" + + - name: SMPC__AWS__ENDPOINT + value: "http://localstack:4566" - name: SMPC__REQUESTS_QUEUE_URL value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" @@ -193,6 +196,8 @@ iris-mpc-1: image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image name: "iris-mpc-1-copy-cuda-libs" env: + - name: AWS_REGION + value: "$AWS_REGION" - name: PARTY_ID value: "2" - name: MY_NODE_IP @@ -210,5 +215,5 @@ iris-mpc-1: aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- key-manager --node-id 1 --env $ENV rotate --public-key-bucket-name wf-$ENV-public-keys + AWS_ENDPOINT_URL="http://localstack:4566" key-manager --node-id 1 --env $ENV --region $AWS_REGION rotate --public-key-bucket-name wf-$ENV-public-keys diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index c58cd76ec..ccc51ece2 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -121,7 +121,10 @@ iris-mpc-2: value: "8" - name: SMPC__AWS__REGION - value: "eu-north-1" + value: "$AWS_REGION" + + - name: SMPC__AWS__ENDPOINT + value: "http://localstack:4566" - name: SMPC__REQUESTS_QUEUE_URL value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" @@ -193,6 +196,8 @@ iris-mpc-2: image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image name: "iris-mpc-2-copy-cuda-libs" env: + - name: AWS_REGION + value: "$AWS_REGION" - name: PARTY_ID value: "3" - name: MY_NODE_IP @@ -210,5 +215,5 @@ iris-mpc-2: aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- key-manager --node-id 2 --env $ENV rotate --public-key-bucket-name wf-$ENV-public-keys + AWS_ENDPOINT_URL="http://localstack:4566" key-manager --node-id 2 --env $ENV --region $AWS_REGION rotate --public-key-bucket-name wf-$ENV-public-keys From fdaee46f5cfb32353295a2ea32df578872ac37f1 Mon Sep 17 00:00:00 2001 From: Carlo Mazzaferro Date: Fri, 27 Dec 2024 11:41:40 -0800 Subject: [PATCH 160/170] use custom aws endpoint (#864) * use custom aws endpoint * update sha --- deploy/e2e/iris-mpc-0.yaml.tpl | 7 ++++--- deploy/e2e/iris-mpc-1.yaml.tpl | 6 +++--- deploy/e2e/iris-mpc-2.yaml.tpl | 6 +++--- iris-mpc-common/src/bin/key_manager.rs | 29 +++++++++++++++++++++++--- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index ea652b8fc..a74a18111 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -139,7 +139,8 @@ iris-mpc-0: value: "/data/" - name: SMPC__KMS_KEY_ARNS - value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' + value: '["arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000002"]' + - name: SMPC__PARTY_ID value: "0" @@ -192,7 +193,7 @@ iris-mpc-0: initContainer: enabled: true - image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image + image: "ghcr.io/worldcoin/iris-mpc:2694d8cbb37c278ed84951ef9aac3af47b21f146" # no-cuda image name: "iris-mpc-0-copy-cuda-libs" env: - name: AWS_REGION @@ -214,6 +215,6 @@ iris-mpc-0: aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- AWS_ENDPOINT_URL="http://localstack:4566" key-manager --node-id 0 --env $ENV rotate --public-key-bucket-name wf-$ENV-stage-public-keys --region $AWS_REGION + key-manager --node-id 0 --env $ENV --endpoint-url "http://localstack:4566" rotate --public-key-bucket-name wf-$ENV-stage-public-keys --region $AWS_REGION diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 3d4ae3e73..857776162 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -139,7 +139,7 @@ iris-mpc-1: value: "/data/" - name: SMPC__KMS_KEY_ARNS - value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' + value: '["arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "1" @@ -193,7 +193,7 @@ iris-mpc-1: initContainer: enabled: true - image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image + image: "ghcr.io/worldcoin/iris-mpc:2694d8cbb37c278ed84951ef9aac3af47b21f146" # no-cuda image name: "iris-mpc-1-copy-cuda-libs" env: - name: AWS_REGION @@ -215,5 +215,5 @@ iris-mpc-1: aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- AWS_ENDPOINT_URL="http://localstack:4566" key-manager --node-id 1 --env $ENV --region $AWS_REGION rotate --public-key-bucket-name wf-$ENV-public-keys + key-manager --node-id 1 --env $ENV --region $AWS_REGION --endpoint-url "http://localstack:4566" rotate --public-key-bucket-name wf-$ENV-public-keys diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index ccc51ece2..3db2a986b 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -139,7 +139,7 @@ iris-mpc-2: value: "/data/" - name: SMPC__KMS_KEY_ARNS - value: '["arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:eu-north-1:000000000000:key/00000000-0000-0000-0000-000000000002"]' + value: '["arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000000","arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000001","arn:aws:kms:$AWS_REGION:000000000000:key/00000000-0000-0000-0000-000000000002"]' - name: SMPC__PARTY_ID value: "2" @@ -193,7 +193,7 @@ iris-mpc-2: initContainer: enabled: true - image: "ghcr.io/worldcoin/iris-mpc:146c2cae43dbeb586144d9d37d152a6b2bfacdd4" # no-cuda image + image: "ghcr.io/worldcoin/iris-mpc:2694d8cbb37c278ed84951ef9aac3af47b21f146" # no-cuda image name: "iris-mpc-2-copy-cuda-libs" env: - name: AWS_REGION @@ -215,5 +215,5 @@ iris-mpc-2: aws s3 cp s3://wf-smpcv2-stage-libs/libcublas.so.12.2.5.6 . aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
- AWS_ENDPOINT_URL="http://localstack:4566" key-manager --node-id 2 --env $ENV --region $AWS_REGION rotate --public-key-bucket-name wf-$ENV-public-keys + key-manager --node-id 2 --env $ENV --region $AWS_REGION --endpoint-url "http://localstack:4566" rotate --public-key-bucket-name wf-$ENV-public-keys diff --git a/iris-mpc-common/src/bin/key_manager.rs b/iris-mpc-common/src/bin/key_manager.rs index afc0cc1e6..bc8346f1b 100644 --- a/iris-mpc-common/src/bin/key_manager.rs +++ b/iris-mpc-common/src/bin/key_manager.rs @@ -33,6 +33,9 @@ struct KeyManagerCli { #[arg(short, long, env, default_value = "eu-north-1")] region: String, + + #[arg(short, long, env, default_value = None)] + endpoint_url: Option, } #[derive(Debug, Subcommand)] @@ -88,6 +91,7 @@ async fn main() -> eyre::Result<()> { &private_key_secret_id, dry_run, public_key_bucket_name, + args.endpoint_url, ) .await?; } @@ -104,6 +108,7 @@ async fn main() -> eyre::Result<()> { &bucket_key_name, public_key_bucket_name, region.clone(), + args.endpoint_url, ) .await?; } @@ -111,6 +116,7 @@ async fn main() -> eyre::Result<()> { Ok(()) } +#[allow(clippy::too_many_arguments)] async fn validate_keys( sdk_config: &SdkConfig, secret_id: &str, @@ -119,8 +125,15 @@ async fn validate_keys( bucket_key_name: &str, public_key_bucket_name: Option, region: String, + endpoint_url: Option, ) -> eyre::Result<()> { - let sm_client = SecretsManagerClient::new(sdk_config); + let mut sm_config_builder = aws_sdk_secretsmanager::config::Builder::from(sdk_config); + + if let Some(endpoint_url) = endpoint_url.as_ref() { + sm_config_builder = sm_config_builder.endpoint_url(endpoint_url); + } + + let sm_client = SecretsManagerClient::from_conf(sm_config_builder.build()); let bucket_name = if let Some(bucket_name) = public_key_bucket_name { bucket_name @@ -160,6 +173,7 @@ async fn rotate_keys( private_key_secret_id: &str, dry_run: Option, public_key_bucket_name: Option, + endpoint_url: Option, ) -> eyre::Result<()> { let mut rng = thread_rng(); 
@@ -173,8 +187,17 @@ async fn rotate_keys( rng.fill(&mut seedbuf); let pk_seed = Seed(seedbuf); - let s3_client = S3Client::new(sdk_config); - let sm_client = SecretsManagerClient::new(sdk_config); + let mut s3_config_builder = aws_sdk_s3::config::Builder::from(sdk_config); + let mut sm_config_builder = aws_sdk_secretsmanager::config::Builder::from(sdk_config); + + if let Some(endpoint_url) = endpoint_url.as_ref() { + s3_config_builder = s3_config_builder.endpoint_url(endpoint_url); + s3_config_builder = s3_config_builder.force_path_style(true); + sm_config_builder = sm_config_builder.endpoint_url(endpoint_url); + } + + let s3_client = S3Client::from_conf(s3_config_builder.build()); + let sm_client = SecretsManagerClient::from_conf(sm_config_builder.build()); let (public_key, private_key) = generate_key_pairs(pk_seed); let pub_key_str = STANDARD.encode(public_key); From 6d5b2f260babaf2b5368bde3e90f8704916e3b39 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Sat, 28 Dec 2024 09:31:46 -0800 Subject: [PATCH 161/170] first alloc, then register (#860) * first alloc, then register * both limbs * don't free mem * register in tests * fix * . 
* trigger image build * deploy test image to stage --------- Co-authored-by: Ertugrul Aypek --- .../workflows/temp-branch-build-and-push.yaml | 2 +- Cargo.lock | 10 ++++ Cargo.toml | 1 + deploy/stage/common-values-iris-mpc.yaml | 2 +- iris-mpc-gpu/Cargo.toml | 1 + iris-mpc-gpu/src/dot/share_db.rs | 49 +++++++++++++++---- iris-mpc-gpu/src/server/actor.rs | 15 ++++++ iris-mpc-gpu/tests/e2e.rs | 3 ++ iris-mpc/src/bin/server.rs | 3 ++ 9 files changed, 75 insertions(+), 11 deletions(-) diff --git a/.github/workflows/temp-branch-build-and-push.yaml b/.github/workflows/temp-branch-build-and-push.yaml index c4d88f920..678d37592 100644 --- a/.github/workflows/temp-branch-build-and-push.yaml +++ b/.github/workflows/temp-branch-build-and-push.yaml @@ -3,7 +3,7 @@ name: Branch - Build and push docker image on: push: branches: - - "ps/feat/load-in-ranges" + - "ps/host-mem-alloc" concurrency: group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' diff --git a/Cargo.lock b/Cargo.lock index fee4658d5..89face5ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2785,6 +2785,7 @@ dependencies = [ "hex", "iris-mpc-common", "itertools 0.13.0", + "memmap2", "metrics 0.22.3", "metrics-exporter-statsd 0.7.0", "ndarray", @@ -3142,6 +3143,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.9.1" diff --git a/Cargo.toml b/Cargo.toml index 6e16547f3..765c0c9a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ hawk-pack = { git = "https://github.com/Inversed-Tech/hawk-pack.git", rev = "ba9 hex = "0.4.3" itertools = "0.13" num-traits = "0.2" +memmap2 = 
"0.9.5" serde = { version = "1.0", features = ["derive"] } serde-big-array = "0.5.1" serde_json = "1" diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 15447d9d9..39e7a9b19 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.5" +image: "ghcr.io/worldcoin/iris-mpc:6b358589c25ef528f58ba02980103670b037a614" environment: stage replicaCount: 1 diff --git a/iris-mpc-gpu/Cargo.toml b/iris-mpc-gpu/Cargo.toml index 9ee89c19e..da1c791c4 100644 --- a/iris-mpc-gpu/Cargo.toml +++ b/iris-mpc-gpu/Cargo.toml @@ -31,6 +31,7 @@ iris-mpc-common = { path = "../iris-mpc-common" } base64 = "0.22.1" metrics = "0.22.1" metrics-exporter-statsd = "0.7" +memmap2.workspace = true [dev-dependencies] criterion = "0.5" diff --git a/iris-mpc-gpu/src/dot/share_db.rs b/iris-mpc-gpu/src/dot/share_db.rs index c7e8eeb0f..865a4c1ad 100644 --- a/iris-mpc-gpu/src/dot/share_db.rs +++ b/iris-mpc-gpu/src/dot/share_db.rs @@ -21,17 +21,18 @@ use cudarc::{ }, driver::{ result::{self, malloc_async}, - sys::CUdeviceptr, + sys::{CUdeviceptr, CU_MEMHOSTALLOC_PORTABLE}, CudaFunction, CudaSlice, CudaStream, CudaView, DevicePtr, DeviceSlice, LaunchAsync, }, nccl, nvrtc::compile_ptx, }; use itertools::{izip, Itertools}; +use memmap2::MmapMut; use rayon::prelude::*; use std::{ ffi::{c_void, CStr}, - mem, + mem::{self, forget}, sync::Arc, }; @@ -243,17 +244,23 @@ impl ShareDB { .devices() .iter() .map(|device| unsafe { - let mut host_mem0: *mut c_void = std::ptr::null_mut(); - let mut host_mem1: *mut c_void = std::ptr::null_mut(); - let _ = cudarc::driver::sys::lib() - .cuMemAllocHost_v2(&mut host_mem0, max_size * self.code_length); - let _ = cudarc::driver::sys::lib() - .cuMemAllocHost_v2(&mut host_mem1, max_size * self.code_length); + let host_mem0 = MmapMut::map_anon(max_size * self.code_length).unwrap(); + let host_mem1 = MmapMut::map_anon(max_size * 
self.code_length).unwrap(); + + let host_mem0_ptr = host_mem0.as_ptr() as u64; + let host_mem1_ptr = host_mem1.as_ptr() as u64; + + // Make sure to not drop the memory, even though we only use the pointers + // afterwards. This also has the effect that this memory is never freed, which + // is fine for the db. + forget(host_mem0); + forget(host_mem1); + ( StreamAwareCudaSlice::from(device.alloc(max_size).unwrap()), ( StreamAwareCudaSlice::from(device.alloc(max_size).unwrap()), - (host_mem0 as u64, host_mem1 as u64), + (host_mem0_ptr, host_mem1_ptr), ), ) }) @@ -275,6 +282,26 @@ impl ShareDB { } } + pub fn register_host_memory(&self, db: &SlicedProcessedDatabase, max_db_length: usize) { + let max_size = max_db_length / self.device_manager.device_count(); + for (device_index, device) in self.device_manager.devices().iter().enumerate() { + device.bind_to_thread().unwrap(); + unsafe { + let _ = cudarc::driver::sys::lib().cuMemHostRegister_v2( + db.code_gr.limb_0[device_index] as *mut _, + max_size * self.code_length, + CU_MEMHOSTALLOC_PORTABLE, + ); + + let _ = cudarc::driver::sys::lib().cuMemHostRegister_v2( + db.code_gr.limb_1[device_index] as *mut _, + max_size * self.code_length, + CU_MEMHOSTALLOC_PORTABLE, + ); + } + } + } + pub fn load_single_record( index: usize, db: &CudaVec2DSlicerRawPointer, @@ -861,6 +888,7 @@ mod tests { .unwrap(); let query_sums = engine.query_sums(&preprocessed_query, &streams, &blass); let mut db_slices = engine.alloc_db(DB_SIZE); + engine.register_host_memory(&db_slices, DB_SIZE); let db_sizes = engine.load_full_db(&mut db_slices, &db); engine.dot( @@ -962,6 +990,7 @@ mod tests { .unwrap(); let query_sums = engine.query_sums(&preprocessed_query, &streams, &blass); let mut db_slices = engine.alloc_db(DB_SIZE); + engine.register_host_memory(&db_slices, DB_SIZE); let db_sizes = engine.load_full_db(&mut db_slices, &codes_db); engine.dot( @@ -1095,6 +1124,8 @@ mod tests { let db_sizes = codes_engine.load_full_db(&mut code_db_slices, 
&codes_db); let mut mask_db_slices = masks_engine.alloc_db(DB_SIZE); let mask_db_sizes = masks_engine.load_full_db(&mut mask_db_slices, &masks_db); + codes_engine.register_host_memory(&code_db_slices, DB_SIZE); + masks_engine.register_host_memory(&mask_db_slices, DB_SIZE); assert_eq!(db_sizes, mask_db_sizes); diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index cfb58f96f..5dd0d0d61 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -238,11 +238,15 @@ impl ServerActor { comms.clone(), ); + let now = Instant::now(); + let left_code_db_slices = codes_engine.alloc_db(max_db_size); let left_mask_db_slices = masks_engine.alloc_db(max_db_size); let right_code_db_slices = codes_engine.alloc_db(max_db_size); let right_mask_db_slices = masks_engine.alloc_db(max_db_size); + tracing::info!("Allocated db in {:?}", now.elapsed()); + // Engines for inflight queries let batch_codes_engine = ShareDB::init( party_id, @@ -486,6 +490,17 @@ impl ServerActor { .preprocess_db(&mut self.right_mask_db_slices, &self.current_db_sizes); } + pub fn register_host_memory(&self) { + self.codes_engine + .register_host_memory(&self.left_code_db_slices, self.max_db_size); + self.masks_engine + .register_host_memory(&self.left_mask_db_slices, self.max_db_size); + self.codes_engine + .register_host_memory(&self.right_code_db_slices, self.max_db_size); + self.masks_engine + .register_host_memory(&self.right_mask_db_slices, self.max_db_size); + } + fn process_batch_query( &mut self, batch: BatchQuery, diff --git a/iris-mpc-gpu/tests/e2e.rs b/iris-mpc-gpu/tests/e2e.rs index 3af422b4c..7b9e47e7a 100644 --- a/iris-mpc-gpu/tests/e2e.rs +++ b/iris-mpc-gpu/tests/e2e.rs @@ -132,6 +132,7 @@ mod e2e_test { ) { Ok((mut actor, handle)) => { actor.load_full_db(&(&db0.0, &db0.1), &(&db0.0, &db0.1), DB_SIZE); + actor.register_host_memory(); tx0.send(Ok(handle)).unwrap(); actor } @@ -159,6 +160,7 @@ mod e2e_test { ) { Ok((mut actor, handle)) => { 
actor.load_full_db(&(&db1.0, &db1.1), &(&db1.0, &db1.1), DB_SIZE); + actor.register_host_memory(); tx1.send(Ok(handle)).unwrap(); actor } @@ -186,6 +188,7 @@ mod e2e_test { ) { Ok((mut actor, handle)) => { actor.load_full_db(&(&db2.0, &db2.1), &(&db2.0, &db2.1), DB_SIZE); + actor.register_host_memory(); tx2.send(Ok(handle)).unwrap(); actor } diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index efd1aa590..6a75509cc 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -1132,6 +1132,9 @@ async fn server_main(config: Config) -> eyre::Result<()> { tracing::info!("Preprocessing db"); actor.preprocess_db(); + tracing::info!("Page-lock host memory"); + actor.register_host_memory(); + tracing::info!( "Loaded {} records from db into memory [DB sizes: {:?}]", record_counter, From ba9d5fc9b217802d698daa352d675b25014d5f04 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Sat, 28 Dec 2024 10:00:11 -0800 Subject: [PATCH 162/170] reuse events (#861) * reuse events * assign context * dbg * init events once * trigger image build * deploy test image to stage --------- Co-authored-by: Ertugrul Aypek --- .../workflows/temp-branch-build-and-push.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- iris-mpc-gpu/src/helpers/device_manager.rs | 5 +- iris-mpc-gpu/src/server/actor.rs | 54 ++++++++----------- 4 files changed, 28 insertions(+), 35 deletions(-) diff --git a/.github/workflows/temp-branch-build-and-push.yaml b/.github/workflows/temp-branch-build-and-push.yaml index 678d37592..696d980ff 100644 --- a/.github/workflows/temp-branch-build-and-push.yaml +++ b/.github/workflows/temp-branch-build-and-push.yaml @@ -3,7 +3,7 @@ name: Branch - Build and push docker image on: push: branches: - - "ps/host-mem-alloc" + - "ps/reuse-events" concurrency: group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' diff --git a/deploy/stage/common-values-iris-mpc.yaml 
b/deploy/stage/common-values-iris-mpc.yaml index 39e7a9b19..276b120a1 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:6b358589c25ef528f58ba02980103670b037a614" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.6" environment: stage replicaCount: 1 diff --git a/iris-mpc-gpu/src/helpers/device_manager.rs b/iris-mpc-gpu/src/helpers/device_manager.rs index fe3f7563f..4053324a4 100644 --- a/iris-mpc-gpu/src/helpers/device_manager.rs +++ b/iris-mpc-gpu/src/helpers/device_manager.rs @@ -103,8 +103,9 @@ impl DeviceManager { } pub fn destroy_events(&self, events: Vec) { - for event in events { - unsafe { event::destroy(event).unwrap() }; + for (device_idx, event) in events.iter().enumerate() { + self.device(device_idx).bind_to_thread().unwrap(); + unsafe { event::destroy(*event).unwrap() }; } } diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 5dd0d0d61..1a31267a2 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -107,6 +107,9 @@ pub struct ServerActor { disable_persistence: bool, code_chunk_buffers: Vec, mask_chunk_buffers: Vec, + dot_events: Vec>, + exchange_events: Vec>, + phase2_events: Vec>, } const NON_MATCH_ID: u32 = u32::MAX; @@ -330,6 +333,11 @@ impl ServerActor { let code_chunk_buffers = vec![codes_engine.alloc_db_chunk_buffer(DB_CHUNK_SIZE); 2]; let mask_chunk_buffers = vec![masks_engine.alloc_db_chunk_buffer(DB_CHUNK_SIZE); 2]; + // Create all needed events + let dot_events = vec![device_manager.create_events(); 2]; + let exchange_events = vec![device_manager.create_events(); 2]; + let phase2_events = vec![device_manager.create_events(); 2]; + for dev in device_manager.devices() { dev.synchronize().unwrap(); } @@ -367,6 +375,9 @@ impl ServerActor { disable_persistence, code_chunk_buffers, mask_chunk_buffers, + dot_events, + exchange_events, + phase2_events, }) } @@ -1126,14 +1137,6 @@ impl 
ServerActor { tracing::info!(party_id = self.party_id, "Finished batch deduplication"); // ---- END BATCH DEDUP ---- - // Create new initial events - let mut current_dot_event = self.device_manager.create_events(); - let mut next_dot_event = self.device_manager.create_events(); - let mut current_exchange_event = self.device_manager.create_events(); - let mut next_exchange_event = self.device_manager.create_events(); - let mut current_phase2_event = self.device_manager.create_events(); - let mut next_phase2_event = self.device_manager.create_events(); - let chunk_sizes = |chunk_idx: usize| { self.current_db_sizes .iter() @@ -1195,11 +1198,11 @@ impl ServerActor { // First stream doesn't need to wait if db_chunk_idx == 0 { self.device_manager - .record_event(request_streams, ¤t_dot_event); + .record_event(request_streams, &self.dot_events[db_chunk_idx % 2]); self.device_manager - .record_event(request_streams, ¤t_exchange_event); + .record_event(request_streams, &self.exchange_events[db_chunk_idx % 2]); self.device_manager - .record_event(request_streams, ¤t_phase2_event); + .record_event(request_streams, &self.phase2_events[db_chunk_idx % 2]); } // Prefetch next chunk @@ -1229,7 +1232,7 @@ impl ServerActor { ); self.device_manager - .await_event(request_streams, ¤t_dot_event); + .await_event(request_streams, &self.dot_events[db_chunk_idx % 2]); // ---- START PHASE 1 ---- record_stream_time!(&self.device_manager, batch_streams, events, "db_dot", { @@ -1247,7 +1250,7 @@ impl ServerActor { // wait for the exchange result buffers to be ready self.device_manager - .await_event(request_streams, ¤t_exchange_event); + .await_event(request_streams, &self.exchange_events[db_chunk_idx % 2]); record_stream_time!( &self.device_manager, @@ -1268,7 +1271,7 @@ impl ServerActor { ); self.device_manager - .record_event(request_streams, &next_dot_event); + .record_event(request_streams, &self.dot_events[(db_chunk_idx + 1) % 2]); record_stream_time!( &self.device_manager, @@ -1286,7 
+1289,7 @@ impl ServerActor { // ---- END PHASE 1 ---- self.device_manager - .await_event(request_streams, ¤t_phase2_event); + .await_event(request_streams, &self.phase2_events[db_chunk_idx % 2]); // ---- START PHASE 2 ---- let max_chunk_size = dot_chunk_size.iter().max().copied().unwrap(); @@ -1318,8 +1321,10 @@ impl ServerActor { // we can now record the exchange event since the phase 2 is no longer using the // code_dots/mask_dots which are just reinterpretations of the exchange result // buffers - self.device_manager - .record_event(request_streams, &next_exchange_event); + self.device_manager.record_event( + request_streams, + &self.exchange_events[(db_chunk_idx + 1) % 2], + ); let res = self.phase2.take_result_buffer(); record_stream_time!(&self.device_manager, request_streams, events, "db_open", { @@ -1340,23 +1345,10 @@ impl ServerActor { }); } self.device_manager - .record_event(request_streams, &next_phase2_event); + .record_event(request_streams, &self.phase2_events[(db_chunk_idx + 1) % 2]); // ---- END PHASE 2 ---- - // Destroy events - self.device_manager.destroy_events(current_dot_event); - self.device_manager.destroy_events(current_exchange_event); - self.device_manager.destroy_events(current_phase2_event); - - // Update events for synchronization - current_dot_event = next_dot_event; - current_exchange_event = next_exchange_event; - current_phase2_event = next_phase2_event; - next_dot_event = self.device_manager.create_events(); - next_exchange_event = self.device_manager.create_events(); - next_phase2_event = self.device_manager.create_events(); - // Increment chunk index db_chunk_idx += 1; From 7490148a397a852290620fc89e96eab56292b85a Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Sat, 28 Dec 2024 20:54:01 +0100 Subject: [PATCH 163/170] release v0.13.6 to prod (#865) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml 
b/deploy/prod/common-values-iris-mpc.yaml index 746cdc946..7419fa777 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.5" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.6" environment: prod replicaCount: 1 From cc40ade70fd472b430888f3780dd1c902518c074 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Thu, 2 Jan 2025 15:47:29 +0100 Subject: [PATCH 164/170] chore(): Override AWS_ENDPOINT_URL to force Localstack use (#869) --- deploy/e2e/iris-mpc-0.yaml.tpl | 14 ++++++-------- deploy/e2e/iris-mpc-1.yaml.tpl | 13 ++++++------- deploy/e2e/iris-mpc-2.yaml.tpl | 13 ++++++------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl b/deploy/e2e/iris-mpc-0.yaml.tpl index a74a18111..7a7b97089 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -90,6 +90,12 @@ iris-mpc-0: - name: RUST_LOG value: "info" + - name: AWS_REGION + value: "$AWS_REGION" + + - name: AWS_ENDPOINT_URL + value: "http://localstack:4566" + - name: RUST_BACKTRACE value: "full" @@ -120,12 +126,6 @@ iris-mpc-0: - name: SMPC__DATABASE__LOAD_PARALLELISM value: "8" - - name: SMPC__AWS__REGION - value: "$AWS_REGION" - - - name: SMPC__AWS__ENDPOINT - value: "http://localstack:4566" - - name: SMPC__REQUESTS_QUEUE_URL value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" @@ -216,5 +216,3 @@ iris-mpc-0: aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
key-manager --node-id 0 --env $ENV --endpoint-url "http://localstack:4566" rotate --public-key-bucket-name wf-$ENV-stage-public-keys --region $AWS_REGION - - diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 857776162..84d23da0e 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -90,6 +90,12 @@ iris-mpc-1: - name: RUST_LOG value: "info" + - name: AWS_REGION + value: "$AWS_REGION" + + - name: AWS_ENDPOINT_URL + value: "http://localstack:4566" + - name: RUST_BACKTRACE value: "full" @@ -120,12 +126,6 @@ iris-mpc-1: - name: SMPC__DATABASE__LOAD_PARALLELISM value: "8" - - name: SMPC__AWS__REGION - value: "$AWS_REGION" - - - name: SMPC__AWS__ENDPOINT - value: "http://localstack:4566" - - name: SMPC__REQUESTS_QUEUE_URL value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" @@ -216,4 +216,3 @@ iris-mpc-1: aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . key-manager --node-id 1 --env $ENV --region $AWS_REGION --endpoint-url "http://localstack:4566" rotate --public-key-bucket-name wf-$ENV-public-keys - diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index 3db2a986b..ec232ded5 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -90,6 +90,12 @@ iris-mpc-2: - name: RUST_LOG value: "info" + - name: AWS_REGION + value: "$AWS_REGION" + + - name: AWS_ENDPOINT_URL + value: "http://localstack:4566" + - name: RUST_BACKTRACE value: "full" @@ -120,12 +126,6 @@ iris-mpc-2: - name: SMPC__DATABASE__LOAD_PARALLELISM value: "8" - - name: SMPC__AWS__REGION - value: "$AWS_REGION" - - - name: SMPC__AWS__ENDPOINT - value: "http://localstack:4566" - - name: SMPC__REQUESTS_QUEUE_URL value: "arn:aws:sns:eu-central-1:000000000000:iris-mpc-input" @@ -216,4 +216,3 @@ iris-mpc-2: aws s3 cp s3://wf-smpcv2-stage-libs/libcublasLt.so.12.2.5.6 . 
key-manager --node-id 2 --env $ENV --region $AWS_REGION --endpoint-url "http://localstack:4566" rotate --public-key-bucket-name wf-$ENV-public-keys - From d36489164b6bc25f9b6f25829e7e866fe8373901 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Thu, 2 Jan 2025 16:42:17 +0100 Subject: [PATCH 165/170] chore(): Use the config region in key-manager (#870) * chore(): Use the config region in key-manager * fmt --- iris-mpc-common/src/helpers/key_pair.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/iris-mpc-common/src/helpers/key_pair.rs b/iris-mpc-common/src/helpers/key_pair.rs index cebf7e56c..dda1d9d66 100644 --- a/iris-mpc-common/src/helpers/key_pair.rs +++ b/iris-mpc-common/src/helpers/key_pair.rs @@ -83,7 +83,12 @@ impl Drop for SharesEncryptionKeyPairs { impl SharesEncryptionKeyPairs { pub async fn from_storage(config: Config) -> Result { - let region_provider = Region::new(REGION); + // use the configured region, fallback to the hardcoded value + let region = config + .aws + .and_then(|aws| aws.region) + .unwrap_or_else(|| REGION.to_owned()); + let region_provider = Region::new(region); let shared_config = aws_config::from_env().region(region_provider).load().await; let client = SecretsManagerClient::new(&shared_config); From 884e2a9edda0619d956d63ee7677ef24fb0e09c9 Mon Sep 17 00:00:00 2001 From: Wojciech Sromek <157375010+wojciechsromek@users.noreply.github.com> Date: Thu, 2 Jan 2025 17:25:01 +0100 Subject: [PATCH 166/170] chore: Use region value from the configuration (#872) * chore(): Use the config region in key-manager * fmt * prints for debugging * restore the app level aws config * fmt * remove space --- deploy/e2e/iris-mpc-0.yaml.tpl | 3 +++ deploy/e2e/iris-mpc-1.yaml.tpl | 3 +++ deploy/e2e/iris-mpc-2.yaml.tpl | 3 +++ iris-mpc-common/src/helpers/key_pair.rs | 5 +++++ 4 files changed, 14 insertions(+) diff --git a/deploy/e2e/iris-mpc-0.yaml.tpl 
b/deploy/e2e/iris-mpc-0.yaml.tpl index 7a7b97089..72e38feb1 100644 --- a/deploy/e2e/iris-mpc-0.yaml.tpl +++ b/deploy/e2e/iris-mpc-0.yaml.tpl @@ -108,6 +108,9 @@ iris-mpc-0: - name: SMPC__ENVIRONMENT value: "$ENV" + - name: SMPC__AWS__REGION + value: "$AWS_REGION" + - name: SMPC__SERVICE__SERVICE_NAME value: "smpcv2-server-$ENV" diff --git a/deploy/e2e/iris-mpc-1.yaml.tpl b/deploy/e2e/iris-mpc-1.yaml.tpl index 84d23da0e..15b3cd127 100644 --- a/deploy/e2e/iris-mpc-1.yaml.tpl +++ b/deploy/e2e/iris-mpc-1.yaml.tpl @@ -108,6 +108,9 @@ iris-mpc-1: - name: SMPC__ENVIRONMENT value: "$ENV" + - name: SMPC__AWS__REGION + value: "$AWS_REGION" + - name: SMPC__SERVICE__SERVICE_NAME value: "smpcv2-server-$ENV" diff --git a/deploy/e2e/iris-mpc-2.yaml.tpl b/deploy/e2e/iris-mpc-2.yaml.tpl index ec232ded5..485734c90 100644 --- a/deploy/e2e/iris-mpc-2.yaml.tpl +++ b/deploy/e2e/iris-mpc-2.yaml.tpl @@ -108,6 +108,9 @@ iris-mpc-2: - name: SMPC__ENVIRONMENT value: "$ENV" + - name: SMPC__AWS__REGION + value: "$AWS_REGION" + - name: SMPC__SERVICE__SERVICE_NAME value: "smpcv2-server-$ENV" diff --git a/iris-mpc-common/src/helpers/key_pair.rs b/iris-mpc-common/src/helpers/key_pair.rs index dda1d9d66..bcbda891b 100644 --- a/iris-mpc-common/src/helpers/key_pair.rs +++ b/iris-mpc-common/src/helpers/key_pair.rs @@ -88,6 +88,7 @@ impl SharesEncryptionKeyPairs { .aws .and_then(|aws| aws.region) .unwrap_or_else(|| REGION.to_owned()); + tracing::info!("Using region: {} for key pair download", region); let region_provider = Region::new(region); let shared_config = aws_config::from_env().region(region_provider).load().await; let client = SecretsManagerClient::new(&shared_config); @@ -199,6 +200,10 @@ async fn download_private_key_from_asm( version_stage: &str, ) -> Result { let private_key_secret_id: String = format!("{}/iris-mpc/ecdh-private-key-{}", env, node_id); + tracing::info!( + "Downloading private key from Secrets Manager: {}", + private_key_secret_id + ); match client .get_secret_value() 
.secret_id(private_key_secret_id) From cdcbceaf312b6f9c3a147491014b212f09bb9b64 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Thu, 2 Jan 2025 23:05:32 -0800 Subject: [PATCH 167/170] allow disabling of stream timers (#868) * allow disabling of stream timers * bump stage image --------- Co-authored-by: Ertugrul Aypek --- deploy/stage/common-values-iris-mpc.yaml | 2 +- iris-mpc-common/src/config/mod.rs | 3 + iris-mpc-gpu/src/server/actor.rs | 164 +++++++++++++++-------- iris-mpc-gpu/tests/e2e.rs | 3 + iris-mpc/src/bin/server.rs | 1 + 5 files changed, 115 insertions(+), 58 deletions(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index 276b120a1..69e69d88e 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.6" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.7" environment: stage replicaCount: 1 diff --git a/iris-mpc-common/src/config/mod.rs b/iris-mpc-common/src/config/mod.rs index 666884068..971425b05 100644 --- a/iris-mpc-common/src/config/mod.rs +++ b/iris-mpc-common/src/config/mod.rs @@ -79,6 +79,9 @@ pub struct Config { #[serde(default)] pub disable_persistence: bool, + #[serde(default)] + pub enable_debug_timing: bool, + #[serde(default, deserialize_with = "deserialize_yaml_json_string")] pub node_hostnames: Vec, diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index 1a31267a2..e1972f172 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -37,14 +37,18 @@ use std::{collections::HashMap, mem, sync::Arc, time::Instant}; use tokio::sync::{mpsc, oneshot}; macro_rules! 
record_stream_time { - ($manager:expr, $streams:expr, $map:expr, $label:expr, $block:block) => {{ - let evt0 = $manager.create_events(); - let evt1 = $manager.create_events(); - $manager.record_event($streams, &evt0); - let res = $block; - $manager.record_event($streams, &evt1); - $map.entry($label).or_default().extend(vec![evt0, evt1]); - res + ($manager:expr, $streams:expr, $map:expr, $label:expr, $enable_timing:expr, $block:block) => {{ + if $enable_timing { + let evt0 = $manager.create_events(); + let evt1 = $manager.create_events(); + $manager.record_event($streams, &evt0); + let res = $block; + $manager.record_event($streams, &evt1); + $map.entry($label).or_default().extend(vec![evt0, evt1]); + res + } else { + $block + } }}; } @@ -105,6 +109,7 @@ pub struct ServerActor { max_db_size: usize, return_partial_results: bool, disable_persistence: bool, + enable_debug_timing: bool, code_chunk_buffers: Vec, mask_chunk_buffers: Vec, dot_events: Vec>, @@ -124,6 +129,7 @@ impl ServerActor { max_batch_size: usize, return_partial_results: bool, disable_persistence: bool, + enable_debug_timing: bool, ) -> eyre::Result<(Self, ServerActorHandle)> { let device_manager = Arc::new(DeviceManager::init()); Self::new_with_device_manager( @@ -135,6 +141,7 @@ impl ServerActor { max_batch_size, return_partial_results, disable_persistence, + enable_debug_timing, ) } #[allow(clippy::too_many_arguments)] @@ -147,6 +154,7 @@ impl ServerActor { max_batch_size: usize, return_partial_results: bool, disable_persistence: bool, + enable_debug_timing: bool, ) -> eyre::Result<(Self, ServerActorHandle)> { let ids = device_manager.get_ids_from_magic(0); let comms = device_manager.instantiate_network_from_ids(party_id, &ids)?; @@ -160,6 +168,7 @@ impl ServerActor { max_batch_size, return_partial_results, disable_persistence, + enable_debug_timing, ) } @@ -174,6 +183,7 @@ impl ServerActor { max_batch_size: usize, return_partial_results: bool, disable_persistence: bool, + enable_debug_timing: bool, 
) -> eyre::Result<(Self, ServerActorHandle)> { let (tx, rx) = mpsc::channel(job_queue_size); let actor = Self::init( @@ -186,6 +196,7 @@ impl ServerActor { max_batch_size, return_partial_results, disable_persistence, + enable_debug_timing, )?; Ok((actor, ServerActorHandle { job_queue: tx })) } @@ -201,6 +212,7 @@ impl ServerActor { max_batch_size: usize, return_partial_results: bool, disable_persistence: bool, + enable_debug_timing: bool, ) -> eyre::Result { assert!(max_batch_size != 0); let mut kdf_nonce = 0; @@ -373,6 +385,7 @@ impl ServerActor { max_db_size, return_partial_results, disable_persistence, + enable_debug_timing, code_chunk_buffers, mask_chunk_buffers, dot_events, @@ -616,6 +629,7 @@ impl ServerActor { &self.streams[0], events, "query_preprocess", + self.enable_debug_timing, { // This needs to be max_batch_size, even though the query can be shorter to have // enough padding for GEMM @@ -662,6 +676,7 @@ impl ServerActor { &self.streams[0], events, "query_preprocess", + self.enable_debug_timing, { // This needs to be MAX_BATCH_SIZE, even though the query can be shorter to have // enough padding for GEMM @@ -903,6 +918,7 @@ impl ServerActor { &self.streams[0], events, "db_write", + self.enable_debug_timing, { for i in 0..self.device_manager.device_count() { self.device_manager.device(i).bind_to_thread().unwrap(); @@ -987,7 +1003,9 @@ impl ServerActor { ); // ---- END RESULT PROCESSING ---- - log_timers(events); + if self.enable_debug_timing { + log_timers(events); + } let processed_mil_elements_per_second = (self.max_batch_size * previous_total_db_size) as f64 / now.elapsed().as_secs_f64() @@ -1055,34 +1073,42 @@ impl ServerActor { // ---- START BATCH DEDUP ---- tracing::info!(party_id = self.party_id, "Starting batch deduplication"); - record_stream_time!(&self.device_manager, batch_streams, events, "batch_dot", { - tracing::info!(party_id = self.party_id, "batch_dot start"); + record_stream_time!( + &self.device_manager, + batch_streams, + events, + 
"batch_dot", + self.enable_debug_timing, + { + tracing::info!(party_id = self.party_id, "batch_dot start"); - compact_device_queries.compute_dot_products( - &mut self.batch_codes_engine, - &mut self.batch_masks_engine, - &self.query_db_size, - 0, - batch_streams, - batch_cublas, - ); - tracing::info!(party_id = self.party_id, "compute_dot_reducers start"); + compact_device_queries.compute_dot_products( + &mut self.batch_codes_engine, + &mut self.batch_masks_engine, + &self.query_db_size, + 0, + batch_streams, + batch_cublas, + ); + tracing::info!(party_id = self.party_id, "compute_dot_reducers start"); - compact_device_sums.compute_dot_reducers( - &mut self.batch_codes_engine, - &mut self.batch_masks_engine, - &self.query_db_size, - 0, - batch_streams, - ); - tracing::info!(party_id = self.party_id, "batch_dot end"); - }); + compact_device_sums.compute_dot_reducers( + &mut self.batch_codes_engine, + &mut self.batch_masks_engine, + &self.query_db_size, + 0, + batch_streams, + ); + tracing::info!(party_id = self.party_id, "batch_dot end"); + } + ); record_stream_time!( &self.device_manager, batch_streams, events, "batch_reshare", + self.enable_debug_timing, { tracing::info!(party_id = self.party_id, "batch_reshare start"); self.batch_codes_engine @@ -1104,6 +1130,7 @@ impl ServerActor { batch_streams, events, "batch_threshold", + self.enable_debug_timing, { tracing::info!(party_id = self.party_id, "batch_threshold start"); self.phase2_batch.compare_threshold_masked_many( @@ -1149,6 +1176,7 @@ impl ServerActor { &self.streams[0], events, "prefetch_db_chunk", + self.enable_debug_timing, { self.codes_engine.prefetch_db_chunk( code_db_slices, @@ -1211,6 +1239,7 @@ impl ServerActor { next_request_streams, events, "prefetch_db_chunk", + self.enable_debug_timing, { self.codes_engine.prefetch_db_chunk( code_db_slices, @@ -1235,18 +1264,29 @@ impl ServerActor { .await_event(request_streams, &self.dot_events[db_chunk_idx % 2]); // ---- START PHASE 1 ---- - 
record_stream_time!(&self.device_manager, batch_streams, events, "db_dot", { - compact_device_queries.dot_products_against_db( - &mut self.codes_engine, - &mut self.masks_engine, - &CudaVec2DSlicerRawPointer::from(&self.code_chunk_buffers[db_chunk_idx % 2]), - &CudaVec2DSlicerRawPointer::from(&self.mask_chunk_buffers[db_chunk_idx % 2]), - &dot_chunk_size, - 0, - request_streams, - request_cublas_handles, - ); - }); + record_stream_time!( + &self.device_manager, + batch_streams, + events, + "db_dot", + self.enable_debug_timing, + { + compact_device_queries.dot_products_against_db( + &mut self.codes_engine, + &mut self.masks_engine, + &CudaVec2DSlicerRawPointer::from( + &self.code_chunk_buffers[db_chunk_idx % 2], + ), + &CudaVec2DSlicerRawPointer::from( + &self.mask_chunk_buffers[db_chunk_idx % 2], + ), + &dot_chunk_size, + 0, + request_streams, + request_cublas_handles, + ); + } + ); // wait for the exchange result buffers to be ready self.device_manager @@ -1257,6 +1297,7 @@ impl ServerActor { request_streams, events, "db_reduce", + self.enable_debug_timing, { compact_device_sums.compute_dot_reducer_against_db( &mut self.codes_engine, @@ -1278,6 +1319,7 @@ impl ServerActor { request_streams, events, "db_reshare", + self.enable_debug_timing, { self.codes_engine .reshare_results(&dot_chunk_size, request_streams); @@ -1310,6 +1352,7 @@ impl ServerActor { request_streams, events, "db_threshold", + self.enable_debug_timing, { self.phase2.compare_threshold_masked_many( &code_dots, @@ -1327,22 +1370,29 @@ impl ServerActor { ); let res = self.phase2.take_result_buffer(); - record_stream_time!(&self.device_manager, request_streams, events, "db_open", { - open( - &mut self.phase2, - &res, - &self.distance_comparator, - db_match_bitmap, - max_chunk_size * self.max_batch_size * ROTATIONS / 64, - &dot_chunk_size, - &chunk_size, - offset, - &self.current_db_sizes, - &ignore_device_results, - request_streams, - ); - self.phase2.return_result_buffer(res); - }); + 
record_stream_time!( + &self.device_manager, + request_streams, + events, + "db_open", + self.enable_debug_timing, + { + open( + &mut self.phase2, + &res, + &self.distance_comparator, + db_match_bitmap, + max_chunk_size * self.max_batch_size * ROTATIONS / 64, + &dot_chunk_size, + &chunk_size, + offset, + &self.current_db_sizes, + &ignore_device_results, + request_streams, + ); + self.phase2.return_result_buffer(res); + } + ); } self.device_manager .record_event(request_streams, &self.phase2_events[(db_chunk_idx + 1) % 2]); diff --git a/iris-mpc-gpu/tests/e2e.rs b/iris-mpc-gpu/tests/e2e.rs index 7b9e47e7a..df377fa08 100644 --- a/iris-mpc-gpu/tests/e2e.rs +++ b/iris-mpc-gpu/tests/e2e.rs @@ -129,6 +129,7 @@ mod e2e_test { MAX_BATCH_SIZE, true, false, + false, ) { Ok((mut actor, handle)) => { actor.load_full_db(&(&db0.0, &db0.1), &(&db0.0, &db0.1), DB_SIZE); @@ -157,6 +158,7 @@ mod e2e_test { MAX_BATCH_SIZE, true, false, + false, ) { Ok((mut actor, handle)) => { actor.load_full_db(&(&db1.0, &db1.1), &(&db1.0, &db1.1), DB_SIZE); @@ -185,6 +187,7 @@ mod e2e_test { MAX_BATCH_SIZE, true, false, + false, ) { Ok((mut actor, handle)) => { actor.load_full_db(&(&db2.0, &db2.1), &(&db2.0, &db2.1), DB_SIZE); diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 6a75509cc..adf7beddb 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -973,6 +973,7 @@ async fn server_main(config: Config) -> eyre::Result<()> { config.max_batch_size, config.return_partial_results, config.disable_persistence, + config.enable_debug_timing, ) { Ok((mut actor, handle)) => { let res = if config.fake_db_size > 0 { From 491fa3a8c565e0d2f15471745c71de266d6acfbe Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Fri, 3 Jan 2025 08:25:17 +0100 Subject: [PATCH 168/170] release v0.13.8 in stage (#873) --- deploy/stage/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stage/common-values-iris-mpc.yaml 
b/deploy/stage/common-values-iris-mpc.yaml index 69e69d88e..d77f969e4 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.7" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.8" environment: stage replicaCount: 1 From ffad4fa470dc9fd0969527db0f98d09a8b3419d1 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Fri, 3 Jan 2025 01:03:26 -0800 Subject: [PATCH 169/170] fix potential phantom match (#871) * allow disabling of stream timers * bump stage image * fix potential phantom match * dbg: just for e2e * clear some more buffers * revert dbg * trigger image push * test image in stage * use rebased image * bump image --------- Co-authored-by: Ertugrul Aypek --- .../workflows/temp-branch-build-and-push.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- iris-mpc-gpu/src/dot/share_db.rs | 5 ++- iris-mpc-gpu/src/server/actor.rs | 38 +++++++------------ 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/.github/workflows/temp-branch-build-and-push.yaml b/.github/workflows/temp-branch-build-and-push.yaml index 696d980ff..87b38a746 100644 --- a/.github/workflows/temp-branch-build-and-push.yaml +++ b/.github/workflows/temp-branch-build-and-push.yaml @@ -3,7 +3,7 @@ name: Branch - Build and push docker image on: push: branches: - - "ps/reuse-events" + - "ps/potential-phantom-match" concurrency: group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index d77f969e4..b8da0ebb0 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.8" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.9" environment: stage replicaCount: 1 diff --git a/iris-mpc-gpu/src/dot/share_db.rs b/iris-mpc-gpu/src/dot/share_db.rs index 
865a4c1ad..b6dcd210a 100644 --- a/iris-mpc-gpu/src/dot/share_db.rs +++ b/iris-mpc-gpu/src/dot/share_db.rs @@ -503,7 +503,10 @@ impl ShareDB { let device = self.device_manager.device(idx); device.bind_to_thread().unwrap(); - if offset[idx] >= db_sizes[idx] || offset[idx] + chunk_sizes[idx] > db_sizes[idx] { + if offset[idx] >= db_sizes[idx] + || offset[idx] + chunk_sizes[idx] > db_sizes[idx] + || chunk_sizes[idx] == 0 + { continue; } diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index e1972f172..6711c0884 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -727,6 +727,7 @@ impl ServerActor { ); self.device_manager.await_streams(&self.streams[0]); + self.device_manager.await_streams(&self.streams[1]); // Iterate over a list of tracing payloads, and create logs with mappings to // payloads Log at least a "start" event using a log with trace.id @@ -967,12 +968,8 @@ impl ServerActor { }) .unwrap(); - // Wait for all streams before get timings - self.device_manager.await_streams(&self.streams[0]); - self.device_manager.await_streams(&self.streams[1]); - // Reset the results buffers for reuse - for dst in &[ + for dst in [ &self.db_match_list_left, &self.db_match_list_right, &self.batch_match_list_left, @@ -981,26 +978,19 @@ impl ServerActor { reset_slice(self.device_manager.devices(), dst, 0, &self.streams[0]); } - reset_slice( - self.device_manager.devices(), + for dst in [ + &self.distance_comparator.all_matches, &self.distance_comparator.match_counters, - 0, - &self.streams[0], - ); - - reset_slice( - self.device_manager.devices(), &self.distance_comparator.match_counters_left, - 0, - &self.streams[0], - ); - - reset_slice( - self.device_manager.devices(), &self.distance_comparator.match_counters_right, - 0, - &self.streams[0], - ); + &self.distance_comparator.partial_results_left, + &self.distance_comparator.partial_results_right, + ] { + reset_slice(self.device_manager.devices(), dst, 0, 
&self.streams[0]); + } + + self.device_manager.await_streams(&self.streams[0]); + self.device_manager.await_streams(&self.streams[1]); // ---- END RESULT PROCESSING ---- if self.enable_debug_timing { @@ -1167,7 +1157,7 @@ impl ServerActor { let chunk_sizes = |chunk_idx: usize| { self.current_db_sizes .iter() - .map(|s| (s - DB_CHUNK_SIZE * chunk_idx).clamp(1, DB_CHUNK_SIZE)) + .map(|s| (s - DB_CHUNK_SIZE * chunk_idx).clamp(0, DB_CHUNK_SIZE)) .collect::>() }; @@ -1220,7 +1210,7 @@ impl ServerActor { // later. let dot_chunk_size = chunk_size .iter() - .map(|s| s.div_ceil(64) * 64) + .map(|&s| (s.max(1).div_ceil(64) * 64)) .collect::>(); // First stream doesn't need to wait From 4913ef30024615c5dca0132a22c2971158e1cc0e Mon Sep 17 00:00:00 2001 From: Ertugrul Aypek Date: Fri, 3 Jan 2025 10:32:01 +0100 Subject: [PATCH 170/170] release v0.13.9 to prod (#875) --- deploy/prod/common-values-iris-mpc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/prod/common-values-iris-mpc.yaml b/deploy/prod/common-values-iris-mpc.yaml index 7419fa777..858e3d872 100644 --- a/deploy/prod/common-values-iris-mpc.yaml +++ b/deploy/prod/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.6" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.9" environment: prod replicaCount: 1