From ffad4fa470dc9fd0969527db0f98d09a8b3419d1 Mon Sep 17 00:00:00 2001 From: Philipp Sippl Date: Fri, 3 Jan 2025 01:03:26 -0800 Subject: [PATCH] fix potential phantom match (#871) * allow disabling of stream timers * bump stage image * fix potential phantom match * dbg: just for e2e * clear some more buffers * revert dbg * trigger image push * test image in stage * use rebased image * bump image --------- Co-authored-by: Ertugrul Aypek --- .../workflows/temp-branch-build-and-push.yaml | 2 +- deploy/stage/common-values-iris-mpc.yaml | 2 +- iris-mpc-gpu/src/dot/share_db.rs | 5 ++- iris-mpc-gpu/src/server/actor.rs | 38 +++++++------------ 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/.github/workflows/temp-branch-build-and-push.yaml b/.github/workflows/temp-branch-build-and-push.yaml index 696d980ff..87b38a746 100644 --- a/.github/workflows/temp-branch-build-and-push.yaml +++ b/.github/workflows/temp-branch-build-and-push.yaml @@ -3,7 +3,7 @@ name: Branch - Build and push docker image on: push: branches: - - "ps/reuse-events" + - "ps/potential-phantom-match" concurrency: group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' diff --git a/deploy/stage/common-values-iris-mpc.yaml b/deploy/stage/common-values-iris-mpc.yaml index d77f969e4..b8da0ebb0 100644 --- a/deploy/stage/common-values-iris-mpc.yaml +++ b/deploy/stage/common-values-iris-mpc.yaml @@ -1,4 +1,4 @@ -image: "ghcr.io/worldcoin/iris-mpc:v0.13.8" +image: "ghcr.io/worldcoin/iris-mpc:v0.13.9" environment: stage replicaCount: 1 diff --git a/iris-mpc-gpu/src/dot/share_db.rs b/iris-mpc-gpu/src/dot/share_db.rs index 865a4c1ad..b6dcd210a 100644 --- a/iris-mpc-gpu/src/dot/share_db.rs +++ b/iris-mpc-gpu/src/dot/share_db.rs @@ -503,7 +503,10 @@ impl ShareDB { let device = self.device_manager.device(idx); device.bind_to_thread().unwrap(); - if offset[idx] >= db_sizes[idx] || offset[idx] + chunk_sizes[idx] > db_sizes[idx] { + if offset[idx] >= 
db_sizes[idx] + || offset[idx] + chunk_sizes[idx] > db_sizes[idx] + || chunk_sizes[idx] == 0 + { continue; } diff --git a/iris-mpc-gpu/src/server/actor.rs b/iris-mpc-gpu/src/server/actor.rs index e1972f172..6711c0884 100644 --- a/iris-mpc-gpu/src/server/actor.rs +++ b/iris-mpc-gpu/src/server/actor.rs @@ -727,6 +727,7 @@ impl ServerActor { ); self.device_manager.await_streams(&self.streams[0]); + self.device_manager.await_streams(&self.streams[1]); // Iterate over a list of tracing payloads, and create logs with mappings to // payloads Log at least a "start" event using a log with trace.id @@ -967,12 +968,8 @@ impl ServerActor { }) .unwrap(); - // Wait for all streams before get timings - self.device_manager.await_streams(&self.streams[0]); - self.device_manager.await_streams(&self.streams[1]); - // Reset the results buffers for reuse - for dst in &[ + for dst in [ &self.db_match_list_left, &self.db_match_list_right, &self.batch_match_list_left, @@ -981,26 +978,19 @@ impl ServerActor { reset_slice(self.device_manager.devices(), dst, 0, &self.streams[0]); } - reset_slice( - self.device_manager.devices(), + for dst in [ + &self.distance_comparator.all_matches, &self.distance_comparator.match_counters, - 0, - &self.streams[0], - ); - - reset_slice( - self.device_manager.devices(), &self.distance_comparator.match_counters_left, - 0, - &self.streams[0], - ); - - reset_slice( - self.device_manager.devices(), &self.distance_comparator.match_counters_right, - 0, - &self.streams[0], - ); + &self.distance_comparator.partial_results_left, + &self.distance_comparator.partial_results_right, + ] { + reset_slice(self.device_manager.devices(), dst, 0, &self.streams[0]); + } + + self.device_manager.await_streams(&self.streams[0]); + self.device_manager.await_streams(&self.streams[1]); // ---- END RESULT PROCESSING ---- if self.enable_debug_timing { @@ -1167,7 +1157,7 @@ impl ServerActor { let chunk_sizes = |chunk_idx: usize| { self.current_db_sizes .iter() - .map(|s| (s - 
DB_CHUNK_SIZE * chunk_idx).clamp(1, DB_CHUNK_SIZE)) + .map(|s| (s - DB_CHUNK_SIZE * chunk_idx).clamp(0, DB_CHUNK_SIZE)) .collect::<Vec<_>>() }; @@ -1220,7 +1210,7 @@ impl ServerActor { // later. let dot_chunk_size = chunk_size .iter() - .map(|s| s.div_ceil(64) * 64) + .map(|&s| (s.max(1).div_ceil(64) * 64)) .collect::<Vec<_>>(); // First stream doesn't need to wait