diff --git a/src/bin/sccache-dist/main.rs b/src/bin/sccache-dist/main.rs
index 83d530060..37b85212d 100644
--- a/src/bin/sccache-dist/main.rs
+++ b/src/bin/sccache-dist/main.rs
@@ -309,7 +309,7 @@ fn init_logging() {
     }
 }
 
-const MAX_PER_CORE_LOAD: f64 = 10f64;
+const MAX_PER_CORE_LOAD: f64 = 2f64;
 const SERVER_REMEMBER_ERROR_TIMEOUT: Duration = Duration::from_secs(300);
 const UNCLAIMED_PENDING_TIMEOUT: Duration = Duration::from_secs(300);
 const UNCLAIMED_READY_TIMEOUT: Duration = Duration::from_secs(60);
@@ -399,6 +399,26 @@ impl Default for Scheduler {
     }
 }
 
+/// Returns the scheduling load of a server running `job_count` jobs on `core_count` cores.
+///
+/// Below the oversubscription limit this is the plain jobs-per-core ratio. Once the server
+/// holds `core_count + 1 + core_count / 8` jobs or more, or reports zero cores, the result is
+/// `MAX_PER_CORE_LOAD + 1`, a value above the cap that signals "no new jobs for now".
+fn load_weight(job_count: usize, core_count: usize) -> f64 {
+    // A server without cores cannot run anything; bail out before 0 / 0 yields NaN.
+    if core_count == 0 {
+        return MAX_PER_CORE_LOAD + 1f64;
+    }
+    // Do not oversubscribe cores very much to avoid out of memory situations and cache thrashing -
+    // oversubscribe just a little to make up for network and other latency.
+    let cores_plus_slack = core_count + 1 + core_count / 8;
+    if job_count >= cores_plus_slack {
+        MAX_PER_CORE_LOAD + 1f64 // no new jobs for now
+    } else {
+        job_count as f64 / core_count as f64
+    }
+}
+
 impl SchedulerIncoming for Scheduler {
     fn handle_alloc_job(
         &self,
@@ -415,7 +435,7 @@ impl SchedulerIncoming for Scheduler {
             let mut best_load: f64 = MAX_PER_CORE_LOAD;
             let now = Instant::now();
             for (&server_id, details) in servers.iter_mut() {
-                let load = details.jobs_assigned.len() as f64 / details.num_cpus as f64;
+                let load = load_weight(details.jobs_assigned.len(), details.num_cpus);
                 if let Some(last_error) = details.last_error {
                     if load < MAX_PER_CORE_LOAD {