Skip to content

Commit

Permalink
Scheduler: oversubscribe cores just a little to make up for slack
Browse files Browse the repository at this point in the history
...not by a factor of 10, as was done previously
  • Loading branch information
ahartmetz committed Jan 10, 2025
1 parent e8b9af4 commit e3f9d04
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/bin/sccache-dist/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ fn init_logging() {
}
}

// Upper limit on a server's per-core load: the scheduler starts its search with
// this value as the best load seen so far and only considers servers whose load
// is below it.
// NOTE(review): this is a rendered diff, so the next two lines appear to be the
// removed and the added version of the same constant (the commit message says
// the previous oversubscription factor was 10) - they are not two definitions.
const MAX_PER_CORE_LOAD: f64 = 10f64;
const MAX_PER_CORE_LOAD: f64 = 2f64;
// Timeouts, in seconds: 300, 300 and 60 respectively.
// NOTE(review): judging by the names only, these presumably bound how long a
// server error is remembered and how long unclaimed pending/ready jobs are
// kept - confirm against the code that uses them.
const SERVER_REMEMBER_ERROR_TIMEOUT: Duration = Duration::from_secs(300);
const UNCLAIMED_PENDING_TIMEOUT: Duration = Duration::from_secs(300);
const UNCLAIMED_READY_TIMEOUT: Duration = Duration::from_secs(60);
Expand Down Expand Up @@ -399,6 +399,17 @@ impl Default for Scheduler {
}
}

/// Computes the scheduling load of a server from the number of jobs assigned to
/// it and its number of cores.
///
/// While the server holds fewer than `core_count + 1 + core_count / 8` jobs, the
/// result is the plain jobs-per-core ratio. At or beyond that point the result is
/// `MAX_PER_CORE_LOAD + 1.0`, a value strictly above the limit, so that any
/// `load < MAX_PER_CORE_LOAD` check rejects the server ("no new jobs for now").
///
/// A server reporting zero cores is always treated as saturated. Without this
/// guard, zero jobs on zero cores would evaluate `0.0 / 0.0` and yield NaN, which
/// compares false against everything and silently defeats load comparisons.
fn load_weight(job_count: usize, core_count: usize) -> f64 {
    // Sentinel that can never pass a `< MAX_PER_CORE_LOAD` check.
    let saturated = MAX_PER_CORE_LOAD + 1f64;

    if core_count == 0 {
        return saturated;
    }

    // Do not oversubscribe cores very much, to avoid out-of-memory situations and
    // cache thrashing - oversubscribe just a little to make up for network and
    // other latency. Saturating arithmetic keeps absurdly large core counts from
    // overflowing (which would panic in debug builds).
    let cores_plus_slack = core_count.saturating_add(1).saturating_add(core_count / 8);

    if job_count >= cores_plus_slack {
        saturated
    } else {
        job_count as f64 / core_count as f64
    }
}

impl SchedulerIncoming for Scheduler {
fn handle_alloc_job(
&self,
Expand All @@ -415,7 +426,7 @@ impl SchedulerIncoming for Scheduler {
let mut best_load: f64 = MAX_PER_CORE_LOAD;
let now = Instant::now();
for (&server_id, details) in servers.iter_mut() {
let load = details.jobs_assigned.len() as f64 / details.num_cpus as f64;
let load = load_weight(details.jobs_assigned.len(), details.num_cpus);

if let Some(last_error) = details.last_error {
if load < MAX_PER_CORE_LOAD {
Expand Down

0 comments on commit e3f9d04

Please sign in to comment.