From 11588d6f0500aa7254f860d2da166876633d3f3f Mon Sep 17 00:00:00 2001 From: Tom Dyas Date: Tue, 31 Dec 2024 21:47:36 +0000 Subject: [PATCH] support `{chroot}` for program arguments in remote execution (#21803) As reported in https://github.com/pantsbuild/pants/issues/17519, remote environments do not currently support the `{chroot}` replacement marker supported by the local and Docker environments. With local and Docker environments, Pants will replace any `{chroot}` marker with the absolute path of the sandbox directory if it appears in a program argument or environment variable. This PR is a partial solution and implements `{chroot}` replacement for prorgram arguments used in remote execution. It by extends the existing wrapper script used for append-only caches to also support `{chroot}` replacements. This PR does not try to implement `{chroot}` replacement for environment variables. That will need to come after I figure out a good way to do that. --- docs/notes/2.25.x.md | 2 + .../remote/src/remote_tests.rs | 6 +- src/rust/engine/process_execution/src/lib.rs | 197 ++++++++++++------ .../engine/process_execution/src/tests.rs | 55 ++++- 4 files changed, 185 insertions(+), 75 deletions(-) diff --git a/docs/notes/2.25.x.md b/docs/notes/2.25.x.md index 95c506003ae..73b25b23ace 100644 --- a/docs/notes/2.25.x.md +++ b/docs/notes/2.25.x.md @@ -28,6 +28,8 @@ Pants now sends a `user-agent` header with every request to a remote store or a even when other headers are configured. If necessary, the user may override the user agent by specifying one in `remote_store_headers` or `remote_execution_headers`. +Pants now supports the `{chroot}` replacement marker in program arguments for remote execution. (With local and Docker execution, the `{chroot}` marker is replaced with the absolute path of the sandbox directory if it appears in program arguments or environment variables. Pants will now replace `{chroot}` for program arguments with remote execution, but still does not yet support doing so in environment variables. This requires `/bin/bash` to be available on the remote execution server.) + ### New Options System The "legacy" options system is removed in this release. All options parsing is now handled by the new, native parser. diff --git a/src/rust/engine/process_execution/remote/src/remote_tests.rs b/src/rust/engine/process_execution/remote/src/remote_tests.rs index 33e987518ad..2ef7f27496a 100644 --- a/src/rust/engine/process_execution/remote/src/remote_tests.rs +++ b/src/rust/engine/process_execution/remote/src/remote_tests.rs @@ -724,7 +724,7 @@ async fn make_execute_request_with_append_only_caches() { input_root_digest: Some( (Digest::new( Fingerprint::from_hex_string( - "9f4ad1839decdee5e0b01d8ab2562302e42d590f331c3efb747c17f1936bedd4", + "d6f65a7f56380de341fac43fcb4e2006fd0e716579325c4640ec11b7c040bb9c", ) .unwrap(), 178, @@ -739,7 +739,7 @@ async fn make_execute_request_with_append_only_caches() { action_digest: Some( (&Digest::new( Fingerprint::from_hex_string( - "18a5a53ad2909432f848f161af2b6886659b4047d608742b64a80f0879dc0a69", + "d516c6b3b9781777486025ebc87cb4810bc7088dc45092a70a592d8ca74f48d8", ) .unwrap(), 146, @@ -752,7 +752,7 @@ async fn make_execute_request_with_append_only_caches() { let want_input_root_digest = DirectoryDigest::from_persisted_digest(Digest::new( Fingerprint::from_hex_string( - "9f4ad1839decdee5e0b01d8ab2562302e42d590f331c3efb747c17f1936bedd4", + "d6f65a7f56380de341fac43fcb4e2006fd0e716579325c4640ec11b7c040bb9c", ) .unwrap(), 178, diff --git a/src/rust/engine/process_execution/src/lib.rs b/src/rust/engine/process_execution/src/lib.rs index 19801d7db29..7e74770b807 100644 --- a/src/rust/engine/process_execution/src/lib.rs +++ b/src/rust/engine/process_execution/src/lib.rs @@ -1013,61 +1013,66 @@ pub struct EntireExecuteRequest { pub input_root_digest: DirectoryDigest, } -fn make_wrapper_for_append_only_caches( +fn quote_path(path: &Path) -> Result { + let as_str = path + .to_str() + .ok_or_else(|| "Failed to convert path".to_string())?; + let quoted = shlex::try_quote(as_str).map_err(|e| format!("Failed to convert path: {e}"))?; + Ok(quoted.to_string()) +} + +/// Check supplied parameters from an execution request and create a wrapper script if a wrapper script +/// is needed. +// TODO: Support `{chroot}` replacement in environment variables and not just program arguments. +fn maybe_make_wrapper_script( caches: &BTreeMap, - base_path: &str, + append_only_caches_base_path: Option<&str>, working_directory: Option<&str>, -) -> Result { - let mut script = String::new(); - writeln!(&mut script, "#!/bin/sh").map_err(|err| format!("write! failed: {err:?}"))?; - - fn quote_path(path: &Path) -> Result { - let as_str = path - .to_str() - .ok_or_else(|| "Failed to convert path".to_string())?; - let quoted = - shlex::try_quote(as_str).map_err(|e| format!("Failed to convert path: {e}"))?; - Ok(quoted.to_string()) - } - - // Setup the append-only caches. - for (cache_name, path) in caches { - let cache_path = { - let mut p = PathBuf::new(); - p.push(base_path); - p.push(cache_name.name()); - - quote_path(&p)? - }; - writeln!(&mut script, "/bin/mkdir -p {cache_path}",) - .map_err(|err| format!("write! failed: {err:?}"))?; - - if let Some(parent) = path.parent() { - if !parent.as_os_str().is_empty() { - let parent_quoted = quote_path(parent)?; - writeln!(&mut script, "/bin/mkdir -p {}", &parent_quoted) - .map_err(|err| format!("write! failed: {err}"))?; + sandbox_root_token: Option<&str>, +) -> Result, String> { + let caches_fragment = match append_only_caches_base_path { + Some(base_path) if !caches.is_empty() => { + let mut fragment = String::new(); + for (cache_name, path) in caches { + let cache_path = { + let mut p = PathBuf::new(); + p.push(base_path); + p.push(cache_name.name()); + + quote_path(&p)? + }; + writeln!(&mut fragment, "/bin/mkdir -p {cache_path}",) + .map_err(|err| format!("write! failed: {err:?}"))?; + + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() { + let parent_quoted = quote_path(parent)?; + writeln!(&mut fragment, "/bin/mkdir -p {}", &parent_quoted) + .map_err(|err| format!("write! failed: {err}"))?; + } + } + writeln!( + &mut fragment, + "/bin/ln -s {} {}", + &cache_path, + quote_path(path.as_path())? + ) + .map_err(|err| format!("write! failed: {err}"))?; } + Some(fragment) } - writeln!( - &mut script, - "/bin/ln -s {} {}", - &cache_path, - quote_path(path.as_path())? - ) - .map_err(|err| format!("write! failed: {err}"))?; - } + _ => None, + }; // Change into any working directory. // // Note: When this wrapper script is in effect, Pants will not set the `working_directory` // field on the `ExecuteRequest` so that this wrapper script can operate in the input root // first. - if let Some(path) = working_directory { + let chdir_fragment = if let Some(path) = working_directory { let quoted_path = shlex::try_quote(path).map_err(|e| format!("Failed to convert path: {e}"))?; - writeln!( - &mut script, + Some(format!( concat!( "cd {0}\n", "if [ \"$?\" != 0 ]; then\n", @@ -1076,13 +1081,58 @@ fn make_wrapper_for_append_only_caches( "fi\n", ), quoted_path - ) - .map_err(|err| format!("write! failed: {err}"))?; + )) + } else { + None + }; + + // Generate code to replace `{chroot}` markers if any are present in the command. + let sandbox_root_fragment = if let Some(token) = sandbox_root_token { + let fragment = format!( + concat!( + "sandbox_root=\"$(/bin/pwd)\"\n", + "args=(\"${{@//{0}/$sandbox_root}}\")\n", + "set -- \"${{args[@]}}\"\n", + ), + token + ); + Some(fragment) + } else { + None + }; + + if caches_fragment.is_none() && sandbox_root_fragment.is_none() { + return Ok(None); } - // Finally, execute the process. - writeln!(&mut script, "exec \"$@\"").map_err(|err| format!("write! failed: {err:?}"))?; - Ok(script) + let script = { + let mut script = String::new(); + + let shebang = match sandbox_root_fragment { + Some(_) => "/bin/bash", // the array features need bash and not just plain old /bin/sh + _ => "/bin/sh", + }; + writeln!(&mut script, "#!{shebang}").map_err(|err| format!("write! failed: {err:?}"))?; + + if let Some(sandbox_root_fragment) = sandbox_root_fragment { + writeln!(&mut script, "{sandbox_root_fragment}") + .map_err(|err| format!("write! failed: {err:?}"))?; + } + if let Some(caches_fragment) = caches_fragment { + writeln!(&mut script, "{caches_fragment}") + .map_err(|err| format!("write! failed: {err:?}"))?; + } + if let Some(chdir_fragment) = chdir_fragment { + writeln!(&mut script, "{chdir_fragment}") + .map_err(|err| format!("write! failed: {err:?}"))?; + } + + writeln!(&mut script, "exec \"$@\"").map_err(|err| format!("write! failed: {err:?}"))?; + + script + }; + + Ok(Some(script)) } pub async fn make_execute_request( @@ -1093,35 +1143,46 @@ pub async fn make_execute_request( append_only_caches_base_path: Option<&str>, ) -> Result { const WRAPPER_SCRIPT: &str = "./__pants_wrapper__"; + const SANDBOX_ROOT_TOKEN: &str = "__PANTS_SANDBOX_ROOT__"; // Implement append-only caches by running a wrapper script before the actual program // to be invoked in the remote environment. - let wrapper_script_digest_opt = match (append_only_caches_base_path, &req.append_only_caches) { - (Some(base_path), caches) if !caches.is_empty() => { - let script = make_wrapper_for_append_only_caches( - caches, - base_path, - req.working_directory.as_ref().and_then(|p| p.to_str()), - )?; - let digest = store - .store_file_bytes(Bytes::from(script), false) - .await - .map_err(|err| { - format!("Failed to store wrapper script for remote execution: {err}") - })?; - let path = RelativePath::new(Path::new(WRAPPER_SCRIPT))?; - let snapshot = store.snapshot_of_one_file(path, digest, true).await?; - let directory_digest = DirectoryDigest::new(snapshot.digest, snapshot.tree); - Some(directory_digest) - } - _ => None, + let wrapper_script_content_opt = { + let sandbox_root_token = req + .argv + .iter() + .any(|arg| arg.contains("{chroot}")) + .then_some(SANDBOX_ROOT_TOKEN); + maybe_make_wrapper_script( + &req.append_only_caches, + append_only_caches_base_path, + req.working_directory.as_ref().and_then(|p| p.to_str()), + sandbox_root_token, + )? + }; + let wrapper_script_digest_opt = if let Some(script) = wrapper_script_content_opt { + let digest = store + .store_file_bytes(Bytes::from(script), false) + .await + .map_err(|err| format!("Failed to store wrapper script for remote execution: {err}"))?; + + let path = RelativePath::new(Path::new(WRAPPER_SCRIPT))?; + let snapshot = store.snapshot_of_one_file(path, digest, true).await?; + let directory_digest = DirectoryDigest::new(snapshot.digest, snapshot.tree); + Some(directory_digest) + } else { + None }; let arguments = match &wrapper_script_digest_opt { Some(_) => { let mut args = Vec::with_capacity(req.argv.len() + 1); args.push(WRAPPER_SCRIPT.to_string()); - args.extend(req.argv.iter().cloned()); + args.extend( + req.argv + .iter() + .map(|orig_arg| orig_arg.replace("{chroot}", SANDBOX_ROOT_TOKEN)), + ); args } None => req.argv.clone(), diff --git a/src/rust/engine/process_execution/src/tests.rs b/src/rust/engine/process_execution/src/tests.rs index 85f38ed2320..6cbfba66d4c 100644 --- a/src/rust/engine/process_execution/src/tests.rs +++ b/src/rust/engine/process_execution/src/tests.rs @@ -6,6 +6,7 @@ use std::collections::BTreeMap; use std::fs::Permissions; use std::hash::{Hash, Hasher}; use std::os::unix::fs::PermissionsExt; +use std::path::Path; use std::process::Stdio; use std::time::Duration; @@ -17,7 +18,7 @@ use tempfile::TempDir; use workunit_store::RunId; use crate::{ - make_wrapper_for_append_only_caches, CacheName, Platform, Process, ProcessExecutionEnvironment, + maybe_make_wrapper_script, CacheName, Platform, Process, ProcessExecutionEnvironment, ProcessExecutionStrategy, ProcessResultMetadata, ProcessResultSource, }; @@ -160,7 +161,7 @@ fn process_result_metadata_time_saved_from_cache() { } #[tokio::test] -async fn test_make_wrapper_for_append_only_caches_success() { +async fn wrapper_script_supports_append_only_caches() { const CACHE_NAME: &str = "test_cache"; const SUBDIR_NAME: &str = "a subdir"; // Space intentionally included to test shell quoting. @@ -176,11 +177,13 @@ async fn test_make_wrapper_for_append_only_caches_success() { .await .unwrap(); - let script_content = make_wrapper_for_append_only_caches( + let script_content = maybe_make_wrapper_script( &caches, - dummy_caches_base_path.path().to_str().unwrap(), + dummy_caches_base_path.path().to_str(), Some(SUBDIR_NAME), + None, ) + .unwrap() .unwrap(); let script_path = dummy_sandbox_path.path().join("wrapper"); @@ -234,3 +237,47 @@ async fn test_make_wrapper_for_append_only_caches_success() { "script wrote a file into a sudirectory (since script changed the working directory)" ); } + +#[tokio::test] +async fn wrapper_script_supports_sandbox_root_replacements_in_args() { + let caches = BTreeMap::new(); + + let script_content = maybe_make_wrapper_script(&caches, None, None, Some("__ROOT__")) + .unwrap() + .unwrap(); + + let dummy_sandbox_path = TempDir::new().unwrap(); + let script_path = dummy_sandbox_path.path().join("wrapper"); + tokio::fs::write(&script_path, script_content.as_bytes()) + .await + .unwrap(); + tokio::fs::set_permissions(&script_path, Permissions::from_mode(0o755)) + .await + .unwrap(); + + let mut cmd = tokio::process::Command::new("./wrapper"); + cmd.args(&[ + "/bin/sh", + "-c", + "echo xyzzy > foo.txt && echo __ROOT__/foo.txt", + ]); + cmd.current_dir(dummy_sandbox_path.path()); + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + let child = cmd.spawn().unwrap(); + let output = child.wait_with_output().await.unwrap(); + if output.status.code() != Some(0) { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + println!("stdout:{}\n\nstderr: {}", &stdout, &stderr); + panic!("Wrapper script failed to run: {}", output.status); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let content = tokio::fs::read_to_string(Path::new(stdout.trim())) + .await + .unwrap(); + assert_eq!(content, "xyzzy\n"); +}