From dc0aa0a6444618435c60f8807ab89997885f5910 Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Mon, 6 May 2024 23:01:16 -0400 Subject: [PATCH] Rust options parser tweaks (#20880) Various changes needed to support access to the Rust parser from Python code. These include: 1. Modeling option source "rank", using the same enum values as the Python model. 2. Making the OptionsParser concurrency-friendly (making it Send + Sync, and using Arc instead of Rc). 3. Plumbing passthrough args in. 4. Having get_list()'s `default` be Vec instead of array. 5. Ensuring that `--string-list-option=` is interpreted as adding an empty string to the list, rather than setting it to an empty list, and ensuring that for other list types, `--foo=` is an error. This is how the Python options parser treats these cases, so we must comply with that. The actual Python bindings will be provided in a future PR. --- src/rust/engine/options/src/lib.rs | 94 ++++++++++++++-------- src/rust/engine/options/src/parse.rs | 43 +++++++--- src/rust/engine/options/src/parse_tests.rs | 22 ++++- src/rust/engine/options/src/tests.rs | 2 +- src/rust/engine/pantsd/src/lib.rs | 5 +- 5 files changed, 115 insertions(+), 51 deletions(-) diff --git a/src/rust/engine/options/src/lib.rs b/src/rust/engine/options/src/lib.rs index 4661fbcd72f..0b5abf290c6 100644 --- a/src/rust/engine/options/src/lib.rs +++ b/src/rust/engine/options/src/lib.rs @@ -38,7 +38,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Debug; use std::hash::Hash; use std::path::Path; -use std::rc::Rc; +use std::sync::Arc; use serde::Deserialize; @@ -99,7 +99,7 @@ pub struct DictEdit { pub items: HashMap, } -pub(crate) trait OptionsSource { +pub(crate) trait OptionsSource: Send + Sync { /// /// Get a display version of the option `id` that most closely matches the syntax used to supply /// the id at runtime. For example, an global option of "bob" would display as "--bob" for use in @@ -200,6 +200,30 @@ pub enum Source { Flag, } +// NB: Must mirror the Rank enum in src/python/pants/option/ranked_value.py. +pub enum Rank { + _NONE = 0, // Unused, exists for historical Python compatibility reasons. + HARDCODED = 1, // The default provided at option registration. + _CONFIGDEFAULT = 2, // Unused, exists for historical Python compatibility reasons. + CONFIG = 3, // The value from the relevant section of the config file. + ENVIRONMENT = 4, // The value from the appropriately-named environment variable. + FLAG = 5, // The value from the appropriately-named command-line flag. +} + +impl Source { + pub fn rank(&self) -> Rank { + match *self { + Source::Default => Rank::HARDCODED, + Source::Config { + ordinal: _, + path: _, + } => Rank::CONFIG, + Source::Env => Rank::ENVIRONMENT, + Source::Flag => Rank::FLAG, + } + } +} + #[derive(Debug)] pub struct OptionValue { pub derivation: Option>, @@ -241,8 +265,9 @@ pub struct DictOptionValue { } pub struct OptionParser { - sources: BTreeMap>, + sources: BTreeMap>, include_derivation: bool, + passthrough_args: Option>, } impl OptionParser { @@ -266,18 +291,19 @@ impl OptionParser { .map(|(k, v)| (format!("env.{k}", k = k), v.clone())), ); - let mut sources: BTreeMap> = BTreeMap::new(); + let args_reader = ArgsReader::new(args, fromfile_expander.clone()); + let passthrough_args = args_reader.get_passthrough_args().cloned(); + + let mut sources: BTreeMap> = BTreeMap::new(); sources.insert( Source::Env, - Rc::new(EnvReader::new(env, fromfile_expander.clone())), - ); - sources.insert( - Source::Flag, - Rc::new(ArgsReader::new(args, fromfile_expander.clone())), + Arc::new(EnvReader::new(env, fromfile_expander.clone())), ); + sources.insert(Source::Flag, Arc::new(args_reader)); let mut parser = OptionParser { sources: sources.clone(), include_derivation: false, + passthrough_args: None, }; fn path_join(prefix: &str, suffix: &str) -> String { @@ -304,7 +330,7 @@ impl OptionParser { let config_paths = parser .parse_string_list( &option_id!("pants", "config", "files"), - &[&default_config_path], + vec![default_config_path], )? .value; config_paths @@ -343,23 +369,24 @@ impl OptionParser { config_source.path.to_string_lossy().as_ref(), ), }, - Rc::new(ConfigReader::new(config, fromfile_expander.clone())), + Arc::new(ConfigReader::new(config, fromfile_expander.clone())), ); ordinal += 1; } parser = OptionParser { sources: sources.clone(), include_derivation: false, + passthrough_args: None, }; if allow_pantsrc && parser.parse_bool(&option_id!("pantsrc"), true)?.value { for rcfile in parser .parse_string_list( &option_id!("pantsrc", "files"), - &[ - "/etc/pantsrc", - shellexpand::tilde("~/.pants.rc").as_ref(), - ".pants.rc", + vec![ + "/etc/pantsrc".to_string(), + shellexpand::tilde("~/.pants.rc").to_string(), + ".pants.rc".to_string(), ], )? .value @@ -373,7 +400,7 @@ impl OptionParser { ordinal, path: rcfile, }, - Rc::new(ConfigReader::new(rc_config, fromfile_expander.clone())), + Arc::new(ConfigReader::new(rc_config, fromfile_expander.clone())), ); ordinal += 1; } @@ -382,6 +409,7 @@ impl OptionParser { Ok(OptionParser { sources, include_derivation, + passthrough_args, }) } @@ -390,7 +418,7 @@ impl OptionParser { &self, id: &OptionId, default: Option<&T>, - getter: fn(&Rc, &OptionId) -> Result, String>, + getter: fn(&Arc, &OptionId) -> Result, String>, ) -> Result, String> { let mut derivation = None; if self.include_derivation { @@ -478,11 +506,11 @@ impl OptionParser { } #[allow(clippy::type_complexity)] - fn parse_list( + fn parse_list( &self, id: &OptionId, default: Vec, - getter: fn(&Rc, &OptionId) -> Result>>, String>, + getter: fn(&Arc, &OptionId) -> Result>>, String>, remover: fn(&mut Vec, &Vec), ) -> Result, String> { let mut list = default; @@ -542,11 +570,11 @@ impl OptionParser { // However this is still more than fast enough, and inoculates us against a very unlikely // pathological case of a very large removal set. #[allow(clippy::type_complexity)] - fn parse_list_hashable( + fn parse_list_hashable( &self, id: &OptionId, default: Vec, - getter: fn(&Rc, &OptionId) -> Result>>, String>, + getter: fn(&Arc, &OptionId) -> Result>>, String>, ) -> Result, String> { self.parse_list(id, default, getter, |list, remove| { let to_remove = remove.iter().collect::>(); @@ -557,28 +585,28 @@ impl OptionParser { pub fn parse_bool_list( &self, id: &OptionId, - default: &[bool], + default: Vec, ) -> Result, String> { - self.parse_list_hashable(id, default.to_vec(), |source, id| source.get_bool_list(id)) + self.parse_list_hashable(id, default, |source, id| source.get_bool_list(id)) } pub fn parse_int_list( &self, id: &OptionId, - default: &[i64], + default: Vec, ) -> Result, String> { - self.parse_list_hashable(id, default.to_vec(), |source, id| source.get_int_list(id)) + self.parse_list_hashable(id, default, |source, id| source.get_int_list(id)) } // Floats are not Eq or Hash, so we fall back to the brute-force O(N*M) lookups. pub fn parse_float_list( &self, id: &OptionId, - default: &[f64], + default: Vec, ) -> Result, String> { self.parse_list( id, - default.to_vec(), + default, |source, id| source.get_float_list(id), |list, to_remove| { list.retain(|item| !to_remove.contains(item)); @@ -589,13 +617,9 @@ impl OptionParser { pub fn parse_string_list( &self, id: &OptionId, - default: &[&str], + default: Vec, ) -> Result, String> { - self.parse_list_hashable::( - id, - default.iter().map(|s| s.to_string()).collect(), - |source, id| source.get_string_list(id), - ) + self.parse_list_hashable::(id, default, |source, id| source.get_string_list(id)) } pub fn parse_dict( @@ -638,6 +662,10 @@ impl OptionParser { value: dict, }) } + + pub fn get_passthrough_args(&self) -> Option<&Vec> { + self.passthrough_args.as_ref() + } } pub fn render_choice(items: &[&str]) -> Option { diff --git a/src/rust/engine/options/src/parse.rs b/src/rust/engine/options/src/parse.rs index 1b8c6728c75..3138601bf86 100644 --- a/src/rust/engine/options/src/parse.rs +++ b/src/rust/engine/options/src/parse.rs @@ -168,7 +168,7 @@ peg::parser! { } rule list_edits(parse_value: rule) -> Vec> - = e:list_edit(&parse_value) ** "," ","? { e } + = e:list_edit(&parse_value) ++ "," { e } rule list_replace(parse_value: rule) -> Vec> = items:items(&parse_value) { @@ -193,8 +193,13 @@ peg::parser! { pub(crate) rule float_list_edits() -> Vec> = scalar_list_edits() + // Make `--foo=` yield an implicit add of an empty string. + rule empty_string_string_list() -> Vec> + = ![_] { vec![ListEdit { action: ListEditAction::Add, items: vec!["".to_string()] }] } + pub(crate) rule string_list_edits() -> Vec> - = implicit_add() / list_replace() / list_edits() + = empty_string_string_list() / implicit_add() / + list_replace() / list_edits() // Heterogeneous values embedded in dicts. Note that float_val() must precede int_val() so that // the integer prefix of a float is not interpreted as an int. @@ -327,50 +332,70 @@ pub(crate) fn parse_dict(value: &str) -> Result { } pub(crate) trait Parseable: Sized + DeserializeOwned { + const OPTION_TYPE: &'static str; fn parse(value: &str) -> Result; fn parse_list(value: &str) -> Result>, ParseError>; + + fn format_parse_error(value: &str, e: peg::error::ParseError) -> ParseError { + format_parse_error(Self::OPTION_TYPE, value, e) + } + + fn format_list_parse_error( + value: &str, + e: peg::error::ParseError, + ) -> ParseError { + format_parse_error(&format!("{} list", Self::OPTION_TYPE), value, e) + } } impl Parseable for bool { + const OPTION_TYPE: &'static str = "bool"; + fn parse(value: &str) -> Result { - option_value_parser::bool(value).map_err(|e| format_parse_error("bool", value, e)) + option_value_parser::bool(value).map_err(|e| Self::format_parse_error(value, e)) } fn parse_list(value: &str) -> Result>, ParseError> { option_value_parser::bool_list_edits(value) - .map_err(|e| format_parse_error("bool list", value, e)) + .map_err(|e| Self::format_list_parse_error(value, e)) } } impl Parseable for i64 { + const OPTION_TYPE: &'static str = "int"; + fn parse(value: &str) -> Result { - option_value_parser::int(value).map_err(|e| format_parse_error("int", value, e)) + option_value_parser::int(value).map_err(|e| Self::format_parse_error(value, e)) } fn parse_list(value: &str) -> Result>, ParseError> { option_value_parser::int_list_edits(value) - .map_err(|e| format_parse_error("int list", value, e)) + .map_err(|e| Self::format_list_parse_error(value, e)) } } impl Parseable for f64 { + const OPTION_TYPE: &'static str = "float"; + fn parse(value: &str) -> Result { - option_value_parser::float(value).map_err(|e| format_parse_error("float", value, e)) + option_value_parser::float(value).map_err(|e| Self::format_parse_error(value, e)) } fn parse_list(value: &str) -> Result>, ParseError> { option_value_parser::float_list_edits(value) - .map_err(|e| format_parse_error("float list", value, e)) + .map_err(|e| Self::format_list_parse_error(value, e)) } } impl Parseable for String { + const OPTION_TYPE: &'static str = "string"; + fn parse(value: &str) -> Result { Ok(value.to_owned()) } fn parse_list(value: &str) -> Result>, ParseError> { option_value_parser::string_list_edits(value) - .map_err(|e| format_parse_error("string list", value, e)) + .map_err(|e| Self::format_list_parse_error(value, e)) } } diff --git a/src/rust/engine/options/src/parse_tests.rs b/src/rust/engine/options/src/parse_tests.rs index 90cf534fe40..a86615d1064 100644 --- a/src/rust/engine/options/src/parse_tests.rs +++ b/src/rust/engine/options/src/parse_tests.rs @@ -163,10 +163,24 @@ fn test_parse_float() { #[test] fn test_parse_list_from_empty_string() { - assert!(String::parse_list("").unwrap().is_empty()); - assert!(bool::parse_list("").unwrap().is_empty()); - assert!(i64::parse_list("").unwrap().is_empty()); - assert!(f64::parse_list("").unwrap().is_empty()); + assert_eq!( + String::parse_list(""), + Ok(vec![string_list_edit(ListEditAction::Add, [""])]) + ); + + fn check_err() { + let expected = format!("Problem parsing foo {} list value", T::OPTION_TYPE); + let actual = T::parse_list("").unwrap_err().render("foo"); + assert!( + actual.contains(&expected), + "Error message `{}` did not contain `{}`", + actual, + expected + ); + } + check_err::(); + check_err::(); + check_err::(); } fn string_list_edit>( diff --git a/src/rust/engine/options/src/tests.rs b/src/rust/engine/options/src/tests.rs index f53791f33ad..1713046bad7 100644 --- a/src/rust/engine/options/src/tests.rs +++ b/src/rust/engine/options/src/tests.rs @@ -185,7 +185,7 @@ fn test_parse_list_options() { ) { with_setup(args, env, config, extra_config, |option_parser| { let id = option_id!(["scope"], "foo"); - let option_value = option_parser.parse_int_list(&id, &[0]).unwrap(); + let option_value = option_parser.parse_int_list(&id, vec![0]).unwrap(); assert_eq!(expected, option_value.value); assert_eq!(expected_derivation, option_value.derivation.unwrap()); }); diff --git a/src/rust/engine/pantsd/src/lib.rs b/src/rust/engine/pantsd/src/lib.rs index 6f128a5f97a..57086cfc99a 100644 --- a/src/rust/engine/pantsd/src/lib.rs +++ b/src/rust/engine/pantsd/src/lib.rs @@ -297,10 +297,7 @@ pub fn fingerprint_compute( Digest::update(&mut hasher, val.value.as_bytes()); } OptionType::StringList(default) => { - let default = default.iter().map(|s| s.as_str()).collect::>(); - let val = options_parser - .parse_string_list(&option.id, &default)? - .value; + let val = options_parser.parse_string_list(&option.id, default)?.value; for item in val { Digest::update(&mut hasher, item.as_bytes()); }