diff --git a/engine/Cargo.lock b/engine/Cargo.lock index e5b499628..d25fe03d1 100644 --- a/engine/Cargo.lock +++ b/engine/Cargo.lock @@ -2775,6 +2775,8 @@ dependencies = [ "pest", "pest_derive", "pretty", + "pretty_assertions", + "regex", "serde", "serde_json", "test-log", @@ -3745,9 +3747,9 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", @@ -5793,9 +5795,9 @@ checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" [[package]] name = "yansi" -version = "0.5.1" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "zerocopy" diff --git a/engine/Cargo.toml b/engine/Cargo.toml index 5d09c649d..3abd8ee0f 100644 --- a/engine/Cargo.toml +++ b/engine/Cargo.toml @@ -67,7 +67,7 @@ minijinja = { version = "1.0.16", default-features = false, features = [ # loader # ] } -pretty_assertions = "1.4.0" +pretty_assertions = "1.4.1" rand = "0.8.5" regex = "1.10.4" reqwest = { version = "0.12.5", features = [ diff --git a/engine/baml-lib/schema-ast/Cargo.toml b/engine/baml-lib/schema-ast/Cargo.toml index fe64fbffc..445e85992 100644 --- a/engine/baml-lib/schema-ast/Cargo.toml +++ b/engine/baml-lib/schema-ast/Cargo.toml @@ -25,8 +25,10 @@ pest_derive = "2.1.0" either = "1.8.1" test-log = "0.2.16" pretty = "0.12.3" +regex.workspace = true [dev-dependencies] +pretty_assertions.workspace = true unindent = "0.2.3" [features] diff --git a/engine/baml-lib/schema-ast/src/formatter/mod.rs b/engine/baml-lib/schema-ast/src/formatter/mod.rs index 716946d2e..b42e2fc86 100644 --- a/engine/baml-lib/schema-ast/src/formatter/mod.rs +++ b/engine/baml-lib/schema-ast/src/formatter/mod.rs @@ -1,3 +1,6 @@ +#[cfg(test)] +mod tests; + use std::{ borrow::BorrowMut, cell::{RefCell, RefMut}, @@ -6,12 +9,13 @@ use std::{ }; use crate::parser::{BAMLParser, Rule}; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use pest::{ iterators::{Pair, Pairs}, Parser, }; use pretty::RcDoc; +use regex::Regex; pub struct FormatOptions { pub indent_width: isize, @@ -19,6 +23,11 @@ pub struct FormatOptions { } pub fn format_schema(source: &str, format_options: FormatOptions) -> Result { + let ignore_directive_regex = Regex::new(r"(?i)baml-format\s*:\s*ignore")?; + if ignore_directive_regex.is_match(source) { + return Ok(source.to_string()); + } + let mut schema = BAMLParser::parse(Rule::schema, source)?; let schema_pair = schema.next().ok_or(anyhow!("Expected a schema"))?; if schema_pair.as_rule() != Rule::schema { @@ -39,22 +48,28 @@ pub fn format_schema(source: &str, format_options: FormatOptions) -> Result {{ - match $pairs.peek() { - Some(pair) => { - if pair.as_rule() != $rule { - Err(anyhow!( - "Expected a {:?}, got a {:?} ({}:{})", - $rule, - pair.as_rule(), - file!(), - line!() - )) - } else { - $pairs.next(); - Ok(pair) + loop { + match $pairs.peek() { + Some(pair) => { + if pair.as_rule() == Rule::NEWLINE { + $pairs.next(); + continue; + } + if pair.as_rule() != $rule { + break Err(anyhow!( + "Expected a {:?}, got a {:?} ({}:{})", + $rule, + pair.as_rule(), + file!(), + line!() + )); + } else { + $pairs.next(); + break Ok(pair); + } } + None => break Err(anyhow!("Expected a {}", stringify!($rule))), } - None => Err(anyhow!("Expected a {}", stringify!($rule))), } }}; @@ -72,23 +87,77 @@ macro_rules! next_pair { }}; } +trait ToDoc { + type DocType; + + fn to_doc(&self) -> Self::DocType; +} + +impl<'a> ToDoc for Pair<'a, Rule> { + type DocType = RcDoc<'a, ()>; + + /// Embed the exact contents of the corresponding source in the output. + /// + /// This is our formatting "bail-out" effectively, where if we don't know + /// how to format something, we just emit the original source. + /// + /// NB: according to the `RcDoc::text` docs, this is an API violation, + /// because we call `to_doc()` on many Pest pairs that contain newlines + /// within them. I suspect that this is less of a "the 'pretty' crate will + /// break catastrophically in unexpected ways if text symbols contain + /// newlines" problem, and more of a "having newlines in text symbols may + /// produce surprising formatting" issue. It would be pretty bizarre for + /// the 'pretty' crate to inspect tokens for newlines (but not unreasonable!) + /// given how Wadler pretty prints work, but we need to rely on this + /// property to be able to incrementally implement our formatter. + fn to_doc(&self) -> Self::DocType { + if self.as_rule() == Rule::empty_lines { + // If we're formatting empty lines, superfluous whitespace should get stripped. + let newline_count = self.as_str().matches('\n').count(); + return RcDoc::concat(std::iter::repeat(RcDoc::hardline()).take(newline_count)); + } + RcDoc::text(self.as_str()) + } +} + struct Formatter { indent_width: isize, fail_on_unhandled_rule: bool, } impl Formatter { + /// The number of spaces to add before an inline trailing comment. + /// Here, "trailing comment" does not refer to the trailing_comment Pest rule, but rather just + /// a comment in this style: + /// + /// class Foo { + /// field string // comment + /// ^^^------------ This is what will get replaced with SPACES_BEFORE_TRAILING_COMMENT + /// } + const SPACES_BEFORE_TRAILING_COMMENT: &'static str = " "; + fn schema_to_doc<'a>(&self, mut pairs: Pairs<'a, Rule>) -> Result> { let mut doc = RcDoc::nil(); for pair in &mut pairs { match pair.as_rule() { Rule::type_expression_block => { - doc = doc.append(self.type_expression_block_to_doc(pair.into_inner())?); + match self.type_expression_block_to_doc(pair.clone().into_inner()) { + Ok(pair_doc) => { + doc = doc.append(pair_doc.group()); + } + Err(e) => { + log::debug!("Error formatting type_expression_block: {:#?}", e); + doc = doc.append(pair.to_doc()); + } + } } Rule::EOI => { // skip } + Rule::value_expression_block | Rule::empty_lines => { + doc = doc.append(pair.to_doc()); + } _ => { doc = doc.append(self.unhandled_rule_to_doc(pair)?); } @@ -129,9 +198,13 @@ impl Formatter { let mut content_docs = vec![]; for pair in &mut pairs { + let error_context = format!("type_expression: {:#?}", pair); match pair.as_rule() { Rule::type_expression => { - content_docs.push(self.type_expression_to_doc(pair.into_inner())?); + content_docs.push( + self.type_expression_to_doc(pair.into_inner()) + .context(error_context)?, + ); } Rule::block_attribute => { content_docs.push(pair_to_doc_text(pair)); @@ -167,8 +240,17 @@ impl Formatter { let mut doc = RcDoc::nil() .append(pair_to_doc_text(ident)) - .append(RcDoc::space()) - .append(self.field_type_chain_to_doc(field_type_chain.into_inner())?); + .append(RcDoc::space()); + + // Since our compiler currently doesn't allow newlines in type expressions, we can't + // put comments in the middle of a type expression, so we can rely on this hack to + // cascade comments all the way out of a type expression. + let (field_type_chain_doc, field_type_chain_comments) = + self.field_type_chain_to_doc(field_type_chain.into_inner())?; + doc = doc.append(field_type_chain_doc); + if let Some(field_type_chain_comments) = field_type_chain_comments { + doc = doc.append(field_type_chain_comments); + } for pair in pairs { match pair.as_rule() { @@ -190,13 +272,22 @@ impl Formatter { Ok(doc) } - fn field_type_chain_to_doc<'a>(&self, pairs: Pairs<'a, Rule>) -> Result> { + fn field_type_chain_to_doc<'a>( + &self, + pairs: Pairs<'a, Rule>, + ) -> Result<(RcDoc<'a, ()>, Option>)> { let mut docs = vec![]; + let mut comments = vec![]; for pair in pairs { match pair.as_rule() { Rule::field_type_with_attr => { - docs.push(self.field_type_with_attr_to_doc(pair.into_inner())?); + let (field_type_doc, field_type_comments) = + self.field_type_with_attr_to_doc(pair.into_inner())?; + docs.push(field_type_doc); + if let Some(field_type_comments) = field_type_comments { + comments.push(field_type_comments); + } } Rule::field_operator => { docs.push(RcDoc::text("|")); @@ -207,31 +298,69 @@ impl Formatter { } } - Ok(RcDoc::intersperse(docs, RcDoc::space()) - .nest(self.indent_width) - .group()) + Ok(( + RcDoc::intersperse(docs, RcDoc::space()) + .nest(self.indent_width) + .group(), + if comments.is_empty() { + None + } else { + Some(RcDoc::concat(comments).group()) + }, + )) } - fn field_type_with_attr_to_doc<'a>(&self, mut pairs: Pairs<'a, Rule>) -> Result> { + fn field_type_with_attr_to_doc<'a>( + &self, + mut pairs: Pairs<'a, Rule>, + ) -> Result<(RcDoc<'a, ()>, Option>)> { let mut docs = vec![]; + // This is a hack: we cascade comments all the way out of a type + // expression, relying on the (current) limitation that our users can't + // have newlines in a type expression today. + // + // The correct way to handle this is to either (1) make our lexer understand that + // trailing comments are not actually a part of a type expression or (2) teach the + // formatter how to push comments to the correct context. + // + // Arguably we're currently using (2), and just implementing it in a naive way, + // because we just push all comments to the context of the type expression, rather + // than, say, an operand of the type expression. + let mut comments = vec![]; for pair in &mut pairs { match pair.as_rule() { Rule::field_type => { docs.push(self.field_type_to_doc(pair.into_inner())?); } - Rule::field_attribute | Rule::trailing_comment => { + Rule::field_attribute => { docs.push(pair_to_doc_text(pair)); } + Rule::trailing_comment => { + if comments.is_empty() { + comments.push(RcDoc::text(Self::SPACES_BEFORE_TRAILING_COMMENT)); + } + comments.push(pair_to_doc_text(pair)); + } + Rule::NEWLINE => { + comments.push(RcDoc::hardline()); + } _ => { docs.push(self.unhandled_rule_to_doc(pair)?); } } } - Ok(RcDoc::intersperse(docs, RcDoc::space()) - .nest(self.indent_width) - .group()) + Ok(( + RcDoc::intersperse(docs, RcDoc::space()) + .nest(self.indent_width) + .group(), + if comments.is_empty() { + None + } else { + Some(RcDoc::concat(comments).group()) + }, + )) } fn field_type_to_doc<'a>(&self, pairs: Pairs<'a, Rule>) -> Result> { @@ -283,75 +412,3 @@ impl Formatter { fn pair_to_doc_text<'a>(pair: Pair<'a, Rule>) -> RcDoc<'a, ()> { RcDoc::text(pair.as_str().trim()) } - -#[cfg(test)] -mod tests { - use super::*; - use unindent::Unindent as _; - - #[track_caller] - fn assert_format_eq(schema: &str, expected: &str) -> Result<()> { - let formatted = format_schema( - &schema.unindent().trim_end(), - FormatOptions { - indent_width: 4, - fail_on_unhandled_rule: true, - }, - )?; - assert_eq!(expected.unindent().trim_end(), formatted); - Ok(()) - } - - #[test] - fn test_format_schema() -> anyhow::Result<()> { - assert_format_eq( - r#" - class Foo { - } - "#, - r#" - class Foo {} - "#, - )?; - - assert_format_eq( - r#" - class Foo { field1 string } - "#, - r#" - class Foo { - field1 string - } - "#, - )?; - - assert_format_eq( - r#" - class Foo { - - field1 string - } - "#, - r#" - class Foo { - field1 string - } - "#, - )?; - - assert_format_eq( - r#" - class Foo { - field1 string|int - } - "#, - r#" - class Foo { - field1 string | int - } - "#, - )?; - - Ok(()) - } -} diff --git a/engine/baml-lib/schema-ast/src/formatter/tests.rs b/engine/baml-lib/schema-ast/src/formatter/tests.rs new file mode 100644 index 000000000..0312d1c69 --- /dev/null +++ b/engine/baml-lib/schema-ast/src/formatter/tests.rs @@ -0,0 +1,222 @@ +use super::*; +use pretty_assertions::assert_eq; +use unindent::Unindent as _; + +#[track_caller] +fn assert_format_eq(schema: &str, expected: &str) -> Result<()> { + let formatted = format_schema( + &schema, + FormatOptions { + indent_width: 2, + fail_on_unhandled_rule: true, + }, + )?; + assert_eq!(formatted, expected); + + Ok(()) +} + +#[test] +fn class_containing_whitespace() -> anyhow::Result<()> { + let actual = r#" + class Foo { + } + + class Foo { field1 string } + + class Foo { + + field1 string + } + + class Foo { + field1 string|int + } + "# + .unindent() + .trim_end() + .to_string(); + + let expected = r#" + class Foo {} + + class Foo { + field1 string + } + + class Foo { + field1 string + } + + class Foo { + field1 string | int + } + "# + .unindent() + .trim_end() + .to_string(); + + assert_format_eq(&actual, &expected)?; + assert_format_eq(&expected, &expected) +} + +#[test] +fn class_with_assorted_comment_styles() -> anyhow::Result<()> { + let actual = r#" + class Foo0 { + lorem string // trailing comments should be separated by two spaces + ipsum string + } + + class Foo1 { + lorem string + ipsum string + // dolor string + } + + class Foo2 { + + // "lorem" is a latin word + lorem string + + // "ipsum" is a latin word + ipsum string + + } + + class Foo3 { + lorem string + ipsum string + // Lorem ipsum dolor sit amet + // Consectetur adipiscing elit + // Sed do eiusmod tempor incididunt + // Ut labore et dolore magna aliqua + // Ut enim ad minim veniam + } + "# + .unindent() + .trim_end() + .to_string(); + + let expected = r#" + class Foo0 { + lorem string // trailing comments should be separated by two spaces + ipsum string + } + + class Foo1 { + lorem string + ipsum string + // dolor string + } + + class Foo2 { + // "lorem" is a latin word + lorem string + // "ipsum" is a latin word + ipsum string + } + + class Foo3 { + lorem string + ipsum string + // Lorem ipsum dolor sit amet + // Consectetur adipiscing elit + // Sed do eiusmod tempor incididunt + // Ut labore et dolore magna aliqua + // Ut enim ad minim veniam + } + "# + .unindent() + .trim_end() + .to_string(); + + assert_format_eq(&actual, &expected)?; + assert_format_eq(&expected, &expected) +} + +#[test] +fn baml_format_escape_directive_works() -> anyhow::Result<()> { + let expected = r#" + // baml-format: ignore + class BadlyFormatted0 { + lorem string // trailing comments should be separated by two spaces + ipsum string + } + + class BadlyFormatted1 { + lorem string + ipsum string + // Lorem ipsum dolor sit amet + // Consectetur adipiscing elit + // Sed do eiusmod tempor incididunt + // Ut labore et dolore magna aliqua + // Ut enim ad minim veniam + } + "# + .unindent() + .trim_end() + .to_string(); + + assert_format_eq(&expected, &expected) +} + +/// We have not yet implemented formatting for functions or enums, +/// so those should be preserved as-is. +#[test] +fn class_formatting_is_resilient_to_unhandled_rules() -> anyhow::Result<()> { + let actual = r##" + function LlmConvert(input: string) -> string { + client "openai/gpt-4o" + prompt #" + Extract this info from the email in JSON format: + {{ ctx.output_format }} + "# + } + + enum Latin { + Lorem + Ipsum + } + + class Foo { + lorem "alpha" | "bravo" + ipsum "charlie"|"delta" + } + "## + .unindent() + .trim_end() + .to_string(); + let expected = r##" + function LlmConvert(input: string) -> string { + client "openai/gpt-4o" + prompt #" + Extract this info from the email in JSON format: + {{ ctx.output_format }} + "# + } + + enum Latin { + Lorem + Ipsum + } + + class Foo { + lorem "alpha" | "bravo" + ipsum "charlie" | "delta" + } + "## + .unindent() + .trim_end() + .to_string(); + + assert_format_eq(&actual, &expected) +} + +#[test] +fn newlines_with_only_spaces_are_stripped() -> anyhow::Result<()> { + let actual = "class Foo {}\n \n \nclass Bar {}\n"; + let expected = "class Foo {}\n\n\nclass Bar {}\n"; + + assert_format_eq(&actual, &expected) +} diff --git a/engine/baml-lib/schema-ast/src/parser/datamodel.pest b/engine/baml-lib/schema-ast/src/parser/datamodel.pest index 7fe6ec60e..623b746ad 100644 --- a/engine/baml-lib/schema-ast/src/parser/datamodel.pest +++ b/engine/baml-lib/schema-ast/src/parser/datamodel.pest @@ -162,7 +162,7 @@ doc_content = @{ (!NEWLINE ~ ANY)* } // Shared Building Blocks and Comments // ###################################### WHITESPACE = _{ " " | "\t" } -NEWLINE = _{ "\n" | "\r\n" | "\r" } +NEWLINE = { "\n" | "\r\n" | "\r" } empty_lines = @{ (WHITESPACE* ~ NEWLINE)+ } // ###################################### diff --git a/engine/baml-lib/schema-ast/src/parser/helpers.rs b/engine/baml-lib/schema-ast/src/parser/helpers.rs index 91c8bb8d5..2251dcb52 100644 --- a/engine/baml-lib/schema-ast/src/parser/helpers.rs +++ b/engine/baml-lib/schema-ast/src/parser/helpers.rs @@ -9,7 +9,8 @@ pub fn parsing_catch_all(token: Pair<'_>, kind: &str) { | Rule::trailing_comment | Rule::comment_block | Rule::block_comment - | Rule::SPACER_TEXT => {} + | Rule::SPACER_TEXT + | Rule::NEWLINE => {} x => unreachable!( "Encountered impossible {} during parsing: {:?} {:?}", kind, diff --git a/engine/cli/src/format.rs b/engine/cli/src/format.rs index 974604c24..8e9c99878 100644 --- a/engine/cli/src/format.rs +++ b/engine/cli/src/format.rs @@ -1,13 +1,14 @@ use std::{fs, path::PathBuf}; use anyhow::Result; -use baml_runtime::{baml_src_files, BamlRuntime}; +use baml_runtime::baml_src_files; use clap::Args; use internal_baml_core::internal_baml_schema_ast::{format_schema, FormatOptions}; #[derive(Args, Debug)] pub struct FormatArgs { - #[arg(long, help = "path/to/baml_src", default_value = "./baml_src")] + // default_value for --from is _usually_ the baml_src directory, but not for baml-cli fmt! + #[arg(long, help = "path/to/baml_src", default_value = ".")] pub from: PathBuf, #[arg( @@ -30,8 +31,8 @@ impl FormatArgs { // Usually this is done in commands.rs, but fmt is a special case // because it doesn't need to actually load the BAML runtime to parse // BAML files. - let from = BamlRuntime::parse_baml_src_path(&self.from)?; - baml_src_files(&from)? + // let from = BamlRuntime::parse_baml_src_path(&self.from)?; + baml_src_files(&self.from)? } else { self.paths.clone() }; @@ -53,7 +54,7 @@ impl FormatArgs { } } Err(e) => { - log::error!("Failed to format {}: {}", path.display(), e); + log::error!("Failed to format {}: {:?}", path.display(), e); } } } diff --git a/fern/03-reference/baml-cli/fmt.mdx b/fern/03-reference/baml-cli/fmt.mdx new file mode 100644 index 000000000..bc3509268 --- /dev/null +++ b/fern/03-reference/baml-cli/fmt.mdx @@ -0,0 +1,25 @@ +The `fmt` command will format your BAML files. + + + **Warning: Beta Feature** + + This feature is still in-progress, and does not yet support all BAML syntax. + + +## Usage + +``` +baml-cli fmt [OPTIONS] [file.baml] [file2.baml] [file3.baml] ... +``` + +## Details + +To disable the formatter in a file, you can add + +```baml +// baml-format: ignore +``` + +anywhere in the file. + +Formatting is done in-place and non-configurable. \ No newline at end of file diff --git a/integ-tests/baml_src/formatter/test-comments.baml b/integ-tests/baml_src/formatter/test-comments.baml new file mode 100644 index 000000000..550dfb8fc --- /dev/null +++ b/integ-tests/baml_src/formatter/test-comments.baml @@ -0,0 +1,27 @@ +class FormatterTest0 { + lorem string // trailing comments should be preserved + ipsum string +} + +class FormatterTest1 { + lorem string + ipsum string + // dolor string +} + +class FormatterTest2 { + // "lorem" is a latin word + lorem string + // "ipsum" is a latin word + ipsum string +} + +class FormatterTest3 { + lorem string + ipsum string + // Lorem ipsum dolor sit amet + // Consectetur adipiscing elit + // Sed do eiusmod tempor incididunt + // Ut labore et dolore magna aliqua + // Ut enim ad minim veniam +} \ No newline at end of file diff --git a/integ-tests/python/baml_client/inlinedbaml.py b/integ-tests/python/baml_client/inlinedbaml.py index b1b088470..50c14023d 100644 --- a/integ-tests/python/baml_client/inlinedbaml.py +++ b/integ-tests/python/baml_client/inlinedbaml.py @@ -25,6 +25,7 @@ "fiddle-examples/extract-receipt-info.baml": "class ReceiptItem {\n name string\n description string?\n quantity int\n price float\n}\n\nclass ReceiptInfo {\n items ReceiptItem[]\n total_cost float?\n venue \"barisa\" | \"ox_burger\"\n}\n\nfunction ExtractReceiptInfo(email: string, reason: \"curiosity\" | \"personal_finance\") -> ReceiptInfo {\n client GPT4o\n prompt #\"\n Given the receipt below:\n\n ```\n {{email}}\n ```\n\n {{ ctx.output_format }}\n \"#\n}\n\n", "fiddle-examples/images/image.baml": "function DescribeImage(img: image) -> string {\n client GPT4o\n prompt #\"\n {{ _.role(\"user\") }}\n\n\n Describe the image below in 20 words:\n {{ img }}\n \"#\n\n}\n\nclass FakeImage {\n url string\n}\n\nclass ClassWithImage {\n myImage image\n param2 string\n fake_image FakeImage\n}\n\n// chat role user present\nfunction DescribeImage2(classWithImage: ClassWithImage, img2: image) -> string { \n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 2 answers that answer the following commands.\n\n 1. Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n 2. Also tell me what's happening here in one sentence:\n {{ img2 }}\n \"#\n}\n\n// no chat role\nfunction DescribeImage3(classWithImage: ClassWithImage, img2: image) -> string {\n client GPT4Turbo\n prompt #\"\n Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n Tell me also what's happening here in one sentence and relate it to the word {{ classWithImage.param2 }}:\n {{ img2 }}\n \"#\n}\n\n\n// system prompt and chat prompt\nfunction DescribeImage4(classWithImage: ClassWithImage, img2: image) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"system\")}}\n\n Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n Tell me also what's happening here in one sentence and relate it to the word {{ classWithImage.param2 }}:\n {{ img2 }}\n \"#\n}\n\ntest TestName {\n functions [DescribeImage]\n args {\n img { url \"https://imgs.xkcd.com/comics/standards.png\"}\n }\n}\n", "fiddle-examples/symbol-tuning.baml": "enum Category3 {\n Refund @alias(\"k1\")\n @description(\"Customer wants to refund a product\")\n\n CancelOrder @alias(\"k2\")\n @description(\"Customer wants to cancel an order\")\n\n TechnicalSupport @alias(\"k3\")\n @description(\"Customer needs help with a technical issue unrelated to account creation or login\")\n\n AccountIssue @alias(\"k4\")\n @description(\"Specifically relates to account-login or account-creation\")\n\n Question @alias(\"k5\")\n @description(\"Customer has a question\")\n}\n\nfunction ClassifyMessage3(input: string) -> Category {\n client GPT4\n\n prompt #\"\n Classify the following INPUT into ONE\n of the following categories:\n\n INPUT: {{ input }}\n\n {{ ctx.output_format }}\n\n Response:\n \"#\n}", + "formatter/test-comments.baml": "class FormatterTest0 {\n lorem string // trailing comments should be preserved\n ipsum string\n}\n\nclass FormatterTest1 {\n lorem string\n ipsum string\n // dolor string\n}\n\nclass FormatterTest2 {\n // \"lorem\" is a latin word\n lorem string\n // \"ipsum\" is a latin word\n ipsum string\n}\n\nclass FormatterTest3 {\n lorem string\n ipsum string\n // Lorem ipsum dolor sit amet\n // Consectetur adipiscing elit\n // Sed do eiusmod tempor incididunt\n // Ut labore et dolore magna aliqua\n // Ut enim ad minim veniam\n}", "generators.baml": "generator lang_python {\n output_type python/pydantic\n output_dir \"../python\"\n version \"0.71.1\"\n}\n\ngenerator lang_typescript {\n output_type typescript\n output_dir \"../typescript\"\n version \"0.71.1\"\n}\n\ngenerator lang_ruby {\n output_type ruby/sorbet\n output_dir \"../ruby\"\n version \"0.71.1\"\n}\n\n// generator openapi {\n// output_type rest/openapi\n// output_dir \"../openapi\"\n// version \"0.71.1\"\n// on_generate \"rm .gitignore\"\n// }\n", "test-files/aliases/aliased-inputs.baml": "\nclass InputClass {\n key string @alias(\"color\")\n key2 string\n}\n\n\nclass InputClassNested {\n key string\n nested InputClass @alias(\"interesting-key\")\n}\n \n\nfunction AliasedInputClass(input: InputClass) -> string {\n client GPT35\n prompt #\"\n\n {{input}}\n\n This is a test. What's the name of the first json key above? Remember, tell me the key, not value.\n \"#\n}\n \nfunction AliasedInputClass2(input: InputClass) -> string {\n client GPT35\n prompt #\"\n\n {# making sure we can still access the original key #}\n {%if input.key == \"tiger\"%}\n Repeat this value back to me, and nothing else: {{input.key}}\n {%endif%}\n \"#\n}\n \n function AliasedInputClassNested(input: InputClassNested) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\")}}\n\n {{input}}\n\n This is a test. What's the name of the second json key above? Remember, tell me the key, not value.\n \"#\n }\n\n\nenum AliasedEnum {\n KEY_ONE @alias(\"tiger\")\n KEY_TWO\n}\n\nfunction AliasedInputEnum(input: AliasedEnum) -> string {\n client GPT4o\n prompt #\"\n {{ _.role(\"user\")}}\n\n\n Write out this word only in your response, in lowercase:\n ---\n {{input}}\n ---\n Answer:\n \"#\n}\n\n\nfunction AliasedInputList(input: AliasedEnum[]) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\")}}\n Given this array:\n ---\n {{input}}\n ---\n\n Return the first element in the array:\n \"#\n}\n\n", "test-files/aliases/classes.baml": "class TestClassAlias {\n key string @alias(\"key-dash\") @description(#\"\n This is a description for key\n af asdf\n \"#)\n key2 string @alias(\"key21\")\n key3 string @alias(\"key with space\")\n key4 string //unaliased\n key5 string @alias(\"key.with.punctuation/123\")\n}\n\nfunction FnTestClassAlias(input: string) -> TestClassAlias {\n client GPT35\n prompt #\"\n {{ctx.output_format}}\n \"#\n}\n\ntest FnTestClassAlias {\n functions [FnTestClassAlias]\n args {\n input \"example input\"\n }\n}\n", diff --git a/integ-tests/python/baml_client/partial_types.py b/integ-tests/python/baml_client/partial_types.py index 9ce8a02c0..a80af5e9c 100644 --- a/integ-tests/python/baml_client/partial_types.py +++ b/integ-tests/python/baml_client/partial_types.py @@ -149,6 +149,22 @@ class FooAny(BaseModel): class Forest(BaseModel): trees: List["Tree"] +class FormatterTest0(BaseModel): + lorem: Optional[str] = None + ipsum: Optional[str] = None + +class FormatterTest1(BaseModel): + lorem: Optional[str] = None + ipsum: Optional[str] = None + +class FormatterTest2(BaseModel): + lorem: Optional[str] = None + ipsum: Optional[str] = None + +class FormatterTest3(BaseModel): + lorem: Optional[str] = None + ipsum: Optional[str] = None + class GroceryReceipt(BaseModel): receiptId: Optional[str] = None storeName: Optional[str] = None diff --git a/integ-tests/python/baml_client/type_builder.py b/integ-tests/python/baml_client/type_builder.py index 741999408..3b30598a0 100644 --- a/integ-tests/python/baml_client/type_builder.py +++ b/integ-tests/python/baml_client/type_builder.py @@ -20,7 +20,7 @@ class TypeBuilder(_TypeBuilder): def __init__(self): super().__init__(classes=set( - ["BigNumbers","BinaryNode","Blah","BlockConstraint","BlockConstraintForParam","BookOrder","ClassOptionalOutput","ClassOptionalOutput2","ClassToRecAlias","ClassWithImage","CompoundBigNumbers","ContactInfo","CustomTaskResult","DummyOutput","DynInputOutput","DynamicClassOne","DynamicClassTwo","DynamicOutput","Earthling","Education","Email","EmailAddress","Event","FakeImage","FlightConfirmation","FooAny","Forest","GroceryReceipt","InnerClass","InnerClass2","InputClass","InputClassNested","LinkedList","LinkedListAliasNode","LiteralClassHello","LiteralClassOne","LiteralClassTwo","MalformedConstraints","MalformedConstraints2","Martian","MergeAttrs","NamedArgsSingleClass","Nested","Nested2","NestedBlockConstraint","NestedBlockConstraintForParam","Node","NodeWithAliasIndirection","OptionalListAndMap","OptionalTest_Prop1","OptionalTest_ReturnType","OrderInfo","OriginalA","OriginalB","Person","PhoneNumber","Quantity","RaysData","ReceiptInfo","ReceiptItem","Recipe","Resume","Schema","SearchParams","SomeClassNestedDynamic","StringToClassEntry","TestClassAlias","TestClassNested","TestClassWithEnum","TestOutputClass","Tree","TwoStoriesOneTitle","UnionTest_ReturnType","WithReasoning",] + ["BigNumbers","BinaryNode","Blah","BlockConstraint","BlockConstraintForParam","BookOrder","ClassOptionalOutput","ClassOptionalOutput2","ClassToRecAlias","ClassWithImage","CompoundBigNumbers","ContactInfo","CustomTaskResult","DummyOutput","DynInputOutput","DynamicClassOne","DynamicClassTwo","DynamicOutput","Earthling","Education","Email","EmailAddress","Event","FakeImage","FlightConfirmation","FooAny","Forest","FormatterTest0","FormatterTest1","FormatterTest2","FormatterTest3","GroceryReceipt","InnerClass","InnerClass2","InputClass","InputClassNested","LinkedList","LinkedListAliasNode","LiteralClassHello","LiteralClassOne","LiteralClassTwo","MalformedConstraints","MalformedConstraints2","Martian","MergeAttrs","NamedArgsSingleClass","Nested","Nested2","NestedBlockConstraint","NestedBlockConstraintForParam","Node","NodeWithAliasIndirection","OptionalListAndMap","OptionalTest_Prop1","OptionalTest_ReturnType","OrderInfo","OriginalA","OriginalB","Person","PhoneNumber","Quantity","RaysData","ReceiptInfo","ReceiptItem","Recipe","Resume","Schema","SearchParams","SomeClassNestedDynamic","StringToClassEntry","TestClassAlias","TestClassNested","TestClassWithEnum","TestOutputClass","Tree","TwoStoriesOneTitle","UnionTest_ReturnType","WithReasoning",] ), enums=set( ["AliasedEnum","Category","Category2","Category3","Color","DataType","DynEnumOne","DynEnumTwo","EnumInClass","EnumOutput","Hobby","MapKey","NamedArgsSingleEnum","NamedArgsSingleEnumList","OptionalTest_CategoryType","OrderStatus","Tag","TestEnum",] )) diff --git a/integ-tests/python/baml_client/types.py b/integ-tests/python/baml_client/types.py index 6a68f8efd..de43ade95 100644 --- a/integ-tests/python/baml_client/types.py +++ b/integ-tests/python/baml_client/types.py @@ -274,6 +274,22 @@ class FooAny(BaseModel): class Forest(BaseModel): trees: List["Tree"] +class FormatterTest0(BaseModel): + lorem: str + ipsum: str + +class FormatterTest1(BaseModel): + lorem: str + ipsum: str + +class FormatterTest2(BaseModel): + lorem: str + ipsum: str + +class FormatterTest3(BaseModel): + lorem: str + ipsum: str + class GroceryReceipt(BaseModel): receiptId: str storeName: str diff --git a/integ-tests/ruby/baml_client/inlined.rb b/integ-tests/ruby/baml_client/inlined.rb index aafa0086c..97b329398 100644 --- a/integ-tests/ruby/baml_client/inlined.rb +++ b/integ-tests/ruby/baml_client/inlined.rb @@ -25,6 +25,7 @@ module Inlined "fiddle-examples/extract-receipt-info.baml" => "class ReceiptItem {\n name string\n description string?\n quantity int\n price float\n}\n\nclass ReceiptInfo {\n items ReceiptItem[]\n total_cost float?\n venue \"barisa\" | \"ox_burger\"\n}\n\nfunction ExtractReceiptInfo(email: string, reason: \"curiosity\" | \"personal_finance\") -> ReceiptInfo {\n client GPT4o\n prompt #\"\n Given the receipt below:\n\n ```\n {{email}}\n ```\n\n {{ ctx.output_format }}\n \"#\n}\n\n", "fiddle-examples/images/image.baml" => "function DescribeImage(img: image) -> string {\n client GPT4o\n prompt #\"\n {{ _.role(\"user\") }}\n\n\n Describe the image below in 20 words:\n {{ img }}\n \"#\n\n}\n\nclass FakeImage {\n url string\n}\n\nclass ClassWithImage {\n myImage image\n param2 string\n fake_image FakeImage\n}\n\n// chat role user present\nfunction DescribeImage2(classWithImage: ClassWithImage, img2: image) -> string { \n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 2 answers that answer the following commands.\n\n 1. Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n 2. Also tell me what's happening here in one sentence:\n {{ img2 }}\n \"#\n}\n\n// no chat role\nfunction DescribeImage3(classWithImage: ClassWithImage, img2: image) -> string {\n client GPT4Turbo\n prompt #\"\n Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n Tell me also what's happening here in one sentence and relate it to the word {{ classWithImage.param2 }}:\n {{ img2 }}\n \"#\n}\n\n\n// system prompt and chat prompt\nfunction DescribeImage4(classWithImage: ClassWithImage, img2: image) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"system\")}}\n\n Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n Tell me also what's happening here in one sentence and relate it to the word {{ classWithImage.param2 }}:\n {{ img2 }}\n \"#\n}\n\ntest TestName {\n functions [DescribeImage]\n args {\n img { url \"https://imgs.xkcd.com/comics/standards.png\"}\n }\n}\n", "fiddle-examples/symbol-tuning.baml" => "enum Category3 {\n Refund @alias(\"k1\")\n @description(\"Customer wants to refund a product\")\n\n CancelOrder @alias(\"k2\")\n @description(\"Customer wants to cancel an order\")\n\n TechnicalSupport @alias(\"k3\")\n @description(\"Customer needs help with a technical issue unrelated to account creation or login\")\n\n AccountIssue @alias(\"k4\")\n @description(\"Specifically relates to account-login or account-creation\")\n\n Question @alias(\"k5\")\n @description(\"Customer has a question\")\n}\n\nfunction ClassifyMessage3(input: string) -> Category {\n client GPT4\n\n prompt #\"\n Classify the following INPUT into ONE\n of the following categories:\n\n INPUT: {{ input }}\n\n {{ ctx.output_format }}\n\n Response:\n \"#\n}", + "formatter/test-comments.baml" => "class FormatterTest0 {\n lorem string // trailing comments should be preserved\n ipsum string\n}\n\nclass FormatterTest1 {\n lorem string\n ipsum string\n // dolor string\n}\n\nclass FormatterTest2 {\n // \"lorem\" is a latin word\n lorem string\n // \"ipsum\" is a latin word\n ipsum string\n}\n\nclass FormatterTest3 {\n lorem string\n ipsum string\n // Lorem ipsum dolor sit amet\n // Consectetur adipiscing elit\n // Sed do eiusmod tempor incididunt\n // Ut labore et dolore magna aliqua\n // Ut enim ad minim veniam\n}", "generators.baml" => "generator lang_python {\n output_type python/pydantic\n output_dir \"../python\"\n version \"0.71.1\"\n}\n\ngenerator lang_typescript {\n output_type typescript\n output_dir \"../typescript\"\n version \"0.71.1\"\n}\n\ngenerator lang_ruby {\n output_type ruby/sorbet\n output_dir \"../ruby\"\n version \"0.71.1\"\n}\n\n// generator openapi {\n// output_type rest/openapi\n// output_dir \"../openapi\"\n// version \"0.71.1\"\n// on_generate \"rm .gitignore\"\n// }\n", "test-files/aliases/aliased-inputs.baml" => "\nclass InputClass {\n key string @alias(\"color\")\n key2 string\n}\n\n\nclass InputClassNested {\n key string\n nested InputClass @alias(\"interesting-key\")\n}\n \n\nfunction AliasedInputClass(input: InputClass) -> string {\n client GPT35\n prompt #\"\n\n {{input}}\n\n This is a test. What's the name of the first json key above? Remember, tell me the key, not value.\n \"#\n}\n \nfunction AliasedInputClass2(input: InputClass) -> string {\n client GPT35\n prompt #\"\n\n {# making sure we can still access the original key #}\n {%if input.key == \"tiger\"%}\n Repeat this value back to me, and nothing else: {{input.key}}\n {%endif%}\n \"#\n}\n \n function AliasedInputClassNested(input: InputClassNested) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\")}}\n\n {{input}}\n\n This is a test. What's the name of the second json key above? Remember, tell me the key, not value.\n \"#\n }\n\n\nenum AliasedEnum {\n KEY_ONE @alias(\"tiger\")\n KEY_TWO\n}\n\nfunction AliasedInputEnum(input: AliasedEnum) -> string {\n client GPT4o\n prompt #\"\n {{ _.role(\"user\")}}\n\n\n Write out this word only in your response, in lowercase:\n ---\n {{input}}\n ---\n Answer:\n \"#\n}\n\n\nfunction AliasedInputList(input: AliasedEnum[]) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\")}}\n Given this array:\n ---\n {{input}}\n ---\n\n Return the first element in the array:\n \"#\n}\n\n", "test-files/aliases/classes.baml" => "class TestClassAlias {\n key string @alias(\"key-dash\") @description(#\"\n This is a description for key\n af asdf\n \"#)\n key2 string @alias(\"key21\")\n key3 string @alias(\"key with space\")\n key4 string //unaliased\n key5 string @alias(\"key.with.punctuation/123\")\n}\n\nfunction FnTestClassAlias(input: string) -> TestClassAlias {\n client GPT35\n prompt #\"\n {{ctx.output_format}}\n \"#\n}\n\ntest FnTestClassAlias {\n functions [FnTestClassAlias]\n args {\n input \"example input\"\n }\n}\n", diff --git a/integ-tests/ruby/baml_client/partial-types.rb b/integ-tests/ruby/baml_client/partial-types.rb index 438593d7f..ecbda8c40 100644 --- a/integ-tests/ruby/baml_client/partial-types.rb +++ b/integ-tests/ruby/baml_client/partial-types.rb @@ -47,6 +47,10 @@ class FakeImage < T::Struct; end class FlightConfirmation < T::Struct; end class FooAny < T::Struct; end class Forest < T::Struct; end + class FormatterTest0 < T::Struct; end + class FormatterTest1 < T::Struct; end + class FormatterTest2 < T::Struct; end + class FormatterTest3 < T::Struct; end class GroceryReceipt < T::Struct; end class InnerClass < T::Struct; end class InnerClass2 < T::Struct; end @@ -486,6 +490,62 @@ def initialize(props) @props = props end end + class FormatterTest0 < T::Struct + include Baml::Sorbet::Struct + const :lorem, T.nilable(String) + const :ipsum, T.nilable(String) + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end + class FormatterTest1 < T::Struct + include Baml::Sorbet::Struct + const :lorem, T.nilable(String) + const :ipsum, T.nilable(String) + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end + class FormatterTest2 < T::Struct + include Baml::Sorbet::Struct + const :lorem, T.nilable(String) + const :ipsum, T.nilable(String) + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end + class FormatterTest3 < T::Struct + include Baml::Sorbet::Struct + const :lorem, T.nilable(String) + const :ipsum, T.nilable(String) + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end class GroceryReceipt < T::Struct include Baml::Sorbet::Struct const :receiptId, T.nilable(String) diff --git a/integ-tests/ruby/baml_client/type-registry.rb b/integ-tests/ruby/baml_client/type-registry.rb index 3095f25e9..b9cdec85d 100644 --- a/integ-tests/ruby/baml_client/type-registry.rb +++ b/integ-tests/ruby/baml_client/type-registry.rb @@ -18,7 +18,7 @@ module Baml class TypeBuilder def initialize @registry = Baml::Ffi::TypeBuilder.new - @classes = Set[ "BigNumbers", "BinaryNode", "Blah", "BlockConstraint", "BlockConstraintForParam", "BookOrder", "ClassOptionalOutput", "ClassOptionalOutput2", "ClassToRecAlias", "ClassWithImage", "CompoundBigNumbers", "ContactInfo", "CustomTaskResult", "DummyOutput", "DynInputOutput", "DynamicClassOne", "DynamicClassTwo", "DynamicOutput", "Earthling", "Education", "Email", "EmailAddress", "Event", "FakeImage", "FlightConfirmation", "FooAny", "Forest", "GroceryReceipt", "InnerClass", "InnerClass2", "InputClass", "InputClassNested", "LinkedList", "LinkedListAliasNode", "LiteralClassHello", "LiteralClassOne", "LiteralClassTwo", "MalformedConstraints", "MalformedConstraints2", "Martian", "MergeAttrs", "NamedArgsSingleClass", "Nested", "Nested2", "NestedBlockConstraint", "NestedBlockConstraintForParam", "Node", "NodeWithAliasIndirection", "OptionalListAndMap", "OptionalTest_Prop1", "OptionalTest_ReturnType", "OrderInfo", "OriginalA", "OriginalB", "Person", "PhoneNumber", "Quantity", "RaysData", "ReceiptInfo", "ReceiptItem", "Recipe", "Resume", "Schema", "SearchParams", "SomeClassNestedDynamic", "StringToClassEntry", "TestClassAlias", "TestClassNested", "TestClassWithEnum", "TestOutputClass", "Tree", "TwoStoriesOneTitle", "UnionTest_ReturnType", "WithReasoning", ] + @classes = Set[ "BigNumbers", "BinaryNode", "Blah", "BlockConstraint", "BlockConstraintForParam", "BookOrder", "ClassOptionalOutput", "ClassOptionalOutput2", "ClassToRecAlias", "ClassWithImage", "CompoundBigNumbers", "ContactInfo", "CustomTaskResult", "DummyOutput", "DynInputOutput", "DynamicClassOne", "DynamicClassTwo", "DynamicOutput", "Earthling", "Education", "Email", "EmailAddress", "Event", "FakeImage", "FlightConfirmation", "FooAny", "Forest", "FormatterTest0", "FormatterTest1", "FormatterTest2", "FormatterTest3", "GroceryReceipt", "InnerClass", "InnerClass2", "InputClass", "InputClassNested", "LinkedList", "LinkedListAliasNode", "LiteralClassHello", "LiteralClassOne", "LiteralClassTwo", "MalformedConstraints", "MalformedConstraints2", "Martian", "MergeAttrs", "NamedArgsSingleClass", "Nested", "Nested2", "NestedBlockConstraint", "NestedBlockConstraintForParam", "Node", "NodeWithAliasIndirection", "OptionalListAndMap", "OptionalTest_Prop1", "OptionalTest_ReturnType", "OrderInfo", "OriginalA", "OriginalB", "Person", "PhoneNumber", "Quantity", "RaysData", "ReceiptInfo", "ReceiptItem", "Recipe", "Resume", "Schema", "SearchParams", "SomeClassNestedDynamic", "StringToClassEntry", "TestClassAlias", "TestClassNested", "TestClassWithEnum", "TestOutputClass", "Tree", "TwoStoriesOneTitle", "UnionTest_ReturnType", "WithReasoning", ] @enums = Set[ "AliasedEnum", "Category", "Category2", "Category3", "Color", "DataType", "DynEnumOne", "DynEnumTwo", "EnumInClass", "EnumOutput", "Hobby", "MapKey", "NamedArgsSingleEnum", "NamedArgsSingleEnumList", "OptionalTest_CategoryType", "OrderStatus", "Tag", "TestEnum", ] end diff --git a/integ-tests/ruby/baml_client/types.rb b/integ-tests/ruby/baml_client/types.rb index 320f35062..33c5e3734 100644 --- a/integ-tests/ruby/baml_client/types.rb +++ b/integ-tests/ruby/baml_client/types.rb @@ -172,6 +172,10 @@ class FakeImage < T::Struct; end class FlightConfirmation < T::Struct; end class FooAny < T::Struct; end class Forest < T::Struct; end + class FormatterTest0 < T::Struct; end + class FormatterTest1 < T::Struct; end + class FormatterTest2 < T::Struct; end + class FormatterTest3 < T::Struct; end class GroceryReceipt < T::Struct; end class InnerClass < T::Struct; end class InnerClass2 < T::Struct; end @@ -611,6 +615,62 @@ def initialize(props) @props = props end end + class FormatterTest0 < T::Struct + include Baml::Sorbet::Struct + const :lorem, String + const :ipsum, String + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end + class FormatterTest1 < T::Struct + include Baml::Sorbet::Struct + const :lorem, String + const :ipsum, String + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end + class FormatterTest2 < T::Struct + include Baml::Sorbet::Struct + const :lorem, String + const :ipsum, String + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end + class FormatterTest3 < T::Struct + include Baml::Sorbet::Struct + const :lorem, String + const :ipsum, String + + def initialize(props) + super( + lorem: props[:lorem], + ipsum: props[:ipsum], + ) + + @props = props + end + end class GroceryReceipt < T::Struct include Baml::Sorbet::Struct const :receiptId, String diff --git a/integ-tests/typescript/baml_client/async_client.ts b/integ-tests/typescript/baml_client/async_client.ts index 91b7ed5d1..17c98f3b9 100644 --- a/integ-tests/typescript/baml_client/async_client.ts +++ b/integ-tests/typescript/baml_client/async_client.ts @@ -17,7 +17,7 @@ $ pnpm add @boundaryml/baml // biome-ignore format: autogenerated code import { BamlRuntime, FunctionResult, BamlCtxManager, BamlStream, Image, ClientRegistry, BamlValidationError, createBamlValidationError } from "@boundaryml/baml" import { Checked, Check } from "./types" -import {BigNumbers, BinaryNode, Blah, BlockConstraint, BlockConstraintForParam, BookOrder, ClassOptionalOutput, ClassOptionalOutput2, ClassToRecAlias, ClassWithImage, CompoundBigNumbers, ContactInfo, CustomTaskResult, DummyOutput, DynInputOutput, DynamicClassOne, DynamicClassTwo, DynamicOutput, Earthling, Education, Email, EmailAddress, Event, FakeImage, FlightConfirmation, FooAny, Forest, GroceryReceipt, InnerClass, InnerClass2, InputClass, InputClassNested, LinkedList, LinkedListAliasNode, LiteralClassHello, LiteralClassOne, LiteralClassTwo, MalformedConstraints, MalformedConstraints2, Martian, MergeAttrs, NamedArgsSingleClass, Nested, Nested2, NestedBlockConstraint, NestedBlockConstraintForParam, Node, NodeWithAliasIndirection, OptionalListAndMap, OptionalTest_Prop1, OptionalTest_ReturnType, OrderInfo, OriginalA, OriginalB, Person, PhoneNumber, Quantity, RaysData, ReceiptInfo, ReceiptItem, Recipe, Resume, Schema, SearchParams, SomeClassNestedDynamic, StringToClassEntry, TestClassAlias, TestClassNested, TestClassWithEnum, TestOutputClass, Tree, TwoStoriesOneTitle, UnionTest_ReturnType, WithReasoning, AliasedEnum, Category, Category2, Category3, Color, DataType, DynEnumOne, DynEnumTwo, EnumInClass, EnumOutput, Hobby, MapKey, NamedArgsSingleEnum, NamedArgsSingleEnumList, OptionalTest_CategoryType, OrderStatus, Tag, TestEnum} from "./types" +import {BigNumbers, BinaryNode, Blah, BlockConstraint, BlockConstraintForParam, BookOrder, ClassOptionalOutput, ClassOptionalOutput2, ClassToRecAlias, ClassWithImage, CompoundBigNumbers, ContactInfo, CustomTaskResult, DummyOutput, DynInputOutput, DynamicClassOne, DynamicClassTwo, DynamicOutput, Earthling, Education, Email, EmailAddress, Event, FakeImage, FlightConfirmation, FooAny, Forest, FormatterTest0, FormatterTest1, FormatterTest2, FormatterTest3, GroceryReceipt, InnerClass, InnerClass2, InputClass, InputClassNested, LinkedList, LinkedListAliasNode, LiteralClassHello, LiteralClassOne, LiteralClassTwo, MalformedConstraints, MalformedConstraints2, Martian, MergeAttrs, NamedArgsSingleClass, Nested, Nested2, NestedBlockConstraint, NestedBlockConstraintForParam, Node, NodeWithAliasIndirection, OptionalListAndMap, OptionalTest_Prop1, OptionalTest_ReturnType, OrderInfo, OriginalA, OriginalB, Person, PhoneNumber, Quantity, RaysData, ReceiptInfo, ReceiptItem, Recipe, Resume, Schema, SearchParams, SomeClassNestedDynamic, StringToClassEntry, TestClassAlias, TestClassNested, TestClassWithEnum, TestOutputClass, Tree, TwoStoriesOneTitle, UnionTest_ReturnType, WithReasoning, AliasedEnum, Category, Category2, Category3, Color, DataType, DynEnumOne, DynEnumTwo, EnumInClass, EnumOutput, Hobby, MapKey, NamedArgsSingleEnum, NamedArgsSingleEnumList, OptionalTest_CategoryType, OrderStatus, Tag, TestEnum} from "./types" import TypeBuilder from "./type_builder" import { DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX, DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME } from "./globals" diff --git a/integ-tests/typescript/baml_client/inlinedbaml.ts b/integ-tests/typescript/baml_client/inlinedbaml.ts index 68c3cbd1a..d9ca6330a 100644 --- a/integ-tests/typescript/baml_client/inlinedbaml.ts +++ b/integ-tests/typescript/baml_client/inlinedbaml.ts @@ -26,6 +26,7 @@ const fileMap = { "fiddle-examples/extract-receipt-info.baml": "class ReceiptItem {\n name string\n description string?\n quantity int\n price float\n}\n\nclass ReceiptInfo {\n items ReceiptItem[]\n total_cost float?\n venue \"barisa\" | \"ox_burger\"\n}\n\nfunction ExtractReceiptInfo(email: string, reason: \"curiosity\" | \"personal_finance\") -> ReceiptInfo {\n client GPT4o\n prompt #\"\n Given the receipt below:\n\n ```\n {{email}}\n ```\n\n {{ ctx.output_format }}\n \"#\n}\n\n", "fiddle-examples/images/image.baml": "function DescribeImage(img: image) -> string {\n client GPT4o\n prompt #\"\n {{ _.role(\"user\") }}\n\n\n Describe the image below in 20 words:\n {{ img }}\n \"#\n\n}\n\nclass FakeImage {\n url string\n}\n\nclass ClassWithImage {\n myImage image\n param2 string\n fake_image FakeImage\n}\n\n// chat role user present\nfunction DescribeImage2(classWithImage: ClassWithImage, img2: image) -> string { \n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 2 answers that answer the following commands.\n\n 1. Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n 2. Also tell me what's happening here in one sentence:\n {{ img2 }}\n \"#\n}\n\n// no chat role\nfunction DescribeImage3(classWithImage: ClassWithImage, img2: image) -> string {\n client GPT4Turbo\n prompt #\"\n Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n Tell me also what's happening here in one sentence and relate it to the word {{ classWithImage.param2 }}:\n {{ img2 }}\n \"#\n}\n\n\n// system prompt and chat prompt\nfunction DescribeImage4(classWithImage: ClassWithImage, img2: image) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"system\")}}\n\n Describe this in 5 words:\n {{ classWithImage.myImage }}\n\n Tell me also what's happening here in one sentence and relate it to the word {{ classWithImage.param2 }}:\n {{ img2 }}\n \"#\n}\n\ntest TestName {\n functions [DescribeImage]\n args {\n img { url \"https://imgs.xkcd.com/comics/standards.png\"}\n }\n}\n", "fiddle-examples/symbol-tuning.baml": "enum Category3 {\n Refund @alias(\"k1\")\n @description(\"Customer wants to refund a product\")\n\n CancelOrder @alias(\"k2\")\n @description(\"Customer wants to cancel an order\")\n\n TechnicalSupport @alias(\"k3\")\n @description(\"Customer needs help with a technical issue unrelated to account creation or login\")\n\n AccountIssue @alias(\"k4\")\n @description(\"Specifically relates to account-login or account-creation\")\n\n Question @alias(\"k5\")\n @description(\"Customer has a question\")\n}\n\nfunction ClassifyMessage3(input: string) -> Category {\n client GPT4\n\n prompt #\"\n Classify the following INPUT into ONE\n of the following categories:\n\n INPUT: {{ input }}\n\n {{ ctx.output_format }}\n\n Response:\n \"#\n}", + "formatter/test-comments.baml": "class FormatterTest0 {\n lorem string // trailing comments should be preserved\n ipsum string\n}\n\nclass FormatterTest1 {\n lorem string\n ipsum string\n // dolor string\n}\n\nclass FormatterTest2 {\n // \"lorem\" is a latin word\n lorem string\n // \"ipsum\" is a latin word\n ipsum string\n}\n\nclass FormatterTest3 {\n lorem string\n ipsum string\n // Lorem ipsum dolor sit amet\n // Consectetur adipiscing elit\n // Sed do eiusmod tempor incididunt\n // Ut labore et dolore magna aliqua\n // Ut enim ad minim veniam\n}", "generators.baml": "generator lang_python {\n output_type python/pydantic\n output_dir \"../python\"\n version \"0.71.1\"\n}\n\ngenerator lang_typescript {\n output_type typescript\n output_dir \"../typescript\"\n version \"0.71.1\"\n}\n\ngenerator lang_ruby {\n output_type ruby/sorbet\n output_dir \"../ruby\"\n version \"0.71.1\"\n}\n\n// generator openapi {\n// output_type rest/openapi\n// output_dir \"../openapi\"\n// version \"0.71.1\"\n// on_generate \"rm .gitignore\"\n// }\n", "test-files/aliases/aliased-inputs.baml": "\nclass InputClass {\n key string @alias(\"color\")\n key2 string\n}\n\n\nclass InputClassNested {\n key string\n nested InputClass @alias(\"interesting-key\")\n}\n \n\nfunction AliasedInputClass(input: InputClass) -> string {\n client GPT35\n prompt #\"\n\n {{input}}\n\n This is a test. What's the name of the first json key above? Remember, tell me the key, not value.\n \"#\n}\n \nfunction AliasedInputClass2(input: InputClass) -> string {\n client GPT35\n prompt #\"\n\n {# making sure we can still access the original key #}\n {%if input.key == \"tiger\"%}\n Repeat this value back to me, and nothing else: {{input.key}}\n {%endif%}\n \"#\n}\n \n function AliasedInputClassNested(input: InputClassNested) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\")}}\n\n {{input}}\n\n This is a test. What's the name of the second json key above? Remember, tell me the key, not value.\n \"#\n }\n\n\nenum AliasedEnum {\n KEY_ONE @alias(\"tiger\")\n KEY_TWO\n}\n\nfunction AliasedInputEnum(input: AliasedEnum) -> string {\n client GPT4o\n prompt #\"\n {{ _.role(\"user\")}}\n\n\n Write out this word only in your response, in lowercase:\n ---\n {{input}}\n ---\n Answer:\n \"#\n}\n\n\nfunction AliasedInputList(input: AliasedEnum[]) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\")}}\n Given this array:\n ---\n {{input}}\n ---\n\n Return the first element in the array:\n \"#\n}\n\n", "test-files/aliases/classes.baml": "class TestClassAlias {\n key string @alias(\"key-dash\") @description(#\"\n This is a description for key\n af asdf\n \"#)\n key2 string @alias(\"key21\")\n key3 string @alias(\"key with space\")\n key4 string //unaliased\n key5 string @alias(\"key.with.punctuation/123\")\n}\n\nfunction FnTestClassAlias(input: string) -> TestClassAlias {\n client GPT35\n prompt #\"\n {{ctx.output_format}}\n \"#\n}\n\ntest FnTestClassAlias {\n functions [FnTestClassAlias]\n args {\n input \"example input\"\n }\n}\n", diff --git a/integ-tests/typescript/baml_client/sync_client.ts b/integ-tests/typescript/baml_client/sync_client.ts index 67709fc70..ed6484a89 100644 --- a/integ-tests/typescript/baml_client/sync_client.ts +++ b/integ-tests/typescript/baml_client/sync_client.ts @@ -17,7 +17,7 @@ $ pnpm add @boundaryml/baml // biome-ignore format: autogenerated code import { BamlRuntime, FunctionResult, BamlCtxManager, BamlSyncStream, Image, ClientRegistry, createBamlValidationError, BamlValidationError } from "@boundaryml/baml" import { Checked, Check } from "./types" -import {BigNumbers, BinaryNode, Blah, BlockConstraint, BlockConstraintForParam, BookOrder, ClassOptionalOutput, ClassOptionalOutput2, ClassToRecAlias, ClassWithImage, CompoundBigNumbers, ContactInfo, CustomTaskResult, DummyOutput, DynInputOutput, DynamicClassOne, DynamicClassTwo, DynamicOutput, Earthling, Education, Email, EmailAddress, Event, FakeImage, FlightConfirmation, FooAny, Forest, GroceryReceipt, InnerClass, InnerClass2, InputClass, InputClassNested, LinkedList, LinkedListAliasNode, LiteralClassHello, LiteralClassOne, LiteralClassTwo, MalformedConstraints, MalformedConstraints2, Martian, MergeAttrs, NamedArgsSingleClass, Nested, Nested2, NestedBlockConstraint, NestedBlockConstraintForParam, Node, NodeWithAliasIndirection, OptionalListAndMap, OptionalTest_Prop1, OptionalTest_ReturnType, OrderInfo, OriginalA, OriginalB, Person, PhoneNumber, Quantity, RaysData, ReceiptInfo, ReceiptItem, Recipe, Resume, Schema, SearchParams, SomeClassNestedDynamic, StringToClassEntry, TestClassAlias, TestClassNested, TestClassWithEnum, TestOutputClass, Tree, TwoStoriesOneTitle, UnionTest_ReturnType, WithReasoning, AliasedEnum, Category, Category2, Category3, Color, DataType, DynEnumOne, DynEnumTwo, EnumInClass, EnumOutput, Hobby, MapKey, NamedArgsSingleEnum, NamedArgsSingleEnumList, OptionalTest_CategoryType, OrderStatus, Tag, TestEnum} from "./types" +import {BigNumbers, BinaryNode, Blah, BlockConstraint, BlockConstraintForParam, BookOrder, ClassOptionalOutput, ClassOptionalOutput2, ClassToRecAlias, ClassWithImage, CompoundBigNumbers, ContactInfo, CustomTaskResult, DummyOutput, DynInputOutput, DynamicClassOne, DynamicClassTwo, DynamicOutput, Earthling, Education, Email, EmailAddress, Event, FakeImage, FlightConfirmation, FooAny, Forest, FormatterTest0, FormatterTest1, FormatterTest2, FormatterTest3, GroceryReceipt, InnerClass, InnerClass2, InputClass, InputClassNested, LinkedList, LinkedListAliasNode, LiteralClassHello, LiteralClassOne, LiteralClassTwo, MalformedConstraints, MalformedConstraints2, Martian, MergeAttrs, NamedArgsSingleClass, Nested, Nested2, NestedBlockConstraint, NestedBlockConstraintForParam, Node, NodeWithAliasIndirection, OptionalListAndMap, OptionalTest_Prop1, OptionalTest_ReturnType, OrderInfo, OriginalA, OriginalB, Person, PhoneNumber, Quantity, RaysData, ReceiptInfo, ReceiptItem, Recipe, Resume, Schema, SearchParams, SomeClassNestedDynamic, StringToClassEntry, TestClassAlias, TestClassNested, TestClassWithEnum, TestOutputClass, Tree, TwoStoriesOneTitle, UnionTest_ReturnType, WithReasoning, AliasedEnum, Category, Category2, Category3, Color, DataType, DynEnumOne, DynEnumTwo, EnumInClass, EnumOutput, Hobby, MapKey, NamedArgsSingleEnum, NamedArgsSingleEnumList, OptionalTest_CategoryType, OrderStatus, Tag, TestEnum} from "./types" import TypeBuilder from "./type_builder" import { DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX, DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME } from "./globals" diff --git a/integ-tests/typescript/baml_client/type_builder.ts b/integ-tests/typescript/baml_client/type_builder.ts index 2d40ca5d8..56a984f5b 100644 --- a/integ-tests/typescript/baml_client/type_builder.ts +++ b/integ-tests/typescript/baml_client/type_builder.ts @@ -50,7 +50,7 @@ export default class TypeBuilder { constructor() { this.tb = new _TypeBuilder({ classes: new Set([ - "BigNumbers","BinaryNode","Blah","BlockConstraint","BlockConstraintForParam","BookOrder","ClassOptionalOutput","ClassOptionalOutput2","ClassToRecAlias","ClassWithImage","CompoundBigNumbers","ContactInfo","CustomTaskResult","DummyOutput","DynInputOutput","DynamicClassOne","DynamicClassTwo","DynamicOutput","Earthling","Education","Email","EmailAddress","Event","FakeImage","FlightConfirmation","FooAny","Forest","GroceryReceipt","InnerClass","InnerClass2","InputClass","InputClassNested","LinkedList","LinkedListAliasNode","LiteralClassHello","LiteralClassOne","LiteralClassTwo","MalformedConstraints","MalformedConstraints2","Martian","MergeAttrs","NamedArgsSingleClass","Nested","Nested2","NestedBlockConstraint","NestedBlockConstraintForParam","Node","NodeWithAliasIndirection","OptionalListAndMap","OptionalTest_Prop1","OptionalTest_ReturnType","OrderInfo","OriginalA","OriginalB","Person","PhoneNumber","Quantity","RaysData","ReceiptInfo","ReceiptItem","Recipe","Resume","Schema","SearchParams","SomeClassNestedDynamic","StringToClassEntry","TestClassAlias","TestClassNested","TestClassWithEnum","TestOutputClass","Tree","TwoStoriesOneTitle","UnionTest_ReturnType","WithReasoning", + "BigNumbers","BinaryNode","Blah","BlockConstraint","BlockConstraintForParam","BookOrder","ClassOptionalOutput","ClassOptionalOutput2","ClassToRecAlias","ClassWithImage","CompoundBigNumbers","ContactInfo","CustomTaskResult","DummyOutput","DynInputOutput","DynamicClassOne","DynamicClassTwo","DynamicOutput","Earthling","Education","Email","EmailAddress","Event","FakeImage","FlightConfirmation","FooAny","Forest","FormatterTest0","FormatterTest1","FormatterTest2","FormatterTest3","GroceryReceipt","InnerClass","InnerClass2","InputClass","InputClassNested","LinkedList","LinkedListAliasNode","LiteralClassHello","LiteralClassOne","LiteralClassTwo","MalformedConstraints","MalformedConstraints2","Martian","MergeAttrs","NamedArgsSingleClass","Nested","Nested2","NestedBlockConstraint","NestedBlockConstraintForParam","Node","NodeWithAliasIndirection","OptionalListAndMap","OptionalTest_Prop1","OptionalTest_ReturnType","OrderInfo","OriginalA","OriginalB","Person","PhoneNumber","Quantity","RaysData","ReceiptInfo","ReceiptItem","Recipe","Resume","Schema","SearchParams","SomeClassNestedDynamic","StringToClassEntry","TestClassAlias","TestClassNested","TestClassWithEnum","TestOutputClass","Tree","TwoStoriesOneTitle","UnionTest_ReturnType","WithReasoning", ]), enums: new Set([ "AliasedEnum","Category","Category2","Category3","Color","DataType","DynEnumOne","DynEnumTwo","EnumInClass","EnumOutput","Hobby","MapKey","NamedArgsSingleEnum","NamedArgsSingleEnumList","OptionalTest_CategoryType","OrderStatus","Tag","TestEnum", diff --git a/integ-tests/typescript/baml_client/types.ts b/integ-tests/typescript/baml_client/types.ts index fae4be644..ae8dfea30 100644 --- a/integ-tests/typescript/baml_client/types.ts +++ b/integ-tests/typescript/baml_client/types.ts @@ -330,6 +330,30 @@ export interface Forest { } +export interface FormatterTest0 { + lorem: string + ipsum: string + +} + +export interface FormatterTest1 { + lorem: string + ipsum: string + +} + +export interface FormatterTest2 { + lorem: string + ipsum: string + +} + +export interface FormatterTest3 { + lorem: string + ipsum: string + +} + export interface GroceryReceipt { receiptId: string storeName: string diff --git a/tools/build b/tools/build index 2402b8781..e92be8d43 100755 --- a/tools/build +++ b/tools/build @@ -166,6 +166,26 @@ case "$_path" in fi ;; + /integ-tests/baml_src | /integ-tests/baml_src/* ) + command="cd ${_repo_root}/engine" + command="${command} && git checkout -- ${_repo_root}/integ-tests/baml_src/formatter/test-comments.baml" + command="${command} && cargo run --bin baml-cli fmt ${_repo_root}/integ-tests/baml_src/formatter/test-comments.baml" + command="${command} && git difftool --extcmd icdiff -y -- ${_repo_root}/integ-tests/baml_src/formatter/test-comments.baml" + if [ "$_watch_mode" -eq 1 ]; then + npx nodemon \ + --ext py,pyi,rs,j2,toml \ + --watch "${_repo_root}/engine" \ + --watch . \ + --ignore 'baml_client/**' \ + --ignore 'target/**' \ + --ignore .pytest_cache \ + --exec "${command}" + else + eval "${command}" + date + fi + ;; + /integ-tests/python | /integ-tests/python/* ) command="true" command="env -u CONDA_PREFIX poetry run maturin develop --manifest-path ${_repo_root}/engine/language_client_python/Cargo.toml"