Skip to content

Commit

Permalink
Support single-line quoteless JSON parsing (#1170)
Browse files Browse the repository at this point in the history
Previously, minified JSON didn't parse; now we support minified JSON for
numeric, boolean, and null values.

We could improve it further for literals.

<!-- ELLIPSIS_HIDDEN -->



> [!IMPORTANT]
> Add support for parsing single-line JSON without quotes for numbers,
booleans, and null in `JsonParseState`, with new test cases to validate
functionality.
> 
>   - **Behavior**:
> - Support parsing single-line JSON without quotes for numbers,
booleans, and null in `JsonParseState`.
> - Handles cases where values are followed by a comma and space,
ensuring correct parsing.
>   - **Tests**:
> - Add
`test_recursive_union_on_multiple_fields_single_line_without_quotes` and
`test_recursive_single_line` in `test_class.rs` to validate new parsing
behavior.
> - Add
`test_recursive_union_on_multiple_fields_single_line_without_quotes_complex`
to test complex nested structures without quotes.
> 
> <sup>This description was created by </sup>[<img alt="Ellipsis"
src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=BoundaryML%2Fbaml&utm_source=github&utm_medium=referral)<sup>
for 9aa485c. It will automatically
update as commits are pushed.</sup>

<!-- ELLIPSIS_HIDDEN -->
  • Loading branch information
hellovai authored Nov 14, 2024
1 parent 68806e3 commit b1b9cab
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,20 @@ impl JsonParseState {
counter = idx;
match c {
',' => {
// Check if we have just numeric values in the string so far.
let Some((JsonCollection::UnquotedString(current_value), _)) =
self.collection_stack.last()
else {
return Some(idx);
};

// The current value could be something numeric-looking.
let is_numeric = current_value.trim().parse::<f64>().is_ok();
let is_bool = current_value.trim().eq_ignore_ascii_case("true")
|| current_value.trim().eq_ignore_ascii_case("false");
let is_null = current_value.trim().eq_ignore_ascii_case("null");
let is_possible_value = is_numeric || is_bool || is_null;

if let Some((_, next_c)) = next.peek() {
match next_c {
'\n' => {
Expand All @@ -181,6 +195,9 @@ impl JsonParseState {
}
' ' => {
log::debug!("Testing for comment after space + comma");
if is_possible_value {
return Some(idx);
}
// If after the space we have "//" or "/*" or the beginning of a key, we'll close the string
let mut buffer = ",".to_string();
let mut anything_but_whitespace = false;
Expand All @@ -193,7 +210,7 @@ impl JsonParseState {
'\n' => {
if anything_but_whitespace {
} else {
// Likely end of the key as the LLM generated a (', ' token by mistake)
// Likely end of the key as the LLM generated a ", " token by mistake instead of a ","
// so drop the comma
log::debug!("Closing due to: newline after comma + space");
return Some(idx);
Expand Down
114 changes: 105 additions & 9 deletions engine/baml-lib/jsonish/src/tests/test_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ test_deserializer!(
);

test_deserializer!(
test_same_recursive_union_on_multiple_fields,
test_recursive_union_on_multiple_fields_single_line,
r#"class Foo {
rec_one Foo | int
rec_two Foo | int
Expand All @@ -1357,19 +1357,49 @@ test_deserializer!(
r#"
The answer is
{
"rec_one": { "rec_one": 1, "rec_two": 2 },
"rec_two": {
"rec_one": { "rec_one": 1, "rec_two": 2 },
"rec_two": { "rec_one": 1, "rec_two": 2 }
}
},
Anything else I can help with?
"#,
FieldType::Class("Foo".to_string()),
{
"rec_one": {
"rec_one": 1,
"rec_two": 2
},
"rec_two": {
"rec_one": {
"rec_one": 1,
"rec_two": 2
},
"rec_two": {
"rec_one": {
"rec_one": 1,
"rec_two": 2
},
"rec_two": {
"rec_one": 1,
"rec_two": 2
}
"rec_one": 1,
"rec_two": 2
}
},
}
);


test_deserializer!(
test_recursive_union_on_multiple_fields_single_line_without_quotes,
r#"class Foo {
rec_one Foo | int
rec_two Foo | int
}
"#,
r#"
The answer is
{
rec_one: { rec_one: 1, rec_two: 2 },
rec_two: {
rec_one: { rec_one: 1, rec_two: 2 },
rec_two: { rec_one: 1, rec_two: 2 }
}
},
Expand All @@ -1393,3 +1423,69 @@ test_deserializer!(
},
}
);


// Single-line, quoteless object case: the raw input is one line,
// `{ rec_one: true, rec_two: false },`, with unquoted keys and bare boolean
// values, wrapped in surrounding prose ("The answer is" ... "Anything else I
// can help with?"). The expected value holds the same two booleans under
// quoted keys.
// NOTE(review): the macro arguments appear to be (test name, class schema,
// raw input text, target type, expected JSON) — confirm against the
// `test_deserializer!` definition, which is not visible here.
test_deserializer!(
test_recursive_single_line,
r#"class Foo {
rec_one Foo | int | bool
rec_two Foo | int | bool
}
"#,
r#"
The answer is
{ rec_one: true, rec_two: false },
Anything else I can help with?
"#,
FieldType::Class("Foo".to_string()),
{
"rec_one": true,
"rec_two": false
}
);


// Nested quoteless case: every key in the raw input is unquoted, and the
// values mix nested objects written on a single line with bare `true`,
// `false`, `null`, and integer literals, most of them followed by ", " or
// " }". The input is again wrapped in surrounding prose. The expected value
// mirrors the same nesting with quoted keys.
// NOTE(review): the macro arguments appear to be (test name, class schema,
// raw input text, target type, expected JSON) — confirm against the
// `test_deserializer!` definition, which is not visible here.
test_deserializer!(
test_recursive_union_on_multiple_fields_single_line_without_quotes_complex,
r#"class Foo {
rec_one Foo | int | bool
rec_two Foo | int | bool | null
}
"#,
r#"
The answer is
{
rec_one: { rec_one: { rec_one: true, rec_two: false }, rec_two: null },
rec_two: {
rec_one: { rec_one: { rec_one: 1, rec_two: 2 }, rec_two: null },
rec_two: { rec_one: 1, rec_two: null }
}
},
Anything else I can help with?
"#,
FieldType::Class("Foo".to_string()),
{
"rec_one": {
"rec_one": {
"rec_one": true,
"rec_two": false
},
"rec_two": null
},
"rec_two": {
"rec_one": {
"rec_one": {
"rec_one": 1,
"rec_two": 2
},
"rec_two": null
},
"rec_two": {
"rec_one": 1,
"rec_two": null
}
},
}
);

0 comments on commit b1b9cab

Please sign in to comment.