Skip to content

Commit

Permalink
chg: expression parsing optimisation
Browse files Browse the repository at this point in the history
- usage of lru_cache on `parse_expression` helps performance but it
  seems to be a diminishing return for cache sizes > 128, and the
  memory used by the cached strings and ExpressionLexerTokens can become
  significant if there are lots of long strings being parsed
- added option to get the parsed token type only, since calls through
  `is_single_token_expression` only care about the token type
- for token type checks, ignore empty strings or strings that would be
  too short to be that token type
  • Loading branch information
lindsay-stevens committed Nov 30, 2024
1 parent ff99279 commit 61c4df1
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions pyxform/parsing/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import lru_cache


def get_expression_lexer() -> re.Scanner:
def get_expression_lexer(name_only: bool = False) -> re.Scanner:
"""
Get a expression lexer (scanner) for parsing.
"""
Expand Down Expand Up @@ -61,7 +61,9 @@ def get_expression_lexer() -> re.Scanner:
}

def get_tokenizer(name):
def tokenizer(scan, value):
def tokenizer(scan, value) -> ExpLexerToken | str:
if name_only:
return name
return ExpLexerToken(name, value, scan.match.start(), scan.match.end())

return tokenizer
Expand All @@ -84,9 +86,10 @@ def __init__(self, name: str, value: str, start: int, end: int) -> None:


# Module-level lexers, built once at import time and shared by all callers.
_EXPRESSION_LEXER = get_expression_lexer()
# Name-only variant: emits just the token type name per match, avoiding the
# memory cost of full ExpLexerToken objects for simple token-type checks.
_TOKEN_NAME_LEXER = get_expression_lexer(name_only=True)


@lru_cache(maxsize=1024)
@lru_cache(maxsize=128)
def parse_expression(text: str) -> tuple[list[ExpLexerToken], str]:
"""
Parse an expression.
def is_single_token_expression(expression: str, token_types: Iterable[str]) -> bool:
    """
    Does the expression contain a single token of one of the provided token types?

    :param expression: The expression to check. Leading/trailing whitespace is
      stripped before lexing; an empty or falsy expression is never a match.
    :param token_types: Token type names to match against, e.g. ("PYXFORM_REF",).
    :return: True if the expression lexes to exactly one token whose type name is
      in token_types, otherwise False.
    """
    if not expression:
        return False
    # The name-only lexer is used because only the token type name is needed
    # here; it avoids allocating ExpLexerToken objects.
    tokens, _ = _TOKEN_NAME_LEXER.scan(expression.strip())
    # Exactly one token, of an allowed type. Any unscanned remainder from the
    # lexer is ignored, preserving the original partial-scan behaviour.
    return 1 == len(tokens) and tokens[0] in token_types
def is_pyxform_reference(value: str) -> bool:
    """
    Does the input string contain only a valid Pyxform reference? e.g. ${my_question}

    :param value: String to check.
    :return: True if the string is a single PYXFORM_REF token, otherwise False.
    """
    # "${}" accounts for 3 characters, and a reference needs at least one more
    # character for the name inside, so length <= 3 can never be a reference.
    if not value or len(value) <= 3:
        return False
    return is_single_token_expression(expression=value, token_types=("PYXFORM_REF",))


def is_xml_tag(value: str) -> bool:
    """
    Does the input string contain only a valid XML tag / element name?

    :param value: String to check.
    :return: True if the string lexes to a single NAME token, otherwise False.
    """
    # Only lex non-empty input; anything falsy can never be a valid name.
    if value:
        return is_single_token_expression(expression=value, token_types=("NAME",))
    return False

0 comments on commit 61c4df1

Please sign in to comment.