diff --git a/.buildinfo b/.buildinfo
new file mode 100644
index 0000000..f298a08
--- /dev/null
+++ b/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 794f69d2daab1a72179a29ae4bd05019
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 0000000..e69de29
diff --git a/_downloads/1a4f80c19cefb4f346c42c68da6fff8e/example.py b/_downloads/1a4f80c19cefb4f346c42c68da6fff8e/example.py
new file mode 100644
index 0000000..173625c
--- /dev/null
+++ b/_downloads/1a4f80c19cefb4f346c42c68da6fff8e/example.py
@@ -0,0 +1,28 @@
+import logging
+from pathlib import Path
+from pprint import pprint
+
+from textmate_grammar.grammars import matlab
+from textmate_grammar.language import LanguageParser
+from textmate_grammar.utils.cache import init_cache
+
+# Initialize shelved cache
+init_cache("shelve")
+
+# Enable debug logging
+logging.getLogger().setLevel(logging.DEBUG)
+logging.getLogger("textmate_grammar").setLevel(logging.INFO)
+
+# Initialize language parser
+parser = LanguageParser(matlab.GRAMMAR)
+
+# Parse file
+filePath = Path(__file__).parent / "syntaxes" / "matlab" / "AnEnum.m"
+element = parser.parse_file(filePath)
+
+# Print element
+element.print()
+
+# Find all enum members
+enum_members = element.findall('variable.other.enummember.matlab')
+pprint(enum_members)
\ No newline at end of file
+from __future__ import annotations
+
+from abc import ABC
+from collections import defaultdict
+from itertools import groupby
+from pprint import pprint
+from typing import TYPE_CHECKING, Generator
+
+from .utils.handler import POS, ContentHandler, Match, Pattern
+from .utils.logger import LOGGER
+
+if TYPE_CHECKING:
+ from .parser import GrammarParser
+
+
+TOKEN_DICT = dict[POS, list[str]]
+
+
+
+class Capture:
+ """A captured matching group.
+
+ After matching, any pattern can have a number of capture groups for which subsequent parsers can be defined.
+ The Capture object stores this subsequent parse to be dispatched at a later moment.
+ """
+
+ def __init__(
+ self,
+ handler: ContentHandler,
+ pattern: Pattern,
+ matching: Match,
+ parsers: dict[int, GrammarParser],
+ starting: tuple[int, int],
+ boundary: tuple[int, int],
+ key: str = "",
+ **kwargs,
+ ):
+ """
+ Initialize a new instance of the Capture class.
+
+ :param handler: The content handler for the element.
+ :param pattern: The pattern used for matching.
+ :param matching: The match object.
+ :param parsers: A dictionary of grammar parsers.
+ :param starting: The starting position of the element.
+ :param boundary: The boundary position of the element.
+ :param key: The key for the element. Defaults to "".
+ :param **kwargs: Additional keyword arguments.
+ :returns: None
+ """
+ self.handler = handler
+ self.pattern = pattern
+ self.matching = matching
+ self.parsers = parsers
+ self.starting = starting
+ self.boundary = boundary
+ self.key = key
+ self.kwargs = kwargs
+
+ def __eq__(self, other: object) -> bool:
+ if isinstance(other, Capture):
+ return bool(
+ self.key == other.key
+ and self.starting == other.starting
+ and self.matching.group() == other.matching.group()
+ )
+ else:
+ return False
+
+ def __repr__(self) -> str:
+ return f"@capture<{self.key}>"
+
+
+ def dispatch(self) -> list[Capture | ContentElement]:
+ """Dispatches the remaining parse of the capture group.
+
+ This method iterates over the defined parsers for the capture group and dispatches the remaining parse
+ based on the captured elements. It returns a list of captured elements or captures.
+
+ :return: A list of Capture or ContentElement objects representing the parsed elements.
+ """
+ elements = []
+ for group_id, parser in self.parsers.items():
+ if group_id > self.pattern.number_of_captures():
+ LOGGER.warning(
+ f"The capture group {group_id} does not exist in pattern {self.pattern._pattern}"
+ )
+ continue
+
+ group_span = self.matching.span(group_id)
+
+ # Empty group
+ if group_span[0] == group_span[1]:
+ continue
+
+ group_starting = (self.starting[0], group_span[0])
+ group_boundary = (self.starting[0], group_span[1])
+
+ if (
+ parser == self
+ and group_starting == self.starting
+ and group_boundary == self.boundary
+ ):
+ LOGGER.warning("Parser loop detected, continuing...", self, self.starting)
+ continue
+
+ # Dispatch the parse
+ self.kwargs.pop("greedy", None)
+ parsed, captured_elements, _ = parser._parse(
+ self.handler,
+ starting=group_starting,
+ boundary=group_boundary,
+ find_one=self.kwargs.pop("find_one", False),
+ parent_capture=self,
+ **self.kwargs,
+ )
+
+ if parsed:
+ elements.extend(captured_elements)
+
+ return elements
+
+
+
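+# Illustrative sketch (not part of the module): pending Capture objects are
+# resolved lazily; _dispatch_list below applies exactly this pattern:
+#
+#     resolved = []
+#     for item in pending:                      # list[Capture | ContentElement]
+#         if isinstance(item, Capture):
+#             resolved.extend(item.dispatch())  # parse the capture groups now
+#         else:
+#             resolved.append(item)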
+
+def _dispatch_list(
+ pending_elements: list[Capture | ContentElement], parent: ContentElement | None = None
+) -> list[ContentElement]:
+ """Dispatches all captured parsers in the list."""
+ elements = []
+ for item in pending_elements:
+ if isinstance(item, Capture):
+ captured_elements: list[ContentElement] = _dispatch_list(item.dispatch())
+ elements.extend(captured_elements)
+ elif item != parent:
+ elements.append(item)
+ for element in elements:
+ element.parent = parent
+ return elements
+
+
+def _str_to_list(input: str | list[str]) -> list[str]:
+ if isinstance(input, str):
+ return [input] if input else []
+ else:
+ return input
+
+
+
+class ContentElement:
+ """The parsed grammar element."""
+
+ def __init__(
+ self,
+ token: str,
+ grammar: dict,
+ content: str,
+ characters: dict[POS, str],
+ children: list[Capture | ContentElement] | None = None,
+ ) -> None:
+ """
+ Initialize a new instance of the ContentElement class.
+
+ :param token: The token associated with the element.
+ :param grammar: The grammar associated with the element.
+ :param content: The content associated with the element.
+ :param characters: The characters associated with the element.
+ :param children: The children associated with the element. Defaults to None.
+ """
+ if children is None:
+ children = []
+ self.token = token
+ self.grammar = grammar
+ self.content = content
+ self.characters = characters
+ self._children_captures = children
+ self._dispatched: bool = False
+ self.parent: ContentElement | None = None
+
+ @property
+ def _subelements(self) -> list[ContentElement]:
+ return self.children
+
+ @property
+ def children(self) -> list[ContentElement]:
+ """
+ Returns a list of children elements.
+
+ If the elements have not been dispatched yet, this method will dispatch them before returning.
+
+ :return: A list of ContentElement objects representing the children elements.
+ """
+ if not self._dispatched:
+ self._dispatch()
+ return self._children
+
+ def _dispatch(self, nested: bool = False):
+ """
+ Dispatches the content element and its children.
+
+ :param nested: Indicates whether the dispatch is nested within another dispatch.
+ :type nested: bool
+ :return: None
+ """
+ if self._dispatched:
+ return
+ self._dispatched = True
+ self._children: list[ContentElement] = _dispatch_list(self._children_captures, parent=self)
+ self._children_captures = []
+ if nested:
+ for child in self._children:
+ child._dispatch(True)
+
+ def __eq__(self, other):
+ if not isinstance(other, ContentElement):
+ return False
+ return bool(self.grammar == other.grammar and self.characters == other.characters)
+
+ def _find(
+ self,
+ tokens: str | list[str],
+ start_tokens: str | list[str] = "",
+ hide_tokens: str | list[str] = "",
+ stop_tokens: str | list[str] = "",
+ depth: int = -1,
+ attribute: str = "_subelements",
+ stack: list[str] | None = None,
+ ) -> Generator[tuple[ContentElement, list[str]], None, None]:
+ tokens = _str_to_list(tokens)
+ start_tokens = _str_to_list(start_tokens)
+ hide_tokens = _str_to_list(hide_tokens)
+ stop_tokens = _str_to_list(stop_tokens)
+
+ if not set(tokens).isdisjoint(set(stop_tokens)):
+ raise ValueError("Input tokens and stop_tokens must be disjoint")
+
+ if stack is None:
+ stack = []
+ stack += [self.token]
+
+ start_found = not start_tokens
+
+ if depth:
+ depth -= 1
+ children: list[ContentElement] = getattr(self, attribute, self._subelements)
+ for child in children:
+ if stop_tokens and (
+ child.token in stop_tokens
+ or (stop_tokens == ["*"] and child.token not in tokens)
+ ):
+ return None
+
+ if not start_found and child.token in start_tokens:
+ start_found = True
+ start_tokens = []
+
+ if (
+ start_found
+ and (child.token in tokens or tokens == ["*"])
+ and child.token not in hide_tokens
+ ):
+ yield child, [e for e in stack]
+ if depth:
+ nested_generator = child._find(
+ tokens,
+ start_tokens=start_tokens,
+ hide_tokens=hide_tokens,
+ stop_tokens=stop_tokens,
+ depth=depth - 1,
+ stack=[e for e in stack],
+ )
+ yield from nested_generator
+ return None
+
+
+ def find(
+ self,
+ tokens: str | list[str],
+ start_tokens: str | list[str] = "",
+ hide_tokens: str | list[str] = "",
+ stop_tokens: str | list[str] = "",
+ depth: int = -1,
+ attribute: str = "_subelements",
+ ) -> Generator[tuple[ContentElement, list[str]], None, None]:
+ """
+ Find content elements based on the given criteria.
+
+ The find method returns a generator that walks through the element tree, searching for the next
+ subelement that matches the given token.
+
+ :param tokens: The tokens to search for. Can be a single token or a list of tokens.
+ :param start_tokens: The tokens that mark the start of the search. Can be a single token or a list of tokens.
+ :param hide_tokens: The tokens to hide from the search results. Can be a single token or a list of tokens.
+ :param stop_tokens: The tokens that mark the end of the search. Can be a single token or a list of tokens.
+ :param depth: The maximum depth to search. Defaults to -1 (unlimited depth).
+ :param attribute: The attribute name to access the subelements. Defaults to "_subelements".
+
+ :yield: A tuple containing the found content element and the stack of tokens encountered.
+
+ :raises ValueError: If the input tokens and stop_tokens are not disjoint.
+
+ :return: None if no matching content elements are found.
+ """
+ return self._find(
+ tokens,
+ start_tokens=start_tokens,
+ hide_tokens=hide_tokens,
+ stop_tokens=stop_tokens,
+ depth=depth,
+ attribute=attribute,
+ )
+
+
+
+ def findall(
+ self,
+ tokens: str | list[str],
+ start_tokens: str | list[str] = "",
+ hide_tokens: str | list[str] = "",
+ stop_tokens: str | list[str] = "",
+ depth: int = -1,
+ attribute: str = "_subelements",
+ ) -> list[tuple[ContentElement, list[str]]]:
+ """
+ Find all occurrences of the specified tokens within the content element.
+
+ :param tokens: The tokens to search for.
+ :param start_tokens: The tokens that must appear before the found tokens. Defaults to "".
+ :param hide_tokens: The tokens that should be hidden from the search. Defaults to "".
+ :param stop_tokens: The tokens that, if found, should stop the search. Defaults to "".
+ :param depth: The maximum depth to search. Defaults to -1 (unlimited depth).
+ :param attribute: The attribute to search within. Defaults to "_subelements".
+
+ :return: A list of tuples containing the content element and the found tokens.
+ """
+ return list(
+ self._find(
+ tokens,
+ start_tokens=start_tokens,
+ hide_tokens=hide_tokens,
+ stop_tokens=stop_tokens,
+ depth=depth,
+ attribute=attribute,
+ )
+ )
+
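+ # Usage sketch (illustrative; the token is borrowed from the bundled example.py):
+ #
+ #     element = parser.parse_file("AnEnum.m")
+ #     for child, stack in element.find("variable.other.enummember.matlab"):
+ #         print(child, stack)                  # each match with its token stack
+ #     members = element.findall("variable.other.enummember.matlab")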
+
+
+ def to_dict(self, depth: int = -1, all_content: bool = False, **kwargs) -> dict:
+ """
+ Converts the object to a dictionary.
+
+ :param depth: The depth of the conversion. Defaults to -1.
+ :param all_content: Whether to include all content or only the top-level content. Defaults to False.
+
+ :return: The converted dictionary representation of the object.
+ """
+ out_dict = {"token": self.token}
+ if all_content or not self.children:
+ out_dict["content"] = self.content
+ if self.children:
+ out_dict["children"] = (
+ self._list_property_to_dict("children", depth=depth - 1, all_content=all_content)
+ if depth
+ else self.children
+ )
+ return out_dict
+
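+ # Shape sketch (illustrative, token names hypothetical): to_dict() nests children
+ # under their parent token:
+ #
+ #     {"token": "source.matlab",
+ #      "children": [{"token": "comment.line.percentage.matlab", "content": "% note"}]}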
+
+
+ def flatten(self) -> list[tuple[tuple[int, int], str, list[str]]]:
+ """
+ Converts the object to a flattened array of tokens per index, similar to vscode-textmate.
+
+ :return: A list of tuples representing the flattened tokens. Each tuple contains:
+ - A tuple representing the starting and ending index of the token.
+ - The content of the token.
+ - A list of keys associated with the token.
+ """
+ token_dict = self._token_by_index(defaultdict(list))
+ tokens = []
+ for (_, key), group in groupby(sorted(token_dict.items()), lambda x: (x[0][0], x[1])):
+ group_tokens = list(group)
+ starting = group_tokens[0][0]
+ content = ""
+ for pos, _ in group_tokens:
+ content += self.characters[pos]
+ if content:
+ tokens.append((starting, content, key))
+ return tokens
+
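+ # Output sketch (illustrative, token names hypothetical): one tuple per run of
+ # equally-tokenized characters, in the style of vscode-textmate:
+ #
+ #     [((0, 0), "classdef", ["source.matlab", "meta.class.matlab"]), ...]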
+
+
+ def print(
+ self,
+ flatten: bool = False,
+ depth: int = -1,
+ all_content: bool = False,
+ **kwargs,
+ ) -> None:
+ """
+ Prints the current object recursively by converting it to a dictionary or a flattened array.
+
+ :param flatten: If True, flattens the object before printing. Defaults to False.
+ :param depth: The maximum depth to print. Defaults to -1 (unlimited depth).
+ :param all_content: If True, includes all content in the printout. Defaults to False.
+ :param **kwargs: Additional keyword arguments to be passed to the pprint function.
+
+ :return: None
+ """
+ if flatten:
+ pprint(
+ self.flatten(**kwargs),
+ sort_dicts=False,
+ width=kwargs.pop("width", 150),
+ **kwargs,
+ )
+ else:
+ pprint(
+ self.to_dict(depth=depth, all_content=all_content, **kwargs),
+ sort_dicts=False,
+ width=kwargs.pop("width", 150),
+ **kwargs,
+ )
+
+
+ def _token_by_index(self, token_dict: TOKEN_DICT | None = None) -> TOKEN_DICT:
+ """Recursively tokenize every index between start and close.
+
+ This method recursively tokenizes every index between the start and close positions of the element.
+ It populates a dictionary, `token_dict`, with the tokens corresponding to each index.
+
+ :param token_dict: A dictionary to store the tokens. If None, a new dictionary is created.
+ :type token_dict: dict | None
+ :return: A dictionary containing the tokens for each index.
+ :rtype: dict
+ """
+ if token_dict is None:
+ token_dict = defaultdict(list)
+ for pos in self.characters:
+ token_dict[pos].append(self.token)
+
+ # Tokenize child elements
+ for element in self.children:
+ element._token_by_index(token_dict)
+ return token_dict
+
+ def _list_property_to_dict(self, prop: str, **kwargs):
+ """Makes a dictionary from a property."""
+ return [
+ item.to_dict(**kwargs) if isinstance(item, ContentElement) else item
+ for item in getattr(self, prop, [])
+ ]
+
+ def __repr__(self) -> str:
+ content = self.content if len(self.content) < 15 else self.content[:15] + "..."
+ return repr(f"{self.token}<<{content}>>({len(self.children)})")
+
+
+
+
+class ContentBlockElement(ContentElement):
+ """A parsed element with a begin and a end"""
+
+ def __init__(
+ self,
+ *args,
+ begin: list[Capture | ContentElement] | None = None,
+ end: list[Capture | ContentElement] | None = None,
+ **kwargs,
+ ) -> None:
+ """
+ Initialize a new instance of the ContentBlockElement class.
+
+ :param begin: A list of Capture or ContentElement objects representing the beginning captures of the element. Defaults to None.
+ :param end: A list of Capture or ContentElement objects representing the ending captures of the element. Defaults to None.
+ :param **kwargs: Additional keyword arguments to be passed to the parent class constructor.
+
+ :return: None
+ """
+ if end is None:
+ end = []
+ if begin is None:
+ begin = []
+ super().__init__(*args, **kwargs)
+ self._begin_captures = begin
+ self._end_captures = end
+
+ @property
+ def _subelements(self) -> list[ContentElement]:
+ return self.begin + self.children + self.end
+
+ @property
+ def begin(self) -> list[ContentElement]:
+ """
+ Returns the list of begin elements.
+
+ If the elements have not been dispatched yet, this method will dispatch them before returning.
+
+ :return: The list of begin elements.
+ """
+ if not self._dispatched:
+ self._dispatch()
+ return self._begin
+
+ @property
+ def end(self) -> list[ContentElement]:
+ """
+ Returns the end elements.
+
+ If the elements have not been dispatched yet, this method will dispatch them before returning.
+
+ :return: A list of end elements.
+ """
+ if not self._dispatched:
+ self._dispatch()
+ return self._end
+
+ def _dispatch(self, nested: bool = False):
+ if self._dispatched:
+ return
+ super()._dispatch(nested)
+ self._begin: list[ContentElement] = _dispatch_list(self._begin_captures, parent=self)
+ self._end: list[ContentElement] = _dispatch_list(self._end_captures, parent=self)
+ self._begin_captures, self._end_captures = [], []
+ if nested:
+ for item in self._begin:
+ item._dispatch(True)
+ for item in self._end:
+ item._dispatch(True)
+
+
+ def to_dict(self, depth: int = -1, all_content: bool = False, **kwargs) -> dict:
+ """
+ Converts the element to a dictionary representation.
+
+ :param depth: The depth of the conversion. Defaults to -1.
+ :param all_content: Whether to include all content. Defaults to False.
+ :param **kwargs: Additional keyword arguments.
+
+ :return: The dictionary representation of the element.
+ """
+ out_dict = super().to_dict(depth=depth, all_content=all_content, **kwargs)
+ if self.begin:
+ out_dict["begin"] = (
+ self._list_property_to_dict("begin", depth=depth - 1, **kwargs)
+ if depth
+ else self.begin
+ )
+ if self.end:
+ out_dict["end"] = (
+ self._list_property_to_dict("end", depth=depth - 1, **kwargs) if depth else self.end
+ )
+
+ ordered_keys = [
+ key for key in ["token", "begin", "end", "content", "children"] if key in out_dict
+ ]
+ ordered_dict = {key: out_dict[key] for key in ordered_keys}
+ return ordered_dict
+
+
+ def _token_by_index(self, token_dict: TOKEN_DICT | None = None) -> TOKEN_DICT:
+ """Converts the object to a flattened array of tokens."""
+ if token_dict is None:
+ token_dict = defaultdict(list)
+ super()._token_by_index(token_dict)
+ for element in self.begin:
+ element._token_by_index(token_dict)
+ for element in self.end:
+ element._token_by_index(token_dict)
+ return token_dict
+
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from .elements import Capture, ContentElement
+from .parser import GrammarParser, PatternsParser
+from .utils.cache import TextmateCache, init_cache
+from .utils.exceptions import IncompatibleFileType
+from .utils.handler import POS, ContentHandler
+from .utils.logger import LOGGER
+
+LANGUAGE_PARSERS = {}
+
+
+
+class DummyParser(GrammarParser):
+ """A dummy parser object"""
+
+ def __init__(self):
+ self.key = "DummyLanguage"
+ self.initialized = True
+
+ def _initialize_repository(self):
+ pass
+
+ def _parse(self, *args, **kwargs):
+ pass
+
+
+
+
+class LanguageParser(PatternsParser):
+ """The parser of a language grammar."""
+
+ def __init__(self, grammar: dict, **kwargs):
+ """
+ Initialize a LanguageParser object.
+
+ :param grammar: The grammar definition for the language.
+ :param kwargs: Additional keyword arguments.
+
+ :ivar name: The name of the language.
+ :ivar uuid: The UUID of the language.
+ :ivar file_types: The file types associated with the language.
+ :ivar token: The scope name of the language.
+ :ivar repository: The repository of grammar rules for the language.
+ :ivar injections: The list of injection rules for the language.
+ :ivar _cache: The cache object for the language.
+ """
+ super().__init__(grammar, key=grammar.get("name", "myLanguage"), language=self, **kwargs)
+
+ self.name = grammar.get("name", "")
+ self.uuid = grammar.get("uuid", "")
+ self.file_types = grammar.get("fileTypes", [])
+ self.token = grammar.get("scopeName", "myScope")
+ self.repository = {}
+ self.injections: list[dict] = []
+ self._cache: TextmateCache = init_cache()
+
+ # Initialize grammars in repository
+ for repo in _gen_repositories(grammar):
+ for key, parser_grammar in repo.items():
+ self.repository[key] = GrammarParser.initialize(
+ parser_grammar, key=key, language=self
+ )
+
+ # Update language parser store
+ language_name = grammar.get("scopeName", "myLanguage")
+ LANGUAGE_PARSERS[language_name] = self
+
+ self._initialize_repository()
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}:{self.key}"
+
+ @staticmethod
+ def _find_include_scopes(key: str):
+ return LANGUAGE_PARSERS.get(key, DummyParser())
+
+ def _initialize_repository(self):
+ """When the grammar has patterns, this method should called to initialize its inclusions."""
+
+ # Initialize injections
+ injections = self.grammar.get("injections", {})
+ for key, injected_grammar in injections.items():
+ target_string = key[: key.index("-")].strip()
+ if not target_string:
+ target_string = self.grammar.get("scopeName", "myLanguage")
+ target_language = LANGUAGE_PARSERS[target_string]
+
+ injected_parser = GrammarParser.initialize(
+ injected_grammar,
+ key=f"{target_string}.injection",
+ language=target_language,
+ )
+ injected_parser._initialize_repository()
+
+ scope_string = key[key.index("-") :]
+ exception_scopes = [s.strip() for s in scope_string.split("-") if s.strip()]
+ target_language.injections.append([exception_scopes, injected_parser])
+
+ super()._initialize_repository()
+
+
+ def parse_file(self, filePath: str | Path, **kwargs) -> ContentElement | None:
+ """
+ Parses an entire file with the current grammar.
+
+ :param filePath: The path to the file to be parsed.
+ :param kwargs: Additional keyword arguments to be passed to the parser.
+ :return: The parsed element if successful, None otherwise.
+ """
+ if not isinstance(filePath, Path):
+ filePath = Path(filePath).resolve()
+
+ if filePath.suffix.split(".")[-1] not in self.file_types:
+ raise IncompatibleFileType(extensions=self.file_types)
+
+ if self._cache.cache_valid(filePath):
+ element = self._cache.load(filePath)
+ else:
+ handler = ContentHandler.from_path(filePath)
+ if handler.source == "":
+ return None
+
+ # Configure logger
+ LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths))
+ element = self._parse_language(handler, **kwargs) # type: ignore
+
+ if element is not None:
+ self._cache.save(filePath, element)
+ return element
+
+
+
+ def parse_string(self, input: str, **kwargs) -> ContentElement | None:
+ """
+ Parses an input string.
+
+ :param input: The input string to be parsed.
+ :param kwargs: Additional keyword arguments.
+ :return: The result of parsing the input string.
+ """
+ handler = ContentHandler(input)
+ # Configure logger
+ LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths))
+
+ element = self._parse_language(handler, **kwargs)
+
+ return element
+
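+ # Usage sketch (mirrors the bundled example.py; the input string is illustrative):
+ #
+ #     from textmate_grammar.grammars import matlab
+ #     parser = LanguageParser(matlab.GRAMMAR)
+ #     element = parser.parse_file("AnEnum.m")   # or: parser.parse_string("x = 1;")
+ #     element.print()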
+
+ def _parse_language(self, handler: ContentHandler, **kwargs) -> ContentElement | None:
+ """Parses the current stream with the language scope."""
+
+ parsed, elements, _ = self.parse(handler, (0, 0), **kwargs)
+
+ if parsed:
+ element = elements[0]
+ element._dispatch(nested=True) # type: ignore
+ else:
+ element = None
+ return element # type: ignore
+
+ def _parse(
+ self, handler: ContentHandler, starting: POS, **kwargs
+ ) -> tuple[bool, list[Capture | ContentElement], tuple[int, int]]:
+ kwargs.pop("find_one", None)
+ return super()._parse(handler, starting, find_one=False, **kwargs)
+
+
+
+def _gen_repositories(grammar, key="repository"):
+ """Recursively gets all repositories from a grammar dictionary"""
+ if hasattr(grammar, "items"):
+ for k, v in grammar.items():
+ if k == key:
+ yield v
+ if isinstance(v, dict):
+ for result in _gen_repositories(v, key):
+ yield result
+ elif isinstance(v, list):
+ for d in v:
+ for result in _gen_repositories(d, key):
+ yield result
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+import onigurumacffi as re
+
+from .elements import Capture, ContentBlockElement, ContentElement
+from .utils.exceptions import IncludedParserNotFound
+from .utils.handler import POS, ContentHandler, Pattern
+from .utils.logger import LOGGER, track_depth
+
+if TYPE_CHECKING:
+ from .language import LanguageParser
+
+
+
+class GrammarParser(ABC):
+ """The abstract grammar parser object"""
+
+
+ @staticmethod
+ def initialize(grammar: dict, **kwargs):
+ """
+ Initializes the parser based on the grammar.
+
+ :param grammar: The grammar to initialize the parser with.
+ :param kwargs: Additional keyword arguments.
+ :return: The initialized parser.
+ """
+ if "include" in grammar:
+ return grammar["include"]
+ elif "match" in grammar:
+ return MatchParser(grammar, **kwargs)
+ elif "begin" in grammar and "end" in grammar:
+ return BeginEndParser(grammar, **kwargs)
+ elif "begin" in grammar and "while" in grammar:
+ return BeginWhileParser(grammar, **kwargs)
+ elif "patterns" in grammar:
+ return PatternsParser(grammar, **kwargs)
+ else:
+ return TokenParser(grammar, **kwargs)
+
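+ # Dispatch sketch (illustrative grammar): the keys of the grammar dict select the
+ # parser type:
+ #
+ #     GrammarParser.initialize({"match": r"\d+", "name": "constant.numeric"})
+ #     # -> MatchParser; {"begin": ..., "end": ...} -> BeginEndParser, and a bare
+ #     # {"name": ...} falls through to TokenParser.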
+
+ def __init__(
+ self,
+ grammar: dict,
+ language: LanguageParser | None = None,
+ key: str = "",
+ is_capture: bool = False,
+ **kwargs,
+ ) -> None:
+ """
+ Initialize a Parser object.
+
+ :param grammar: The grammar dictionary.
+ :param language: The language parser object. Defaults to None.
+ :param key: The key for the parser. Defaults to "".
+ :param is_capture: Indicates if the parser is a capture. Defaults to False.
+ :param kwargs: Additional keyword arguments.
+
+ :return: None
+ """
+ self.grammar = grammar
+ self.language = language
+ self.key = key
+ self.token = grammar.get("name", "")
+ self.is_capture = is_capture
+ self.initialized = False
+ self.anchored = False
+
+ @property
+ def comment(self) -> str:
+ return self.grammar.get("comment", "")
+
+ @property
+ def disabled(self) -> bool:
+ return self.grammar.get("disabled", False)
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}:<{self.key}>"
+
+ def _init_captures(self, grammar: dict, key: str = "captures", **kwargs) -> dict:
+ """Initializes a captures dictionary"""
+ captures = {}
+ if key in grammar:
+ for group_id, pattern in grammar[key].items():
+ captures[int(group_id)] = self.initialize(
+ pattern, language=self.language, is_capture=True
+ )
+ return captures
+
+ def _find_include(self, key: str, **kwargs) -> GrammarParser:
+ """Find the included grammars and during repository initialization"""
+ if not self.language:
+ raise IncludedParserNotFound(key)
+
+ if key in ["$self", "$base"]: # TODO there is a difference between these
+ return self.language
+ elif key[0] == "#":
+ return self.language.repository.get(key[1:], None)
+ else:
+ return self.language._find_include_scopes(key)
+
+ @abstractmethod
+ def _parse(
+ self,
+ handler: ContentHandler,
+ starting: POS,
+ **kwargs,
+ ) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
+ """The abstract method which all parsers much implement
+
+ The ``_parse`` method is called by ``parse``, which will additionally parse any nested Capture elements.
+ The ``_parse`` method should contain all the rules for the extended parser.
+
+ :param handler: The content handler to handle the parsed elements.
+ :param starting: The starting position of the parsing.
+ :param kwargs: Additional keyword arguments.
+ :return: A tuple containing the parsing result, a list of parsed elements, and the ending position of the parsing.
+ """
+ pass
+
+ def _initialize_repository(self, **kwargs) -> None:
+ """Initializes the repository's inclusions.
+
+ When the grammar has patterns, this method should be called to initialize its inclusions.
+ This should occur after all sub patterns have been initialized.
+ """
+ return
+
+
+ def parse(
+ self,
+ handler: ContentHandler,
+ starting: POS = (0, 0),
+ boundary: POS | None = None,
+ **kwargs,
+ ) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
+ """
+ The method to parse a handler using the current grammar.
+
+ :param handler: The ContentHandler object that will handle the parsed content.
+ :param starting: The starting position for parsing. Defaults to (0, 0).
+ :param boundary: The boundary position for parsing. Defaults to None.
+ :param **kwargs: Additional keyword arguments that can be passed to the parser.
+
+ :return: A tuple containing:
+ - parsed: A boolean indicating whether the parsing was successful.
+ - elements: A list of Capture or ContentElement objects representing the parsed content.
+ - span: A tuple containing the starting and ending positions of the parsed content, or None if parsing failed.
+ """
+ if not self.initialized and self.language is not None:
+ self.language._initialize_repository()
+ parsed, elements, span = self._parse(handler, starting, boundary=boundary, **kwargs)
+ return parsed, elements, span
+
+
+
+ def match_and_capture(
+ self,
+ handler: ContentHandler,
+ pattern: Pattern,
+ starting: POS,
+ boundary: POS,
+ parsers: dict[int, GrammarParser] | None = None,
+ parent_capture: Capture | None = None,
+ **kwargs,
+ ) -> tuple[tuple[POS, POS] | None, str, list[Capture | ContentElement]]:
+ """Matches a pattern and its capture groups.
+
+ Matches the pattern on the handler between the starting and boundary positions. If a pattern is matched,
+ its capture groups are initialized as Capture objects. These are only parsed after the full handler has been
+ parsed, when the pending Capture objects are dispatched.
+
+ :param handler: The content handler to match the pattern on.
+ :param pattern: The pattern to match.
+ :param starting: The starting position for the match.
+ :param boundary: The boundary position for the match.
+ :param parsers: A dictionary of parsers.
+ :param parent_capture: The parent capture object.
+ :param kwargs: Additional keyword arguments.
+ :return: A tuple containing the span of the match, the matched string, and a list of capture objects or content elements.
+ """
+ if parsers is None:
+ parsers = {}
+ matching, span = handler.search(pattern, starting=starting, boundary=boundary, **kwargs)
+
+ if matching:
+ if parsers:
+ capture = Capture(
+ handler,
+ pattern,
+ matching,
+ parsers,
+ starting,
+ boundary,
+ key=self.key,
+ **kwargs,
+ )
+ if parent_capture is not None and capture == parent_capture:
+ return None, "", []
+ else:
+ return span, matching.group(), [capture]
+ else:
+ return span, matching.group(), []
+ else:
+ return None, "", []
+
+
+
+
+
+class TokenParser(GrammarParser):
+ """The parser for grammars for which only the token is provided."""
+
+ def __init__(self, grammar: dict, **kwargs) -> None:
+ super().__init__(grammar, **kwargs)
+ self.initialized = True
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}:{self.token}"
+
+ @track_depth
+ def _parse(
+ self,
+ handler: ContentHandler,
+ starting: POS,
+ boundary: POS,
+ **kwargs,
+ ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
+ """The parse method for grammars for which only the token is provided.
+
+ When no regex patterns are provided, the element is created between the initial and boundary positions.
+ """
+ content = handler.read_pos(starting, boundary)
+ elements: list[Capture | ContentElement] = [
+ ContentElement(
+ token=self.token,
+ grammar=self.grammar,
+ content=content,
+ characters=handler.chars(starting, boundary),
+ )
+ ]
+ handler.anchor = boundary[1]
+ LOGGER.info(
+ f"{self.__class__.__name__} found < {repr(content)} >",
+ self,
+ starting,
+ kwargs.get("depth", 0),
+ )
+ return True, elements, (starting, boundary)
+
+
+
+
+class MatchParser(GrammarParser):
+ """The parser for grammars for which a match pattern is provided."""
+
+ def __init__(self, grammar: dict, **kwargs) -> None:
+ super().__init__(grammar, **kwargs)
+ self.exp_match = re.compile(grammar["match"])
+ self.parsers = self._init_captures(grammar, key="captures")
+ if "\\G" in grammar["match"]:
+ self.anchored = True
+
+ def __repr__(self) -> str:
+ if self.token:
+ return f"{self.__class__.__name__}:{self.token}"
+ else:
+ identifier = self.key if self.key else "_".join(self.comment.lower().split(" "))
+ return f"{self.__class__.__name__}:<{identifier}>"
+
+ def _initialize_repository(self, **kwargs) -> None:
+ """When the grammar has patterns, this method should called to initialize its inclusions."""
+ self.initialized = True
+ for key, value in self.parsers.items():
+ if not isinstance(value, GrammarParser):
+ self.parsers[key] = self._find_include(value)
+ for parser in self.parsers.values():
+ if not parser.initialized:
+ parser._initialize_repository()
+
+ @track_depth
+ def _parse(
+ self,
+ handler: ContentHandler,
+ starting: POS,
+ boundary: POS,
+ **kwargs,
+ ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
+ """The parse method for grammars for which a match pattern is provided."""
+
+ span, content, captures = self.match_and_capture(
+ handler,
+ pattern=self.exp_match,
+ starting=starting,
+ boundary=boundary,
+ parsers=self.parsers,
+ **kwargs,
+ )
+
+ if span is None:
+ LOGGER.debug(
+ f"{self.__class__.__name__} no match",
+ self,
+ starting,
+ kwargs.get("depth", 0),
+ )
+ return False, [], None
+
+ LOGGER.info(
+ f"{self.__class__.__name__} found < {repr(content)} >",
+ self,
+ starting,
+ kwargs.get("depth", 0),
+ )
+
+ if self.token:
+ elements: list[Capture | ContentElement] = [
+ ContentElement(
+ token=self.token,
+ grammar=self.grammar,
+ content=content,
+ characters=handler.chars(*span),
+ children=captures,
+ )
+ ]
+ else:
+ elements = captures
+
+ return True, elements, span
+
+
+
+
+class ParserHasPatterns(GrammarParser, ABC):
+ def __init__(self, grammar: dict, **kwargs) -> None:
+ super().__init__(grammar, **kwargs)
+ self.patterns = [
+ self.initialize(pattern, language=self.language)
+ for pattern in grammar.get("patterns", [])
+ ]
+
+ def _initialize_repository(self):
+ """When the grammar has patterns, this method should called to initialize its inclusions."""
+ self.initialized = True
+ self.patterns = [
+ parser if isinstance(parser, GrammarParser) else self._find_include(parser)
+ for parser in self.patterns
+ ]
+ for parser in self.patterns:
+ if not parser.initialized:
+ parser._initialize_repository()
+
+ # Copy patterns from included pattern parsers
+ pattern_parsers = [parser for parser in self.patterns if isinstance(parser, PatternsParser)]
+ for parser in pattern_parsers:
+ parser_index = self.patterns.index(parser)
+ self.patterns[parser_index : parser_index + 1] = parser.patterns
+
+ # Injection grammars
+ for exception_scopes, injection_pattern in self.language.injections:
+ if self.token:
+ if self.token.split(".")[0] not in exception_scopes:
+ self.patterns.append(injection_pattern)
+ elif self.is_capture:
+ self.patterns.append(injection_pattern)
+
+
+
+
+class PatternsParser(ParserHasPatterns):
+ """The parser for grammars for which several patterns are provided."""
+
+ @track_depth
+ def _parse(
+ self,
+ handler: ContentHandler,
+ starting: POS,
+ boundary: POS | None = None,
+ greedy: bool = False,
+ find_one: bool = True,
+ **kwargs,
+ ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS]]:
+ """The parse method for grammars for which a match pattern is provided."""
+
+ if boundary is None:
+ boundary = (len(handler.lines) - 1, handler.line_lengths[-1])
+
+ parsed = False
+ elements: list[Capture | ContentElement] = []
+ patterns = [parser for parser in self.patterns if not parser.disabled]
+
+ current = (starting[0], starting[1])
+
+ while current < boundary:
+ for parser in patterns:
+ # Try to find patterns
+ parsed, captures, span = parser._parse(
+ handler,
+ current,
+ boundary=boundary,
+ greedy=greedy,
+ **kwargs,
+ )
+ if parsed:
+ if find_one:
+ LOGGER.info(
+ f"{self.__class__.__name__} found single element",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ return True, captures, span
+ elements.extend(captures)
+ current = span[1]
+ break
+ else:
+ if find_one:
+ break
+
+ if not parsed and not greedy:
+ # Try again, now allowing leading whitespace characters; only done when multiple patterns are to be found
+ options_span, options_elements = {}, {}
+ for parser in patterns:
+ parsed, captures, span = parser._parse(
+ handler,
+ current,
+ boundary=boundary,
+ greedy=True,
+ **kwargs,
+ )
+ if parsed:
+ options_span[parser] = span
+ options_elements[parser] = captures
+ LOGGER.debug(
+ f"{self.__class__.__name__} found pattern choice",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+
+ if options_span:
+ parser = sorted(
+ options_span,
+ key=lambda parser: (
+ *options_span[parser][0],
+ patterns.index(parser),
+ ),
+ )[0]
+ current = options_span[parser][1]
+ elements.extend(options_elements[parser])
+ LOGGER.info(
+ f"{self.__class__.__name__} chosen pattern of {parser}",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ elif self != self.language:
+ break
+ else:
+ remainder = handler.read_line(current)
+ if not remainder.isspace():
+ LOGGER.warning(
+ f"{self.__class__.__name__} remainder of line not parsed: {remainder}",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ if current[0] + 1 <= len(handler.lines):
+ current = (current[0] + 1, 0)
+ else:
+ LOGGER.debug(
+ f"{self.__class__.__name__} EOF encountered",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ break
+
+ if current == starting:
+ LOGGER.warning(
+ f"{self.__class__.__name__} handler did not move after a search round",
+ self,
+ starting,
+ kwargs.get("depth", 0),
+ )
+ break
+
+ line_length = handler.line_lengths[current[0]]
+ if current[1] in [line_length, line_length - 1]:
+ try:
+ empty_lines = next(
+ i for i, v in enumerate(handler.line_lengths[current[0] + 1 :]) if v > 1
+ )
+ current = (current[0] + 1 + empty_lines, 0)
+ except StopIteration:
+ break
+
+ if self.token:
+ elements = [
+ ContentElement(
+ token=self.token,
+ grammar=self.grammar,
+ content=handler.read_pos(starting, boundary),
+ characters=handler.chars(starting, boundary),
+ children=elements,
+ )
+ ]
+
+ return bool(elements), elements, (starting, current)
+
+
+
+
+class BeginEndParser(ParserHasPatterns):
+ """The parser for grammars for which a begin/end pattern is provided."""
+
+ def __init__(self, grammar: dict, **kwargs) -> None:
+ super().__init__(grammar, **kwargs)
+ if "contentName" in grammar:
+ self.token = grammar["contentName"]
+ self.between_content = True
+ else:
+ self.token = grammar.get("name")
+ self.between_content = False
+ self.apply_end_pattern_last = grammar.get("applyEndPatternLast", False)
+ self.exp_begin = re.compile(grammar["begin"])
+ self.exp_end = re.compile(grammar["end"])
+ self.parsers_begin = self._init_captures(grammar, key="beginCaptures")
+ self.parsers_end = self._init_captures(grammar, key="endCaptures")
+ if "\\G" in grammar["begin"]:
+ self.anchored = True
+
+ def __repr__(self) -> str:
+ if self.token:
+ return f"{self.__class__.__name__}:{self.token}"
+ else:
+ identifier = self.key if self.key else "_".join(self.comment.lower().split(" "))
+ return f"{self.__class__.__name__}:<{identifier}>"
+
+ def _initialize_repository(self, **kwargs) -> None:
+ """When the grammar has patterns, this method should called to initialize its inclusions."""
+ self.initialized = True
+ super()._initialize_repository()
+ for key, value in self.parsers_end.items():
+ if not isinstance(value, GrammarParser):
+ self.parsers_end[key] = self._find_include(value)
+ for key, value in self.parsers_begin.items():
+ if not isinstance(value, GrammarParser):
+ self.parsers_begin[key] = self._find_include(value)
+ for parser in self.parsers_begin.values():
+ if not parser.initialized:
+ parser._initialize_repository()
+ for parser in self.parsers_end.values():
+ if not parser.initialized:
+ parser._initialize_repository()
+
+ @track_depth
+ def _parse(
+ self,
+ handler: ContentHandler,
+ starting: POS,
+ boundary: POS,
+ greedy: bool = False,
+ **kwargs,
+ ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
+ """The parse method for grammars for which a begin/end pattern is provided."""
+
+ begin_span, _, begin_elements = self.match_and_capture(
+ handler,
+ self.exp_begin,
+ starting,
+ boundary=boundary,
+ parsers=self.parsers_begin,
+ greedy=greedy,
+ **kwargs,
+ )
+
+ if not begin_span:
+ LOGGER.debug(
+ f"{self.__class__.__name__} no begin match",
+ self,
+ starting,
+ kwargs.get("depth", 0),
+ )
+ return False, [], None
+ LOGGER.info(
+ f"{self.__class__.__name__} found begin",
+ self,
+ starting,
+ kwargs.get("depth", 0),
+ )
+
+ # Get initial and boundary positions
+ current = begin_span[1]
+ if boundary is None:
+ boundary = (len(handler.lines) - 1, handler.line_lengths[-1])
+
+ # Define loop parameters
+ end_elements: list[Capture | ContentElement] = []
+ mid_elements: list[Capture | ContentElement] = []
+ patterns = [parser for parser in self.patterns if not parser.disabled]
+ first_run = True
+
+ while current <= boundary:
+ parsed = False
+
+ # Create boolean that is enabled when a parser is recursively called. In this case its end pattern should
+ # be applied last, otherwise the same span will be recognized as the end pattern by the upper-level parser
+ apply_end_pattern_last = False
+
+ # Try to find patterns first with no leading whitespace characters allowed
+ for parser in patterns:
+ parsed, capture_elements, capture_span = parser._parse(
+ handler, current, boundary=boundary, greedy=False, **kwargs
+ )
+ if parsed:
+ if parser == self:
+ apply_end_pattern_last = True
+ LOGGER.debug(
+ f"{self.__class__.__name__} found pattern (no ws)",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ break
+
+ # Try to find the end pattern with no leading whitespace characters allowed
+ end_span, _, end_elements = self.match_and_capture(
+ handler,
+ self.exp_end,
+ current,
+ boundary=boundary,
+ parsers=self.parsers_end,
+ greedy=False,
+ **kwargs,
+ )
+
+ if not parsed and not end_span:
+ # Try to find the patterns and end pattern allowing for leading whitespace characters
+
+ LOGGER.info(
+ f"{self.__class__.__name__} getting all pattern options",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+
+ options_span, options_elements = {}, {}
+ for parser in patterns:
+ parsed, capture_elements, capture_span = parser._parse(
+ handler,
+ current,
+ boundary=boundary,
+ greedy=True,
+ **kwargs,
+ )
+ if parsed:
+ options_span[parser] = capture_span
+ options_elements[parser] = capture_elements
+ LOGGER.debug(
+ f"{self.__class__.__name__} found pattern choice",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+
+ if options_span:
+ parsed = True
+ parser = sorted(
+ options_span,
+ key=lambda parser: (
+ *options_span[parser][0],
+ patterns.index(parser),
+ ),
+ )[0]
+ capture_span = options_span[parser]
+ capture_elements = options_elements[parser]
+
+ if parser == self:
+ apply_end_pattern_last = True
+
+ LOGGER.info(
+ f"{self.__class__.__name__} chosen pattern of {parser}",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+
+ end_span, end_content, end_elements = self.match_and_capture(
+ handler,
+ self.exp_end,
+ current,
+ boundary=boundary,
+ parsers=self.parsers_end,
+ greedy=True,
+ **kwargs,
+ )
+
+ if end_span:
+ if parsed:
+ # Check whether the capture pattern has the same closing positions as the end pattern
+ capture_before_end = handler.prev(capture_span[1])
+ if handler.read(capture_before_end, skip_newline=False) == "\n":
+ # If capture pattern ends with \n, both left and right of \n is considered end
+ pattern_at_end = end_span[1] in [
+ capture_before_end,
+ capture_span[1],
+ ]
+ else:
+ pattern_at_end = end_span[1] == capture_span[1]
+
+ end_before_pattern = end_span[0] <= capture_span[0]
+ empty_span_end = end_span[1] == end_span[0]
+
+ if pattern_at_end and (end_before_pattern or empty_span_end):
+ if empty_span_end:
+ # Both found capture pattern and end pattern are accepted, break pattern search
+ LOGGER.debug(
+ f"{self.__class__.__name__} capture+end: both accepted, break",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ mid_elements.extend(capture_elements)
+ closing = end_span[0] if self.between_content else end_span[1]
+ break
+ elif not self.apply_end_pattern_last and not apply_end_pattern_last:
+ # End pattern prioritized over capture pattern, break pattern search
+ LOGGER.debug(
+ f"{self.__class__.__name__} capture+end: end prioritized, break",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ closing = end_span[0] if self.between_content else end_span[1]
+ break
+ else:
+ # Capture pattern prioritized over end pattern, continue pattern search
+ LOGGER.debug(
+ f"{self.__class__.__name__} capture+end: capture prioritized, continue",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ mid_elements.extend(capture_elements)
+ current = capture_span[1]
+
+ elif capture_span[0] < end_span[0]:
+ # Capture pattern found before end pattern, continue pattern search
+ LOGGER.debug(
+ f"{self.__class__.__name__} capture<end: leading capture, continue",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ mid_elements.extend(capture_elements)
+ current = capture_span[1]
+ else:
+ # End pattern found before capture pattern, break pattern search
+ LOGGER.debug(
+ f"{self.__class__.__name__} end<capture: leading end, break",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ closing = end_span[0] if self.between_content else end_span[1]
+ break
+ else:
+ # No capture pattern found, accept end pattern and break pattern search
+ LOGGER.debug(
+ f"{self.__class__.__name__} end: break",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ closing = end_span[0] if self.between_content else end_span[1]
+ break
+ else: # No end pattern found
+ if parsed:
+ # Append found capture pattern and find next starting position
+ mid_elements.extend(capture_elements)
+
+ if handler.read(capture_span[1], skip_newline=False) == "\n":
+ # Next character after capture pattern is newline
+
+ LOGGER.debug(
+ f"{self.__class__.__name__} capture: next is newline, continue",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+
+ end_span, _, _ = self.match_and_capture(
+ handler,
+ self.exp_end,
+ capture_span[1],
+ boundary=boundary,
+ parsers=self.parsers_end,
+ allow_leading_all=False,
+ **kwargs,
+ )
+
+ if end_span and end_span[1] <= handler.next(capture_span[1]):
+ # Potential end pattern can be found directly after the found capture pattern
+ current = capture_span[1]
+ else:
+ # Skip the newline character in the next pattern search round
+ current = handler.next(capture_span[1])
+ else:
+ LOGGER.debug(
+ f"{self.__class__.__name__} capture: continue",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ current = capture_span[1]
+ else:
+ # No capture patterns nor end patterns found. Skip the current line.
+ line = handler.read_line(current)
+
+ if line and not line.isspace():
+ LOGGER.warning(
+ f"No patterns found in line, skipping < {repr(line)} >",
+ self,
+ current,
+ kwargs.get("depth", 0),
+ )
+ current = handler.next((current[0], handler.line_lengths[current[0]]))
+
+ if apply_end_pattern_last:
+ current = handler.next(current)
+
+ if first_run:
+ # Skip all parsers that were anchored to the begin pattern after the first round
+ patterns = [parser for parser in patterns if not parser.anchored]
+ first_run = False
+ else:
+ # Did not break out of while loop, set closing to boundary
+ closing = boundary
+ end_span = ((0, 0), boundary)
+
+ start = begin_span[1] if self.between_content else begin_span[0]
+
+ content = handler.read_pos(start, closing)
+ LOGGER.info(
+ f"{self.__class__.__name__} found < {repr(content)} >",
+ self,
+ start,
+ kwargs.get("depth", 0),
+ )
+
+ # Construct output elements
+ if self.token:
+ elements: list[Capture | ContentElement] = [
+ ContentBlockElement(
+ token=self.token,
+ grammar=self.grammar,
+ content=content,
+ characters=handler.chars(start, closing),
+ children=mid_elements,
+ begin=begin_elements,
+ end=end_elements,
+ )
+ ]
+ else:
+ elements = begin_elements + mid_elements + end_elements
+
+ return True, elements, (begin_span[0], end_span[1])
+
+
+
+
+class BeginWhileParser(PatternsParser):
+ """The parser for grammars for which a begin/end pattern is provided."""
+
+ def __init__(self, grammar: dict, **kwargs) -> None:
+ super().__init__(grammar, **kwargs)
+ if "contentName" in grammar:
+ self.token = grammar["contentName"]
+ self.between_content = True
+ else:
+ self.token = grammar.get("name")
+ self.between_content = False
+ self.exp_begin = re.compile(grammar["begin"])
+ self.exp_while = re.compile(grammar["while"])
+ self.parsers_begin = self._init_captures(grammar, key="beginCaptures")
+ self.parsers_while = self._init_captures(grammar, key="whileCaptures")
+
+ def __repr__(self) -> str:
+ if self.token:
+ return f"{self.__class__.__name__}:{self.token}"
+ else:
+ identifier = self.key if self.key else "_".join(self.comment.lower().split(" "))
+ return f"{self.__class__.__name__}:<{identifier}>"
+
+ def _initialize_repository(self):
+ """When the grammar has patterns, this method should called to initialize its inclusions."""
+ self.initialized = True
+ super()._initialize_repository()
+ for key, value in self.parsers_begin.items():
+ if not isinstance(value, GrammarParser):
+ self.parsers_begin[key] = self._find_include(value)
+ for key, value in self.parsers_while.items():
+ if not isinstance(value, GrammarParser):
+ self.parsers_while[key] = self._find_include(value)
+ for parser in self.parsers_begin.values():
+ if not parser.initialized:
+ parser._initialize_repository()
+ for parser in self.parsers_while.values():
+ if not parser.initialized:
+ parser._initialize_repository()
+
+ def _parse(
+ self,
+ handler: ContentHandler,
+ starting: POS,
+ **kwargs,
+ ):
+ """The parse method for grammars for which a begin/while pattern is provided."""
+ raise NotImplementedError
+
+
+from __future__ import annotations
+
+import atexit
+from pathlib import Path
+from pickle import UnpicklingError
+from typing import Protocol
+
+from ..elements import ContentElement
+
+CACHE_DIR = (Path() / ".textmate_cache").resolve()
+CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def _path_to_key(path: Path) -> str:
+ return str(path.resolve())
+
+
+
+class TextmateCache(Protocol):
+ """Interface for a Textmate cache."""
+
+
+ def cache_valid(self, filepath: Path) -> bool:
+ """
+ Check if the cache for the given filepath is valid.
+
+ :param filepath: The path to the file.
+ :return: True if the cache is valid, False otherwise.
+ """
+ ...
+
+
+
+ def load(self, filepath: Path) -> ContentElement:
+ """
+ Load the content from the specified filepath.
+
+ :param filepath: The path to the file to load.
+ :return: The loaded content element.
+ """
+ ...
+
+
+
+ def save(self, filePath: Path, element: ContentElement) -> None:
+ """
+ Save the given content element to the specified file path.
+
+ :param filePath: The file path where the content element should be saved.
+ :param element: The content element to be saved.
+ :return: None
+ """
+ ...
+
+
+
+
+
+class SimpleCache(TextmateCache):
+ """A simple cache implementation for storing content elements."""
+
+ def __init__(self) -> None:
+ """Initialize the SimpleCache."""
+ self._element_cache: dict[str, ContentElement] = dict()
+ self._element_timestamp: dict[str, float] = dict()
+
+
+ def cache_valid(self, filepath: Path) -> bool:
+ """Check if the cache is valid for the given filepath.
+
+ :param filepath: The filepath to check.
+ :return: True if the cache is valid, False otherwise.
+ """
+ key = _path_to_key(filepath)
+ if key not in self._element_cache:
+ return False
+ timestamp = filepath.resolve().stat().st_mtime
+ return timestamp == self._element_timestamp[key]
+
+
+
+ def load(self, filepath: Path) -> ContentElement:
+ """Load the content element from the cache for the given filepath.
+
+ :param filepath: The filepath to load the content element from.
+ :return: The loaded content element.
+ """
+ key = _path_to_key(filepath)
+ return self._element_cache[key]
+
+
+
+ def save(self, filepath: Path, element: ContentElement) -> None:
+ """Save the content element to the cache for the given filepath.
+
+ :param filepath: The filepath to save the content element to.
+ :param element: The content element to save.
+ :return: None
+ """
+ key = _path_to_key(filepath)
+ self._element_cache[key] = element
+ self._element_timestamp[key] = filepath.resolve().stat().st_mtime
+
+
+
+
+
+class ShelveCache(TextmateCache):
+ """A cache implementation using the shelve module."""
+
+ def __init__(self) -> None:
+ """Initialize the ShelveCache."""
+ import shelve
+
+ database_path = CACHE_DIR / "textmate.db"
+ self._database = shelve.open(str(database_path))
+
+ def exit():
+ self._database.sync()
+ self._database.close()
+
+ atexit.register(exit)
+
+
+ def cache_valid(self, filepath: Path) -> bool:
+ """Check if the cache is valid for the given filepath.
+
+ :param filepath: The filepath to check.
+ :return: True if the cache is valid, False otherwise.
+ """
+ key = _path_to_key(filepath)
+ if key not in self._database:
+ return False
+ timestamp = filepath.resolve().stat().st_mtime
+ try:
+ valid = timestamp == self._database[key][0]
+ except UnpicklingError:
+ valid = False
+ return valid
+
+
+
+ def load(self, filepath: Path) -> ContentElement:
+ """Load the content element from the cache for the given filepath.
+
+ :param filepath: The path for the cached content element.
+ :return: The loaded content element.
+ """
+ key = _path_to_key(filepath)
+ return self._database[key][1]
+
+
+
+ def save(self, filepath: Path, element: ContentElement) -> None:
+ """Save the content element to the cache for the given filepath.
+
+ :param filepath: The filepath to save the content element to.
+ :param element: The content element to save.
+ """
+ element._dispatch(nested=True)
+ key = _path_to_key(filepath)
+ timestamp = filepath.resolve().stat().st_mtime
+ self._database[key] = (timestamp, element)
+
+
+
+
+CACHE: TextmateCache = SimpleCache()
+
+
+
+def init_cache(type: str = "simple") -> TextmateCache:
+ """
+ Initialize the cache based on the given type.
+
+ :param type: The type of cache to initialize. Defaults to "simple".
+ :return: The initialized cache object.
+ """
+ global CACHE
+ if type == "shelve":
+ CACHE = ShelveCache()
+ elif type == "simple":
+ CACHE = SimpleCache()
+ else:
+ raise NotImplementedError(f"Cache type {type} not implemented.")
+ return CACHE
+
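+# Usage sketch (as in the bundled example.py): switch to the persistent shelve
+# cache before the first parse:
+#
+#     from textmate_grammar.utils.cache import init_cache
+#     init_cache("shelve")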
+
+from __future__ import annotations
+
+
+
+class IncludedParserNotFound(Exception):
+ """Exception raised when an included parser is not found in the store."""
+
+ def __init__(self, key: str = "UNKNOWN", **kwargs) -> None:
+ """
+ Initialize the exception.
+
+ :param key: The key of the included parser.
+ :param kwargs: Additional keyword arguments.
+ """
+ message = f"Included parser <{key}> not found in store."
+ super().__init__(message, **kwargs)
+
+
+
+
+class IncompatibleFileType(Exception):
+ """Exception raised when the input file has an incompatible file type."""
+
+ def __init__(self, extensions: list[str], **kwargs) -> None:
+ """
+ Initialize the exception.
+
+ :param extensions: List of compatible file extensions.
+ :param kwargs: Additional keyword arguments.
+ """
+ message = f"Input file must have extension {' / '.join(extensions)}"
+ super().__init__(message, **kwargs)
+
+
+
+
+class FileNotFound(Exception):
+ """Exception raised when a file is not found."""
+
+ def __init__(self, file: str, **kwargs) -> None:
+ """
+ Initialize the exception.
+
+ :param file: The path of the file.
+ :param kwargs: Additional keyword arguments.
+ """
+ message = f"File not found: {file}"
+ super().__init__(message, **kwargs)
+
+
+
+
+class FileNotParsed(Exception):
+ """Exception raised when a file is not parsed."""
+
+ def __init__(self, file: str, **kwargs) -> None:
+ """
+ Initialize the exception.
+
+ :param file: The path of the file.
+ :param kwargs: Additional keyword arguments.
+ """
+ message = f"File not parsed: {file}"
+ super().__init__(message, **kwargs)
+
+
+
+
+class ImpossibleSpan(Exception):
+ """Exception raised when a span is impossible."""
+
+ def __init__(self, **kwargs) -> None:
+ """
+ Initialize the exception.
+
+ :param kwargs: Additional keyword arguments.
+ """
+ super().__init__(
+ "The closing position cannot be less or equal than the starting position",
+ **kwargs,
+ )
+
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from onigurumacffi import _Match as Match
+from onigurumacffi import _Pattern as Pattern
+from onigurumacffi import compile
+
+from .exceptions import FileNotFound, ImpossibleSpan
+from .logger import LOGGER
+
+POS = tuple[int, int]
+
+
+
+[docs]
+class ContentHandler:
+ """The handler object targetted for parsing.
+
+ To parse a string or file, it needs to be loaded into the ContentHandler object.
+ The handler will take care of all read actions on the input stream, where the contents
+ are index by a tuple (line_number, line_position). Additionally, the handler contains the
+ search method to match a search span against a input oniguruma regex pattern.
+ """
+
+ notLookForwardEOL = compile(r"(?<!\(\?=[^\(]*)\$")
+
+ def __init__(self, source: str) -> None:
+ """
+ Initialize a new instance of the Handler class.
+
+ :param source: The source code to be processed.
+ :type source: str
+
+ :ivar source: The source code to be processed.
+ :ivar lines: A list of lines in the source code, with a newline character at the end of each line.
+ :ivar line_lengths: A list of lengths of each line in the source code.
+ :ivar anchor: The end column of the previous match on the current line, used to resolve the \G anchor.
+ """
+ self.source = source
+ self.lines = [line + "\n" for line in source.split("\n")]
+ self.line_lengths = [len(line) for line in self.lines]
+ self.anchor: int = 0
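+ # Example (hypothetical input): for source "ab\ncd" the handler holds
+ # lines == ["ab\n", "cd\n"] and line_lengths == [3, 3], and position
+ # (1, 0) addresses the character "c".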
+
+
+[docs]
+ @classmethod
+ def from_path(cls, file_path: Path):
+ """Loads a file from a path"""
+
+ if not file_path.exists():
+ raise FileNotFound(str(file_path))
+
+ # Open file and replace Windows/Mac line endings
+ with open(file_path) as file:
+ content = file.read()
+ content = content.replace("\r\n", "\n")
+ content = content.replace("\r", "\n")
+
+ return cls(content)
+
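+ # Usage sketch (the file path is hypothetical):
+ #     handler = ContentHandler.from_path(Path("AnEnum.m"))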
+
+ def _check_pos(self, pos: POS):
+ if pos[0] > len(self.lines) or pos[1] > self.line_lengths[pos[0]]:
+ raise ImpossibleSpan
+
+
+[docs]
+ def next(self, pos: POS, step: int = 1) -> POS:
+ """Returns the next position on the current handler.
+
+ :param pos: The current position as a tuple (line, column).
+ :param step: The number of steps to move forward. Defaults to 1.
+ :return: The next position as a tuple (line, column).
+ """
+ if step > 1:
+ pos = self.next(pos, step=step - 1)
+ if pos[1] == self.line_lengths[pos[0]]:
+ if pos[0] == len(self.lines):
+ return pos
+ else:
+ return (pos[0] + 1, 0)
+ else:
+ return (pos[0], pos[1] + 1)
+
+
+
+[docs]
+ def prev(self, pos: POS, step: int = 1) -> POS:
+ """Returns the previous position on the current handler.
+
+ :param pos: The current position as a tuple (line, column).
+ :param step: The number of steps to go back. Defaults to 1.
+ :return: The previous position as a tuple (line, column).
+ """
+ if step > 1:
+ pos = self.prev(pos, step=step - 1)
+ if pos[1] == 0:
+ if pos[0] == 0:
+ return (0, 0)
+ else:
+ return (pos[0] - 1, self.line_lengths[pos[0] - 1])
+ else:
+ return (pos[0], pos[1] - 1)
+
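+ # Example (same "ab\ncd" source as above): next((0, 2)) == (0, 3) points
+ # at the trailing newline, next((0, 3)) == (1, 0) rolls over to the next
+ # line, and prev((1, 0)) == (0, 3) steps back across the line break.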
+
+
+[docs]
+ def range(self, start: POS, close: POS) -> list[POS]:
+ """
+ Returns a list of positions between the start and close positions.
+
+ :param start: The starting position.
+ :param close: The closing position.
+ :return: A list of positions between the start and close positions.
+ """
+ indices = []
+ if start[0] == close[0]:
+ for lp in range(start[1], close[1]):
+ indices.append((start[0], lp))
+ else:
+ for lp in range(start[1], self.line_lengths[start[0]]):
+ indices.append((start[0], lp))
+ for ln in range(start[0] + 1, close[0]):
+ for lp in range(self.line_lengths[ln]):
+ indices.append((ln, lp))
+ for lp in range(close[1]):
+ indices.append((close[0], lp))
+ return indices
+
+
+
+[docs]
+ def chars(self, start: POS, close: POS) -> dict[POS, str]:
+ """
+ Returns a dictionary mapping each position within the given range to the corresponding source character.
+
+ :param start: The starting position of the range.
+ :param close: The closing position of the range.
+ :return: A dictionary mapping each position within the range to the corresponding source character.
+ """
+ indices = self.range(start, close)
+ return {pos: self.read(pos) for pos in indices}
+
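+ # Example (same "ab\ncd" source): range((0, 1), (1, 1)) yields
+ # [(0, 1), (0, 2), (1, 0)] -- the close position is exclusive -- and
+ # chars((0, 1), (1, 1)) maps those positions to "b", "\n" and "c".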
+
+
+[docs]
+ def read_pos(self, start_pos: POS, close_pos: POS, skip_newline: bool = True) -> str:
+ """Reads the content between the start and end positions.
+
+ :param start_pos: The starting position of the content.
+ :param close_pos: The closing position of the content.
+ :param skip_newline: Whether to skip the newline character at the end of the content.
+ :return: The content between the start and end positions.
+ :raises ImpossibleSpan: If the start position is greater than the close position.
+ """
+ self._check_pos(start_pos)
+ self._check_pos(close_pos)
+ if start_pos > close_pos:
+ raise ImpossibleSpan
+
+ if start_pos[0] == close_pos[0]:
+ readout = self.lines[start_pos[0]][start_pos[1] : close_pos[1]]
+ else:
+ readout = ""
+ for ln in range(start_pos[0], close_pos[0] + 1):
+ if ln == start_pos[0]:
+ readout += self.lines[ln][start_pos[1] :]
+ elif ln == close_pos[0]:
+ readout += self.lines[ln][: close_pos[1]]
+ else:
+ readout += self.lines[ln]
+
+ if skip_newline and readout and readout[-1] == "\n":
+ readout = readout[:-1]
+
+ return readout
+
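+ # Example (same "ab\ncd" source): read_pos((0, 0), (1, 1)) returns
+ # "ab\nc"; the trailing-newline stripping only applies when the readout
+ # itself ends on "\n".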
+
+
+[docs]
+ def read_line(self, pos: POS) -> str:
+ """
+ Reads a line from the specified position and returns it.
+
+ :param pos: The position of the line to read. The first element is the line number (0-based),
+ and the second element is the starting position within the line.
+ :return: The line starting from the specified position.
+ """
+ line = self.lines[pos[0]]
+ return line[pos[1] :]
+
+
+
+[docs]
+ def read(self, start_pos: POS, length: int = 1, skip_newline: bool = True) -> str:
+ """Reads the content from start for a length.
+
+ :param start_pos: The starting position to read from.
+ :param length: The number of characters to read. Defaults to 1.
+ :param skip_newline: Whether to skip the newline character at the end of the read content. Defaults to True.
+ :return: The content read from the specified position.
+ :raises ImpossibleSpan: If the length is negative.
+ """
+ self._check_pos(start_pos)
+ if length < 0:
+ raise ImpossibleSpan
+
+ remainder = self.line_lengths[start_pos[0]] - start_pos[1]
+
+ if length <= remainder:
+ readout = self.lines[start_pos[0]][start_pos[1] : (start_pos[1] + length)]
+ else:
+ readout = self.lines[start_pos[0]][start_pos[1] :]
+ unread_length = length - remainder
+ ln = start_pos[0] + 1
+ if ln >= len(self.lines):
+ return ""
+
+ while unread_length > self.line_lengths[ln]:
+ readout += self.lines[ln]
+ unread_length -= self.line_lengths[ln]
+ ln += 1
+ else:
+ readout += self.lines[ln][:unread_length]
+
+ if skip_newline and readout[-1] == "\n":
+ readout = readout[:-1]
+
+ return readout
+
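+ # Example (same "ab\ncd" source): read((0, 1), length=3) crosses the
+ # line break and returns "b\nc".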
+
+
+[docs]
+ def search(
+ self,
+ pattern: Pattern,
+ starting: POS,
+ boundary: POS | None = None,
+ greedy: bool = False,
+ **kwargs,
+ ) -> tuple[Match | None, tuple[POS, POS] | None]:
+ """Matches the stream against a capture group.
+
+ :param pattern: The regular expression pattern to match against the stream.
+ :param starting: The starting position in the stream.
+ :param boundary: The boundary position in the stream. Defaults to None.
+ :param greedy: Determines if the matching should be greedy or not. Defaults to False.
+ :param kwargs: Additional keyword arguments.
+
+ :return: A tuple containing the matching result and the span of the match.
+
+ .. note::
+ - The stream is matched against the input pattern. If there are any capture groups,
+ each is then subsequently parsed by the provided parsers. The number of parsers therefore
+ must match the number of capture groups of the expression, or there must be a single parser
+ and no capture groups.
+ - The `greedy` parameter determines if the matching should be greedy or not. If set to True,
+ the matching will try to consume as much of the stream as possible. If set to False,
+ the matching will stop at the first match found.
+ - The `boundary` parameter can be used to specify a boundary position in the stream. If provided,
+ the matching will not go beyond this boundary position.
+ - In non-greedy mode, only whitespace characters may occur between the starting
+ position and the start of the match; if any other characters would have to be
+ skipped, the search fails and returns (None, None).
+ """
+
+ if pattern._pattern in ["\\z", "\\Z"]:
+ greedy = True
+
+ # Get line from starting (and boundary) positions
+ if boundary and starting[0] == boundary[0]:
+ line = self.lines[starting[0]][: boundary[1]]
+ else:
+ line = self.lines[starting[0]]
+
+ # Get the previous match's end position from the anchor in case of \G
+ init_pos = self.anchor if "\\G" in pattern._pattern else starting[1]
+
+ # Search the line starting from the initial position
+ matching = pattern.search(line, start=init_pos)
+
+ # Check that no characters are skipped; in non-greedy mode only whitespace may be skipped
+ if matching:
+ leading_string = line[init_pos : matching.start()]
+ if leading_string and not (greedy or leading_string.isspace()):
+ return None, None
+ else:
+ return None, None
+
+ # Get span of current matching, taking into account the lookback operation
+ start_pos = (starting[0], matching.start())
+ close_pos = (starting[0], matching.end())
+
+ # Do not allow matching past a boundary position, if provided
+ if boundary and close_pos > boundary:
+ return None, None
+
+ if leading_string and not leading_string.isspace() and greedy:
+ LOGGER.warning(
+ f"skipping < {leading_string} >",
+ position=start_pos,
+ depth=kwargs.get("depth", 0),
+ )
+
+ # Include \n in match span if pattern matches on end of line $
+ if (
+ self.notLookForwardEOL.search(pattern._pattern)
+ and matching.end() + 1 == self.line_lengths[starting[0]]
+ ):
+ newline_matching = pattern.search(line[:-1])
+ if newline_matching and newline_matching.span() == matching.span():
+ close_pos = (starting[0], matching.end() + 1)
+
+ # Set anchor for next matching
+ self.anchor = matching.end()
+
+ return matching, (start_pos, close_pos)
+
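+ # Usage sketch (pattern and source are illustrative): searching the first
+ # line of the "ab\ncd" handler for a word,
+ #
+ #     matching, span = handler.search(compile(r"\w+"), starting=(0, 0))
+ #
+ # returns the oniguruma match for "ab" with span ((0, 0), (0, 2)).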
+
+
+# textmate_grammar/utils/logger.py
+from __future__ import annotations
+
+import logging
+from functools import wraps
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from ..parser import GrammarParser
+
+
+MAX_LENGTH = 79
+
+
+
+[docs]
+def track_depth(func):
+ """Simple decorator to track recusion depth."""
+
+ @wraps(func)
+ def wrapper(*args, depth: int = -1, **kwargs):
+ return func(*args, depth=depth + 1, **kwargs)
+
+ return wrapper
+
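+# Example: a recursive parse function decorated with @track_depth is called
+# with depth=0 on the outermost call; each nested call that forwards its own
+# `depth` keyword is received one level deeper.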
+
+
+
+[docs]
+class LogFormatter(logging.Formatter):
+ """
+ A custom log formatter that formats log records with color-coded messages.
+ """
+
+ green = "\x1b[32;32m"
+ grey = "\x1b[38;20m"
+ yellow = "\x1b[33;20m"
+ red = "\x1b[31;20m"
+ bold_red = "\x1b[31;1m"
+ reset = "\x1b[0m"
+ format_string = "%(name)s:%(message)s"
+
+ FORMATS = {
+ logging.DEBUG: green + format_string + reset,
+ logging.INFO: grey + format_string + reset,
+ logging.WARNING: yellow + format_string + reset,
+ logging.ERROR: red + format_string + reset,
+ logging.CRITICAL: bold_red + format_string + reset,
+ }
+
+
+[docs]
+ def format(self, record):
+ """
+ Formats the log record with the color-coded format based on the log level.
+
+ :param record: The log record to be formatted.
+ :return: The formatted log message.
+ """
+ log_fmt = self.FORMATS.get(record.levelno)
+ formatter = logging.Formatter(log_fmt)
+ return formatter.format(record)
+
+
+
+
+
+[docs]
+class Logger:
+ """
+ The logger object for the grammar parsers.
+ """
+
+ long_msg_div = "\x1b[1;32m ... \x1b[0m"
+
+ def __init__(self, **kwargs) -> None:
+ self.id = None
+ self.max_token_length = 50
+ self.line_decimals = 3
+ self.position_decimals = 3
+ self.scope = "UNKNOWN"
+ self.logger = logging.getLogger("textmate_grammar")
+ channel = logging.StreamHandler()
+ channel.setFormatter(LogFormatter())
+ self.logger.addHandler(channel)
+
+
+[docs]
+ def configure(self, parser: GrammarParser, height: int, width: int, **kwargs) -> None:
+ """Configures the logger to a specific grammar and content length"""
+ self.line_decimals = len(str(height))
+ self.position_decimals = len(str(width))
+ id = parser.token if parser.token else parser.key
+ if self.id != id:
+ self.id = id
+ tokens = _gen_all_tokens(parser.grammar)
+ self.max_token_length = max(len(token) for token in tokens)
+ self.scope = parser.token
+
+
+
+[docs]
+ def format_message(
+ self,
+ message: str,
+ parser: GrammarParser | None = None,
+ position: tuple[int, int] | None = None,
+ depth: int = 0,
+ ) -> str:
+ """
+ Formats a logging message to the defined format.
+
+ :param message: The logging message to be formatted.
+ :param parser: The GrammarParser object associated with the message. Defaults to None.
+ :param position: The position tuple (line, column) associated with the message. Defaults to None.
+ :param depth: The depth of the message in the logging hierarchy. Defaults to 0.
+ :return: The formatted logging message.
+ """
+ if position:
+ msg_pos = "{:{ll}d}-{:{lp}d}".format(
+ *position, ll=self.line_decimals, lp=self.position_decimals
+ ).replace(" ", "0")
+ else:
+ msg_pos = "." * (self.line_decimals + self.position_decimals + 1)
+
+ if parser:
+ parser_id = parser.token if parser.token else parser.key
+ msg_id = (
+ "." * (self.max_token_length - len(parser_id)) + parser_id[: self.max_token_length]
+ )
+ else:
+ msg_id = "." * self.max_token_length
+
+ vb_message = f"{'|'*(depth-1)}{'-'*bool(depth)}{message}"
+
+ if len(vb_message) > MAX_LENGTH:
+ half_length = min([(MAX_LENGTH - 6) // 2, (len(vb_message) - 6) // 2])
+ vb_message = vb_message[:half_length] + self.long_msg_div + vb_message[-half_length:]
+
+ return f"{self.scope}:{msg_pos}:{msg_id}: {vb_message}"
+
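+ # Example (hypothetical values): with scope "source.matlab", 3-digit
+ # line/position decimals, a 20-character token column and depth 2, a
+ # message at position (1, 5) formats roughly as
+ #     source.matlab:001-005:.......some.token.id: |-message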
+
+
+[docs]
+ def debug(self, *args, **kwargs) -> None:
+ if self.logger.getEffectiveLevel() > logging.DEBUG:
+ return
+ message = self.format_message(*args, **kwargs)
+ self.logger.debug(message)
+
+
+
+[docs]
+ def info(self, *args, **kwargs) -> None:
+ if self.logger.getEffectiveLevel() > logging.INFO:
+ return
+ message = self.format_message(*args, **kwargs)
+ self.logger.info(message)
+
+
+
+[docs]
+ def warning(self, *args, **kwargs) -> None:
+ if self.logger.getEffectiveLevel() > logging.WARNING:
+ return
+ message = self.format_message(*args, **kwargs)
+ self.logger.warning(message)
+
+
+
+[docs]
+ def error(self, *args, **kwargs) -> None:
+ if self.logger.getEffectiveLevel() > logging.ERROR:
+ return
+ message = self.format_message(*args, **kwargs)
+ self.logger.error(message)
+
+
+
+[docs]
+ def critical(self, *args, **kwargs) -> None:
+ if self.logger.getEffectiveLevel() > logging.CRITICAL:
+ return
+ message = self.format_message(*args, **kwargs)
+ self.logger.critical(message)
+
+
+
+
+def _gen_all_tokens(grammar: dict, items: list[str] | None = None) -> list[str]:
+ if items is None:
+ items = []
+ for key, value in grammar.items():
+ if key in ["name", "contentName"]:
+ items.append(value)
+ elif isinstance(value, list):
+ for nested_grammar in (item for item in value if isinstance(item, dict)):
+ _gen_all_tokens(nested_grammar, items)
+ elif isinstance(value, dict):
+ _gen_all_tokens(value, items)
+ return items
+
+
+LOGGER = Logger()
+
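+# Usage sketch: LOGGER is a module-level singleton shared by all parsers.
+# To see the formatted parse trace, raise the standard logging level, as in
+# the package example:
+#
+#     import logging
+#     logging.getLogger("textmate_grammar").setLevel(logging.INFO)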