Skip to content

Commit

Permalink
Add find/findall + mypy fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
watermarkhu committed Feb 17, 2024
1 parent acbaf61 commit 330509f
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 40 deletions.
78 changes: 56 additions & 22 deletions textmate_grammar/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import defaultdict
from itertools import groupby
from pprint import pprint
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Generator

from .handler import POS, ContentHandler, Match, Pattern
from .logger import LOGGER
Expand All @@ -16,13 +16,7 @@
TOKEN_DICT = dict[POS, list[str]]


class Element:
def _token_by_index(self, *args, **kwargs):
# Stub for Mypy
return


class Capture(Element):
class Capture:
"""A captured matching group.
After mathing, any pattern can have a number of capture groups for which subsequent parsers can be defined.
Expand Down Expand Up @@ -62,7 +56,7 @@ def __eq__(self, other: object) -> bool:
def __repr__(self) -> str:
    """Return a compact debug representation of the capture group."""
    return f"@capture<{self.key}>"

def dispatch(self) -> list[Element]:
def dispatch(self) -> list[Capture | ContentElement]:
"""Dispatches the remaining parse of the capture group."""
elements = []
for group_id, parser in self.parsers.items():
Expand Down Expand Up @@ -107,20 +101,20 @@ def dispatch(self) -> list[Element]:


def dispatch_list(
    pending_elements: list[Capture | ContentElement], parent: ContentElement | None = None
) -> list[ContentElement]:
    """Dispatch every captured parser in *pending_elements*.

    Capture items are expanded (recursively) into the elements produced by
    their parsers; plain content elements are kept as-is, except for the
    *parent* element itself, which is filtered out to avoid self-nesting.
    """
    dispatched: list[ContentElement] = []
    for entry in pending_elements:
        if isinstance(entry, Capture):
            # Recursively resolve the capture group into concrete elements.
            dispatched.extend(dispatch_list(entry.dispatch()))
        elif entry != parent:
            dispatched.append(entry)
    return dispatched


class ContentElement(Element):
class ContentElement:
"""The base grammar element object."""

def __init__(
Expand All @@ -129,7 +123,7 @@ def __init__(
grammar: dict,
content: str,
characters: dict[POS, str],
children: list[Element] | None = None,
children: list[Capture | ContentElement] | None = None,
) -> None:
if children is None:
children = []
Expand All @@ -138,11 +132,15 @@ def __init__(
self.content = content
self.characters = characters
self._children_pending = children
self._children_dispached: list[Element] = []
self._children_dispached: list[ContentElement] = []
self._dispatched_children: bool = False

@property
def children(self) -> list[Element]:
def _subelements(self) -> list[ContentElement]:
return self.children

@property
def children(self) -> list[ContentElement]:
"Children elements"
if self._children_pending:
if not self._dispatched_children:
Expand Down Expand Up @@ -172,6 +170,38 @@ def to_dict(self, verbosity: int = -1, all_content: bool = False, **kwargs) -> d
)
return out_dict

def find(
    self, tokens: str | list[str], verbosity: int = -1, stack: list[str] | None = None
) -> Generator[tuple[ContentElement, list[str]], None, None]:
    """Find subelements that match the input token(s).

    Walks the element tree depth-first and yields ``(element, stack)``
    pairs, where ``stack`` is the list of ancestor tokens (including this
    element's own token) leading to the match.

    :param tokens: a token or list of tokens to match; ``"*"`` matches any.
    :param verbosity: maximum search depth; a negative value (the default)
        searches without a depth bound.
    :param stack: ancestor token stack, used internally for recursion.
    """
    if isinstance(tokens, str):
        tokens = [tokens]
    # Build a fresh stack rather than mutating the caller's list in place:
    # the original `stack += [...]` aliased and extended the argument,
    # leaking tokens into a caller-supplied stack across calls.
    stack = (list(stack) if stack is not None else []) + [self.token]

    if verbosity:
        verbosity -= 1
    for child in self._subelements:
        if tokens == ["*"] or child.token in tokens:
            # Yield a copy so consumers cannot alter the shared stack.
            yield child, list(stack)
        if verbosity:
            # NOTE(review): verbosity is decremented once above and again
            # when recursing, so each level consumes two units — confirm
            # this is the intended depth-limiting scheme.
            yield from child.find(tokens, verbosity=verbosity - 1, stack=list(stack))
    return None

def findall(
    self, tokens: str | list[str], verbosity: int = -1
) -> list[tuple[ContentElement, list[str]]]:
    """Return all subelements matching the input token(s) as a list.

    Convenience wrapper that exhausts the :meth:`find` generator.
    """
    results: list[tuple[ContentElement, list[str]]] = []
    for element, token_stack in self.find(tokens, verbosity=verbosity):
        results.append((element, token_stack))
    return results

def flatten(self) -> list[tuple[tuple[int, int], str, list[str]]]:
"""Converts the object to a flattened array of tokens per index."""
token_dict = self._token_by_index(defaultdict(list))
Expand Down Expand Up @@ -238,8 +268,8 @@ class ContentBlockElement(ContentElement):

def __init__(
self,
begin: list[Element] | None = None,
end: list[Element] | None = None,
begin: list[Capture | ContentElement] | None = None,
end: list[Capture | ContentElement] | None = None,
**kwargs,
) -> None:
if end is None:
Expand All @@ -249,13 +279,17 @@ def __init__(
super().__init__(**kwargs)
self._begin_pending = begin
self._end_pending = end
self._begin_dispached: list[Element] = []
self._end_dispached: list[Element] = []
self._begin_dispached: list[ContentElement] = []
self._end_dispached: list[ContentElement] = []
self._dispatched_begin: bool = False
self._dispatched_end: bool = False

@property
def begin(self) -> list[Element]:
def _subelements(self) -> list[ContentElement]:
return self.begin + self.children + self.end

@property
def begin(self) -> list[ContentElement]:
"Begin elements"
if self._begin_pending:
if not self._dispatched_begin:
Expand All @@ -266,7 +300,7 @@ def begin(self) -> list[Element]:
return []

@property
def end(self) -> list[Element]:
def end(self) -> list[ContentElement]:
"End elements"
if self._end_pending:
if not self._dispatched_end:
Expand Down
8 changes: 4 additions & 4 deletions textmate_grammar/language.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

from .elements import Element
from .elements import Capture, ContentElement
from .exceptions import IncompatibleFileType
from .handler import POS, ContentHandler
from .logger import LOGGER
Expand Down Expand Up @@ -80,7 +80,7 @@ def _initialize_repository(self):

super()._initialize_repository()

def parse_file(self, filePath: str | Path, **kwargs) -> Element | None:
def parse_file(self, filePath: str | Path, **kwargs) -> Capture | ContentElement | None:
"""Parses an entire file with the current grammar"""
if type(filePath) != Path:
filePath = Path(filePath)
Expand All @@ -102,15 +102,15 @@ def parse_string(self, input: str, **kwargs):
LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths))
return self._parse_language(handler, **kwargs)

def _parse_language(self, handler: ContentHandler, **kwargs) -> Capture | ContentElement | None:
    """Parse the current stream under the language's root scope.

    Returns the root element on success, or ``None`` when parsing failed.
    """
    parsed, elements, _ = self.parse(handler, (0, 0), **kwargs)
    if not parsed:
        return None
    return elements[0]

def _parse(
    self, handler: ContentHandler, starting: POS, **kwargs
) -> tuple[bool, list[Capture | ContentElement], tuple[int, int]]:
    """Parse at the language level, always collecting every match.

    Any caller-supplied ``find_one`` is discarded: the language root must
    gather all top-level elements, so ``find_one`` is forced to ``False``.
    """
    filtered_kwargs = {key: value for key, value in kwargs.items() if key != "find_one"}
    return super()._parse(handler, starting, find_one=False, **filtered_kwargs)

Expand Down
28 changes: 14 additions & 14 deletions textmate_grammar/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import onigurumacffi as re

from .elements import Capture, ContentBlockElement, ContentElement, Element
from .elements import Capture, ContentBlockElement, ContentElement
from .exceptions import IncludedParserNotFound
from .handler import POS, ContentHandler, Pattern
from .logger import LOGGER, track_depth
Expand Down Expand Up @@ -82,7 +82,7 @@ def _parse(
handler: ContentHandler,
starting: POS,
**kwargs,
) -> tuple[bool, list[Element], tuple[int, int] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
"""The abstract method which all parsers much implement
The _parse method is called by parse, which will additionally parse any nested Capture elements.
Expand All @@ -104,7 +104,7 @@ def parse(
starting: POS = (0, 0),
boundary: POS | None = None,
**kwargs,
) -> tuple[bool, list[Element], tuple[int, int] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
"""The method to parse a handler using the current grammar."""
if not self.initialized and self.language is not None:
self.language._initialize_repository()
Expand All @@ -120,7 +120,7 @@ def match_and_capture(
parsers: dict[int, GrammarParser] | None = None,
parent_capture: Capture | None = None,
**kwargs,
) -> tuple[tuple[POS, POS] | None, str, list[Element]]:
) -> tuple[tuple[POS, POS] | None, str, list[Capture | ContentElement]]:
"""Matches a pattern and its capture groups.
Matches the pattern on the handler between the starting and boundary positions. If a pattern is matched,
Expand Down Expand Up @@ -170,13 +170,13 @@ def _parse(
starting: POS,
boundary: POS,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
"""The parse method for grammars for which only the token is provided.
When no regex patterns are provided. The element is created between the initial and boundary positions.
"""
content = handler.read_pos(starting, boundary)
elements: list[Element] = [
elements: list[Capture | ContentElement] = [
ContentElement(
token=self.token,
grammar=self.grammar,
Expand Down Expand Up @@ -228,7 +228,7 @@ def _parse(
starting: POS,
boundary: POS,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
"""The parse method for grammars for which a match pattern is provided."""

span, content, captures = self.match_and_capture(
Expand Down Expand Up @@ -257,7 +257,7 @@ def _parse(
)

if self.token:
elements: list[Element] = [
elements: list[Capture | ContentElement] = [
ContentElement(
token=self.token,
grammar=self.grammar,
Expand Down Expand Up @@ -318,14 +318,14 @@ def _parse(
greedy: bool = False,
find_one: bool = True,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS]]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS]]:
"""The parse method for grammars for which a match pattern is provided."""

if boundary is None:
boundary = (len(handler.lines) - 1, handler.line_lengths[-1])

parsed = False
elements: list[Element] = []
elements: list[Capture | ContentElement] = []
patterns = [parser for parser in self.patterns if not parser.disabled]

current = (starting[0], starting[1])
Expand Down Expand Up @@ -499,7 +499,7 @@ def _parse(
boundary: POS,
greedy: bool = False,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
"""The parse method for grammars for which a begin/end pattern is provided."""

begin_span, _, begin_elements = self.match_and_capture(
Expand Down Expand Up @@ -533,8 +533,8 @@ def _parse(
boundary = (len(handler.lines) - 1, handler.line_lengths[-1])

# Define loop parameters
end_elements: list[Element] = []
mid_elements: list[Element] = []
end_elements: list[Capture | ContentElement] = []
mid_elements: list[Capture | ContentElement] = []
patterns = [parser for parser in self.patterns if not parser.disabled]
first_run = True

Expand Down Expand Up @@ -788,7 +788,7 @@ def _parse(

# Construct output elements
if self.token:
elements: list[Element] = [
elements: list[Capture | ContentElement] = [
ContentBlockElement(
token=self.token,
grammar=self.grammar,
Expand Down

0 comments on commit 330509f

Please sign in to comment.