Skip to content

Commit

Permalink
Add find/findall + mypy fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
watermarkhu committed Feb 17, 2024
1 parent acbaf61 commit 330509f
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 40 deletions.
78 changes: 56 additions & 22 deletions textmate_grammar/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import defaultdict
from itertools import groupby
from pprint import pprint
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Generator

from .handler import POS, ContentHandler, Match, Pattern
from .logger import LOGGER
Expand All @@ -16,13 +16,7 @@
TOKEN_DICT = dict[POS, list[str]]


class Element:
def _token_by_index(self, *args, **kwargs):
# Stub for Mypy
return


class Capture(Element):
class Capture:
"""A captured matching group.
After mathing, any pattern can have a number of capture groups for which subsequent parsers can be defined.
Expand Down Expand Up @@ -62,7 +56,7 @@ def __eq__(self, other: object) -> bool:
def __repr__(self) -> str:
    """Return a compact debug representation of the capture group."""
    return f"@capture<{self.key}>"

def dispatch(self) -> list[Element]:
def dispatch(self) -> list[Capture | ContentElement]:
"""Dispatches the remaining parse of the capture group."""
elements = []
for group_id, parser in self.parsers.items():
Expand Down Expand Up @@ -107,20 +101,20 @@ def dispatch(self) -> list[Element]:


def dispatch_list(
    pending_elements: list[Capture | ContentElement], parent: ContentElement | None = None
) -> list[ContentElement]:
    """Dispatch every captured parser in *pending_elements*.

    Capture items are expanded (recursively) into the elements produced by
    their parsers; plain content elements are kept as-is, except for the
    *parent* element itself, which is filtered out to avoid self-nesting.
    """
    dispatched: list[ContentElement] = []
    for entry in pending_elements:
        if isinstance(entry, Capture):
            # Recursively resolve the capture group into concrete elements.
            dispatched.extend(dispatch_list(entry.dispatch()))
        elif entry != parent:
            dispatched.append(entry)
    return dispatched


class ContentElement(Element):
class ContentElement:
"""The base grammar element object."""

def __init__(
Expand All @@ -129,7 +123,7 @@ def __init__(
grammar: dict,
content: str,
characters: dict[POS, str],
children: list[Element] | None = None,
children: list[Capture | ContentElement] | None = None,
) -> None:
if children is None:
children = []
Expand All @@ -138,11 +132,15 @@ def __init__(
self.content = content
self.characters = characters
self._children_pending = children
self._children_dispached: list[Element] = []
self._children_dispached: list[ContentElement] = []
self._dispatched_children: bool = False

@property
def children(self) -> list[Element]:
def _subelements(self) -> list[ContentElement]:
return self.children

@property
def children(self) -> list[ContentElement]:
"Children elements"
if self._children_pending:
if not self._dispatched_children:
Expand Down Expand Up @@ -172,6 +170,38 @@ def to_dict(self, verbosity: int = -1, all_content: bool = False, **kwargs) -> d
)
return out_dict

def find(
    self, tokens: str | list[str], verbosity: int = -1, stack: list[str] | None = None
) -> Generator[tuple[ContentElement, list[str]], None, None]:
    """Find subelements that match the input token(s).

    Walks the element tree depth-first and yields ``(element, stack)``
    pairs, where ``stack`` is the list of ancestor tokens (including this
    element's own token) leading to the match.

    :param tokens: a token or list of tokens to match; ``"*"`` matches any.
    :param verbosity: maximum search depth; a negative value (the default)
        searches without a depth bound.
    :param stack: ancestor token stack, used internally for recursion.
    """
    if isinstance(tokens, str):
        tokens = [tokens]
    # Build a fresh stack rather than mutating the caller's list in place:
    # the original `stack += [...]` aliased and extended the argument,
    # leaking tokens into a caller-supplied stack across calls.
    stack = (list(stack) if stack is not None else []) + [self.token]

    if verbosity:
        verbosity -= 1
    for child in self._subelements:
        if tokens == ["*"] or child.token in tokens:
            # Yield a copy so consumers cannot alter the shared stack.
            yield child, list(stack)
        if verbosity:
            # NOTE(review): verbosity is decremented once above and again
            # when recursing, so each level consumes two units — confirm
            # this is the intended depth-limiting scheme.
            yield from child.find(tokens, verbosity=verbosity - 1, stack=list(stack))
    return None

def findall(
    self, tokens: str | list[str], verbosity: int = -1
) -> list[tuple[ContentElement, list[str]]]:
    """Return all subelements matching the input token(s) as a list.

    Convenience wrapper that exhausts the :meth:`find` generator.
    """
    results: list[tuple[ContentElement, list[str]]] = []
    for element, token_stack in self.find(tokens, verbosity=verbosity):
        results.append((element, token_stack))
    return results

def flatten(self) -> list[tuple[tuple[int, int], str, list[str]]]:
"""Converts the object to a flattened array of tokens per index."""
token_dict = self._token_by_index(defaultdict(list))
Expand Down Expand Up @@ -238,8 +268,8 @@ class ContentBlockElement(ContentElement):

def __init__(
self,
begin: list[Element] | None = None,
end: list[Element] | None = None,
begin: list[Capture | ContentElement] | None = None,
end: list[Capture | ContentElement] | None = None,
**kwargs,
) -> None:
if end is None:
Expand All @@ -249,13 +279,17 @@ def __init__(
super().__init__(**kwargs)
self._begin_pending = begin
self._end_pending = end
self._begin_dispached: list[Element] = []
self._end_dispached: list[Element] = []
self._begin_dispached: list[ContentElement] = []
self._end_dispached: list[ContentElement] = []
self._dispatched_begin: bool = False
self._dispatched_end: bool = False

@property
def begin(self) -> list[Element]:
def _subelements(self) -> list[ContentElement]:
return self.begin + self.children + self.end

@property
def begin(self) -> list[ContentElement]:
"Begin elements"
if self._begin_pending:
if not self._dispatched_begin:
Expand All @@ -266,7 +300,7 @@ def begin(self) -> list[Element]:
return []

@property
def end(self) -> list[Element]:
def end(self) -> list[ContentElement]:
"End elements"
if self._end_pending:
if not self._dispatched_end:
Expand Down
8 changes: 4 additions & 4 deletions textmate_grammar/language.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

from .elements import Element
from .elements import Capture, ContentElement
from .exceptions import IncompatibleFileType
from .handler import POS, ContentHandler
from .logger import LOGGER
Expand Down Expand Up @@ -80,7 +80,7 @@ def _initialize_repository(self):

super()._initialize_repository()

def parse_file(self, filePath: str | Path, **kwargs) -> Element | None:
def parse_file(self, filePath: str | Path, **kwargs) -> Capture | ContentElement | None:
"""Parses an entire file with the current grammar"""
if type(filePath) != Path:
filePath = Path(filePath)
Expand All @@ -102,15 +102,15 @@ def parse_string(self, input: str, **kwargs):
LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths))
return self._parse_language(handler, **kwargs)

def _parse_language(self, handler: ContentHandler, **kwargs) -> Capture | ContentElement | None:
    """Parse the current stream under the language's root scope.

    Returns the root element on success, or ``None`` when parsing failed.
    """
    parsed, elements, _ = self.parse(handler, (0, 0), **kwargs)
    if not parsed:
        return None
    return elements[0]

def _parse(
    self, handler: ContentHandler, starting: POS, **kwargs
) -> tuple[bool, list[Capture | ContentElement], tuple[int, int]]:
    """Parse at the language level, always collecting every match.

    Any caller-supplied ``find_one`` is discarded: the language root must
    gather all top-level elements, so ``find_one`` is forced to ``False``.
    """
    filtered_kwargs = {key: value for key, value in kwargs.items() if key != "find_one"}
    return super()._parse(handler, starting, find_one=False, **filtered_kwargs)

Expand Down
28 changes: 14 additions & 14 deletions textmate_grammar/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import onigurumacffi as re

from .elements import Capture, ContentBlockElement, ContentElement, Element
from .elements import Capture, ContentBlockElement, ContentElement
from .exceptions import IncludedParserNotFound
from .handler import POS, ContentHandler, Pattern
from .logger import LOGGER, track_depth
Expand Down Expand Up @@ -82,7 +82,7 @@ def _parse(
handler: ContentHandler,
starting: POS,
**kwargs,
) -> tuple[bool, list[Element], tuple[int, int] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
"""The abstract method which all parsers much implement
The _parse method is called by parse, which will additionally parse any nested Capture elements.
Expand All @@ -104,7 +104,7 @@ def parse(
starting: POS = (0, 0),
boundary: POS | None = None,
**kwargs,
) -> tuple[bool, list[Element], tuple[int, int] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
"""The method to parse a handler using the current grammar."""
if not self.initialized and self.language is not None:
self.language._initialize_repository()
Expand All @@ -120,7 +120,7 @@ def match_and_capture(
parsers: dict[int, GrammarParser] | None = None,
parent_capture: Capture | None = None,
**kwargs,
) -> tuple[tuple[POS, POS] | None, str, list[Element]]:
) -> tuple[tuple[POS, POS] | None, str, list[Capture | ContentElement]]:
"""Matches a pattern and its capture groups.
Matches the pattern on the handler between the starting and boundary positions. If a pattern is matched,
Expand Down Expand Up @@ -170,13 +170,13 @@ def _parse(
starting: POS,
boundary: POS,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
"""The parse method for grammars for which only the token is provided.
When no regex patterns are provided. The element is created between the initial and boundary positions.
"""
content = handler.read_pos(starting, boundary)
elements: list[Element] = [
elements: list[Capture | ContentElement] = [
ContentElement(
token=self.token,
grammar=self.grammar,
Expand Down Expand Up @@ -228,7 +228,7 @@ def _parse(
starting: POS,
boundary: POS,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
"""The parse method for grammars for which a match pattern is provided."""

span, content, captures = self.match_and_capture(
Expand Down Expand Up @@ -257,7 +257,7 @@ def _parse(
)

if self.token:
elements: list[Element] = [
elements: list[Capture | ContentElement] = [
ContentElement(
token=self.token,
grammar=self.grammar,
Expand Down Expand Up @@ -318,14 +318,14 @@ def _parse(
greedy: bool = False,
find_one: bool = True,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS]]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS]]:
"""The parse method for grammars for which a match pattern is provided."""

if boundary is None:
boundary = (len(handler.lines) - 1, handler.line_lengths[-1])

parsed = False
elements: list[Element] = []
elements: list[Capture | ContentElement] = []
patterns = [parser for parser in self.patterns if not parser.disabled]

current = (starting[0], starting[1])
Expand Down Expand Up @@ -499,7 +499,7 @@ def _parse(
boundary: POS,
greedy: bool = False,
**kwargs,
) -> tuple[bool, list[Element], tuple[POS, POS] | None]:
) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
"""The parse method for grammars for which a begin/end pattern is provided."""

begin_span, _, begin_elements = self.match_and_capture(
Expand Down Expand Up @@ -533,8 +533,8 @@ def _parse(
boundary = (len(handler.lines) - 1, handler.line_lengths[-1])

# Define loop parameters
end_elements: list[Element] = []
mid_elements: list[Element] = []
end_elements: list[Capture | ContentElement] = []
mid_elements: list[Capture | ContentElement] = []
patterns = [parser for parser in self.patterns if not parser.disabled]
first_run = True

Expand Down Expand Up @@ -788,7 +788,7 @@ def _parse(

# Construct output elements
if self.token:
elements: list[Element] = [
elements: list[Capture | ContentElement] = [
ContentBlockElement(
token=self.token,
grammar=self.grammar,
Expand Down

0 comments on commit 330509f

Please sign in to comment.