diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..f6b71dd --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,45 @@ +name: documentation + +on: + workflow_dispatch: + push: + branches: + - main + paths: + - 'src/**.py' + - 'docs/**' + - '.github/workflows/docs.yml' + +permissions: + contents: write + +jobs: + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install poetry + uses: abatilo/actions-poetry@v3 + + - name: Install dependencies + run: | + poetry install --without test,dev + + - name: Sphinx build + run: | + source $(poetry env info --path)/bin/activate + cd docs && make html + + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + with: + publish_branch: gh-pages + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/_build/html + force_orphan: true \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e56e8a9..d3b61ad 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,23 +1,23 @@ -Install the repository after cloning with [poetry](https://python-poetry.org/), and setup [pre-commit](https://pre-commit.com/) such that code is linted and formatted with [Ruff](https://docs.astral.sh/ruff/). +Install the repository after cloning with [poetry](https://python-poetry.org/), and setup [pre-commit](https://pre-commit.com/) such that code is linted and formatted with [Ruff](https://docs.astral.sh/ruff/) and checked with [mypy](https://mypy-lang.org/). 
```bash -> pip install poetry -> cd textmate-grammar-python -> poetry install -> pre-commit install +pip install poetry +cd textmate-grammar-python +poetry install +pre-commit install ``` Run unit tests ```bash -> tox run +tox run ``` Run static type checker ```bash -> tox run -e mypy +tox run -e mypy ``` -Run regression testing against vscode-textmate (will install npm and required packages) +Run regression testing against vscode-textmate (will install npm and required packages). ```bash -> tox run -e regression +tox run -e regression ``` diff --git a/README.md b/README.md index 47af605..c8a5d47 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,35 @@ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) [![Python versions](https://img.shields.io/pypi/pyversions/textmate-grammar-python.svg)](https://pypi.python.org/pypi/textmate-grammar-python) [![CI/CD](https://github.com/watermarkhu/textmate-grammar-python/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/watermarkhu/textmate-grammar-python/blob/main/.github/workflows/ci.yml) - +[![readthedocs](https://readthedocs.org/projects/textmate-grammar-python/badge/?version=latest)](https://textmate-grammar-python.readthedocs.io) # textmate-grammar-python -A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python. TextMate grammars use the oniguruma dialect (https://github.com/kkos/oniguruma). Supports loading grammar files from JSON, PLIST, or YAML format. +A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python. + +Textmate grammars are made for [vscode-texmate](https://github.com/microsoft/vscode-textmate), allowing for syntax highlighting in VSCode after tokenization. This presents textmate-grammar-python with a large list of potentially supported languages. 
+ +```mermaid +flowchart TD + A[grammar file] + Z[code] + B("`vscode-textmate **js**`") + C("`textmate-grammar-**python**`") + D[tokens] + click C "https://github.com/microsoft/vscode-textmate" + + Z --> B + Z --> C + A -.-> B --> D + A -.-> C --> D +``` ## Usage -Install the module using `pip install textmate-grammar-python`. +Install the module with: +```bash +pip install textmate-grammar-python +``` Before tokenization is possible, a `LanguageParser` needs to be initialized using a loaded grammar. @@ -23,9 +43,9 @@ from textmate_grammar.grammars import matlab parser = LanguageParser(matlab.GRAMMAR) ``` -After this, one can either choose to call `parser.parsing_string` to parse a input string directly, or call `parser.parse_file` with the path to the appropiate source file as the first argument, such as the the example `example.py`. +After this, one can either choose to call [`parser.parse_string`](https://textmate-grammar-python.readthedocs.io/en/latest/apidocs/textmate_grammar/textmate_grammar.language.html#textmate_grammar.language.LanguageParser.parse_string) to parse an input string directly, or call [`parser.parse_file`](https://textmate-grammar-python.readthedocs.io/en/latest/apidocs/textmate_grammar/textmate_grammar.language.html#textmate_grammar.language.LanguageParser.parse_file) with the path to the appropriate source file as the first argument, such as in the example [`example.py`](https://github.com/watermarkhu/textmate-grammar-python/blob/main/example.py). -The parsed `element` object can be displayed directly by calling the `print` method. By default the element is printed as an element tree in a dictionary format. +The parsed `element` object can be displayed directly by calling the [`print`](https://textmate-grammar-python.readthedocs.io/en/latest/apidocs/textmate_grammar/textmate_grammar.elements.html#textmate_grammar.elements.ContentElement.print) method. By default the element is printed as an element tree in a dictionary format. 
```python >>> element = parser.parse_string("value = num2str(10);") @@ -59,9 +79,7 @@ Alternatively, with the keyword argument `flatten` the element is displayed as a [(0, 19), ';', ['source.matlab', 'punctuation.terminator.semicolon.matlab']]] ``` -## Supported Languages -- [MATLAB](https://github.com/mathworks/MATLAB-Language-grammar) - -## TODO -- Implement Begin/While pattern, required for other grammars. +## Information +- For further information, please checkout the [documentation](https://textmate-grammar-python.readthedocs.io/en/latest/). +- To setup an environment for development, see [CONTRIBUTING.md](https://github.com/watermarkhu/textmate-grammar-python/blob/main/CONTRIBUTING.md) \ No newline at end of file diff --git a/docs/apidocs/index.rst b/docs/apidocs/index.rst deleted file mode 100644 index 2ec03ef..0000000 --- a/docs/apidocs/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -API Reference -============= - -This page contains auto-generated API reference documentation [#f1]_. - -.. toctree:: - :titlesonly: - - textmate_grammar/textmate_grammar - -.. [#f1] Created with `sphinx-autodoc2 `_ diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.cache.rst b/docs/apidocs/textmate_grammar/textmate_grammar.cache.rst deleted file mode 100644 index 997c238..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.cache.rst +++ /dev/null @@ -1,147 +0,0 @@ -:py:mod:`textmate_grammar.cache` -================================ - -.. py:module:: textmate_grammar.cache - -.. autodoc2-docstring:: textmate_grammar.cache - :allowtitles: - -Module Contents ---------------- - -Classes -~~~~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`TextmateCache ` - - .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache - :summary: - * - :py:obj:`SimpleCache ` - - .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache - :summary: - * - :py:obj:`ShelveCache ` - - .. 
autodoc2-docstring:: textmate_grammar.cache.ShelveCache - :summary: - -Functions -~~~~~~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`init_cache ` - - .. autodoc2-docstring:: textmate_grammar.cache.init_cache - :summary: - -Data -~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`CACHE_DIR ` - - .. autodoc2-docstring:: textmate_grammar.cache.CACHE_DIR - :summary: - * - :py:obj:`CACHE ` - - .. autodoc2-docstring:: textmate_grammar.cache.CACHE - :summary: - -API -~~~ - -.. py:data:: CACHE_DIR - :canonical: textmate_grammar.cache.CACHE_DIR - :value: 'resolve(...)' - - .. autodoc2-docstring:: textmate_grammar.cache.CACHE_DIR - -.. py:class:: TextmateCache - :canonical: textmate_grammar.cache.TextmateCache - - Bases: :py:obj:`typing.Protocol` - - .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache - - .. py:method:: cache_valid(filepath: pathlib.Path) -> bool - :canonical: textmate_grammar.cache.TextmateCache.cache_valid - - .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache.cache_valid - - .. py:method:: load(filepath: pathlib.Path) -> textmate_grammar.elements.ContentElement - :canonical: textmate_grammar.cache.TextmateCache.load - - .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache.load - - .. py:method:: save(filePath: pathlib.Path, element: textmate_grammar.elements.ContentElement) -> None - :canonical: textmate_grammar.cache.TextmateCache.save - - .. autodoc2-docstring:: textmate_grammar.cache.TextmateCache.save - -.. py:class:: SimpleCache() - :canonical: textmate_grammar.cache.SimpleCache - - Bases: :py:obj:`textmate_grammar.cache.TextmateCache` - - .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache.__init__ - - .. py:method:: cache_valid(filepath: pathlib.Path) -> bool - :canonical: textmate_grammar.cache.SimpleCache.cache_valid - - .. 
autodoc2-docstring:: textmate_grammar.cache.SimpleCache.cache_valid - - .. py:method:: load(filepath: pathlib.Path) -> textmate_grammar.elements.ContentElement - :canonical: textmate_grammar.cache.SimpleCache.load - - .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache.load - - .. py:method:: save(filepath: pathlib.Path, element: textmate_grammar.elements.ContentElement) -> None - :canonical: textmate_grammar.cache.SimpleCache.save - - .. autodoc2-docstring:: textmate_grammar.cache.SimpleCache.save - -.. py:class:: ShelveCache() - :canonical: textmate_grammar.cache.ShelveCache - - Bases: :py:obj:`textmate_grammar.cache.TextmateCache` - - .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.__init__ - - .. py:method:: cache_valid(filepath: pathlib.Path) -> bool - :canonical: textmate_grammar.cache.ShelveCache.cache_valid - - .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.cache_valid - - .. py:method:: load(filepath: pathlib.Path) -> textmate_grammar.elements.ContentElement - :canonical: textmate_grammar.cache.ShelveCache.load - - .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.load - - .. py:method:: save(filepath: pathlib.Path, element: textmate_grammar.elements.ContentElement) -> None - :canonical: textmate_grammar.cache.ShelveCache.save - - .. autodoc2-docstring:: textmate_grammar.cache.ShelveCache.save - -.. py:data:: CACHE - :canonical: textmate_grammar.cache.CACHE - :type: textmate_grammar.cache.TextmateCache - :value: 'SimpleCache(...)' - - .. autodoc2-docstring:: textmate_grammar.cache.CACHE - -.. py:function:: init_cache(type: str = 'simple') -> textmate_grammar.cache.TextmateCache - :canonical: textmate_grammar.cache.init_cache - - .. 
autodoc2-docstring:: textmate_grammar.cache.init_cache diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.elements.rst b/docs/apidocs/textmate_grammar/textmate_grammar.elements.rst deleted file mode 100644 index 70d248f..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.elements.rst +++ /dev/null @@ -1,129 +0,0 @@ -:py:mod:`textmate_grammar.elements` -=================================== - -.. py:module:: textmate_grammar.elements - -.. autodoc2-docstring:: textmate_grammar.elements - :allowtitles: - -Module Contents ---------------- - -Classes -~~~~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`Capture ` - - .. autodoc2-docstring:: textmate_grammar.elements.Capture - :summary: - * - :py:obj:`ContentElement ` - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement - :summary: - * - :py:obj:`ContentBlockElement ` - - .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement - :summary: - -Data -~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`TOKEN_DICT ` - - .. autodoc2-docstring:: textmate_grammar.elements.TOKEN_DICT - :summary: - -API -~~~ - -.. py:data:: TOKEN_DICT - :canonical: textmate_grammar.elements.TOKEN_DICT - :value: None - - .. autodoc2-docstring:: textmate_grammar.elements.TOKEN_DICT - -.. py:class:: Capture(handler: textmate_grammar.handler.ContentHandler, pattern: textmate_grammar.handler.Pattern, matching: textmate_grammar.handler.Match, parsers: dict[int, textmate_grammar.parser.GrammarParser], starting: tuple[int, int], boundary: tuple[int, int], key: str = '', **kwargs) - :canonical: textmate_grammar.elements.Capture - - .. autodoc2-docstring:: textmate_grammar.elements.Capture - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.elements.Capture.__init__ - - .. 
py:method:: dispatch() -> list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] - :canonical: textmate_grammar.elements.Capture.dispatch - - .. autodoc2-docstring:: textmate_grammar.elements.Capture.dispatch - -.. py:class:: ContentElement(token: str, grammar: dict, content: str, characters: dict[textmate_grammar.handler.POS, str], children: list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] | None = None) - :canonical: textmate_grammar.elements.ContentElement - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.__init__ - - .. py:property:: children - :canonical: textmate_grammar.elements.ContentElement.children - :type: list[textmate_grammar.elements.ContentElement] - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.children - - .. py:method:: find(tokens: str | list[str], start_tokens: str | list[str] = '', hide_tokens: str | list[str] = '', stop_tokens: str | list[str] = '', depth: int = -1, attribute: str = '_subelements', stack: list[str] | None = None) -> typing.Generator[tuple[textmate_grammar.elements.ContentElement, list[str]], None, None] - :canonical: textmate_grammar.elements.ContentElement.find - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.find - - .. py:method:: findall(tokens: str | list[str], start_tokens: str | list[str] = '', hide_tokens: str | list[str] = '', stop_tokens: str | list[str] = '', depth: int = -1, attribute: str = '_subelements') -> list[tuple[textmate_grammar.elements.ContentElement, list[str]]] - :canonical: textmate_grammar.elements.ContentElement.findall - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.findall - - .. py:method:: to_dict(depth: int = -1, all_content: bool = False, **kwargs) -> dict - :canonical: textmate_grammar.elements.ContentElement.to_dict - - .. 
autodoc2-docstring:: textmate_grammar.elements.ContentElement.to_dict - - .. py:method:: flatten() -> list[tuple[tuple[int, int], str, list[str]]] - :canonical: textmate_grammar.elements.ContentElement.flatten - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.flatten - - .. py:method:: print(flatten: bool = False, depth: int = -1, all_content: bool = False, **kwargs) -> None - :canonical: textmate_grammar.elements.ContentElement.print - - .. autodoc2-docstring:: textmate_grammar.elements.ContentElement.print - -.. py:class:: ContentBlockElement(begin: list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] | None = None, end: list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement] | None = None, **kwargs) - :canonical: textmate_grammar.elements.ContentBlockElement - - Bases: :py:obj:`textmate_grammar.elements.ContentElement` - - .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.__init__ - - .. py:property:: begin - :canonical: textmate_grammar.elements.ContentBlockElement.begin - :type: list[textmate_grammar.elements.ContentElement] - - .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.begin - - .. py:property:: end - :canonical: textmate_grammar.elements.ContentBlockElement.end - :type: list[textmate_grammar.elements.ContentElement] - - .. autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.end - - .. py:method:: to_dict(depth: int = -1, all_content: bool = False, **kwargs) -> dict - :canonical: textmate_grammar.elements.ContentBlockElement.to_dict - - .. 
autodoc2-docstring:: textmate_grammar.elements.ContentBlockElement.to_dict diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.exceptions.rst b/docs/apidocs/textmate_grammar/textmate_grammar.exceptions.rst deleted file mode 100644 index b1e7b4c..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.exceptions.rst +++ /dev/null @@ -1,68 +0,0 @@ -:py:mod:`textmate_grammar.exceptions` -===================================== - -.. py:module:: textmate_grammar.exceptions - -.. autodoc2-docstring:: textmate_grammar.exceptions - :allowtitles: - -Module Contents ---------------- - -API -~~~ - -.. py:exception:: IncludedParserNotFound(key: str = 'UNKNOWN', **kwargs) - :canonical: textmate_grammar.exceptions.IncludedParserNotFound - - Bases: :py:obj:`Exception` - - .. autodoc2-docstring:: textmate_grammar.exceptions.IncludedParserNotFound - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.exceptions.IncludedParserNotFound.__init__ - -.. py:exception:: IncompatibleFileType(extensions: list[str], **kwargs) - :canonical: textmate_grammar.exceptions.IncompatibleFileType - - Bases: :py:obj:`Exception` - - .. autodoc2-docstring:: textmate_grammar.exceptions.IncompatibleFileType - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.exceptions.IncompatibleFileType.__init__ - -.. py:exception:: FileNotFound(file: str, **kwargs) - :canonical: textmate_grammar.exceptions.FileNotFound - - Bases: :py:obj:`Exception` - - .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotFound - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotFound.__init__ - -.. py:exception:: FileNotParsed(file: str, **kwargs) - :canonical: textmate_grammar.exceptions.FileNotParsed - - Bases: :py:obj:`Exception` - - .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotParsed - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.exceptions.FileNotParsed.__init__ - -.. 
py:exception:: ImpossibleSpan(**kwargs) - :canonical: textmate_grammar.exceptions.ImpossibleSpan - - Bases: :py:obj:`Exception` - - .. autodoc2-docstring:: textmate_grammar.exceptions.ImpossibleSpan - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.exceptions.ImpossibleSpan.__init__ diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.markdown.rst b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.markdown.rst deleted file mode 100644 index 58fc6d6..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.markdown.rst +++ /dev/null @@ -1,39 +0,0 @@ -:py:mod:`textmate_grammar.grammars.markdown` -============================================ - -.. py:module:: textmate_grammar.grammars.markdown - -.. autodoc2-docstring:: textmate_grammar.grammars.markdown - :allowtitles: - -Package Contents ----------------- - -Data -~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`tmLanguageFile ` - - .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageFile - :summary: - * - :py:obj:`tmLanguageYAML ` - - .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageYAML - :summary: - -API -~~~ - -.. py:data:: tmLanguageFile - :canonical: textmate_grammar.grammars.markdown.tmLanguageFile - :value: None - - .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageFile - -.. py:data:: tmLanguageYAML - :canonical: textmate_grammar.grammars.markdown.tmLanguageYAML - :value: None - - .. autodoc2-docstring:: textmate_grammar.grammars.markdown.tmLanguageYAML diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.matlab.rst b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.matlab.rst deleted file mode 100644 index 95bfd56..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.matlab.rst +++ /dev/null @@ -1,39 +0,0 @@ -:py:mod:`textmate_grammar.grammars.matlab` -========================================== - -.. 
py:module:: textmate_grammar.grammars.matlab - -.. autodoc2-docstring:: textmate_grammar.grammars.matlab - :allowtitles: - -Package Contents ----------------- - -Data -~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`tmLanguageFile ` - - .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageFile - :summary: - * - :py:obj:`tmLanguageYAML ` - - .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageYAML - :summary: - -API -~~~ - -.. py:data:: tmLanguageFile - :canonical: textmate_grammar.grammars.matlab.tmLanguageFile - :value: None - - .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageFile - -.. py:data:: tmLanguageYAML - :canonical: textmate_grammar.grammars.matlab.tmLanguageYAML - :value: None - - .. autodoc2-docstring:: textmate_grammar.grammars.matlab.tmLanguageYAML diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.rst b/docs/apidocs/textmate_grammar/textmate_grammar.grammars.rst deleted file mode 100644 index 81e1433..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.grammars.rst +++ /dev/null @@ -1,17 +0,0 @@ -:py:mod:`textmate_grammar.grammars` -=================================== - -.. py:module:: textmate_grammar.grammars - -.. autodoc2-docstring:: textmate_grammar.grammars - :allowtitles: - -Subpackages ------------ - -.. toctree:: - :titlesonly: - :maxdepth: 3 - - textmate_grammar.grammars.markdown - textmate_grammar.grammars.matlab diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.handler.rst b/docs/apidocs/textmate_grammar/textmate_grammar.handler.rst deleted file mode 100644 index 6543ea1..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.handler.rst +++ /dev/null @@ -1,102 +0,0 @@ -:py:mod:`textmate_grammar.handler` -================================== - -.. py:module:: textmate_grammar.handler - -.. autodoc2-docstring:: textmate_grammar.handler - :allowtitles: - -Module Contents ---------------- - -Classes -~~~~~~~ - -.. 
list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`ContentHandler ` - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler - :summary: - -Data -~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`POS ` - - .. autodoc2-docstring:: textmate_grammar.handler.POS - :summary: - -API -~~~ - -.. py:data:: POS - :canonical: textmate_grammar.handler.POS - :value: None - - .. autodoc2-docstring:: textmate_grammar.handler.POS - -.. py:class:: ContentHandler(source: str) - :canonical: textmate_grammar.handler.ContentHandler - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.__init__ - - .. py:attribute:: notLookForwardEOL - :canonical: textmate_grammar.handler.ContentHandler.notLookForwardEOL - :value: 'compile(...)' - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.notLookForwardEOL - - .. py:method:: from_path(file_path: pathlib.Path) - :canonical: textmate_grammar.handler.ContentHandler.from_path - :classmethod: - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.from_path - - .. py:method:: next(pos: textmate_grammar.handler.POS, step: int = 1) -> textmate_grammar.handler.POS - :canonical: textmate_grammar.handler.ContentHandler.next - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.next - - .. py:method:: prev(pos: textmate_grammar.handler.POS, step: int = 1) -> textmate_grammar.handler.POS - :canonical: textmate_grammar.handler.ContentHandler.prev - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.prev - - .. py:method:: range(start: textmate_grammar.handler.POS, close: textmate_grammar.handler.POS) -> list[textmate_grammar.handler.POS] - :canonical: textmate_grammar.handler.ContentHandler.range - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.range - - .. 
py:method:: chars(start: textmate_grammar.handler.POS, close: textmate_grammar.handler.POS) -> dict[textmate_grammar.handler.POS, str] - :canonical: textmate_grammar.handler.ContentHandler.chars - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.chars - - .. py:method:: read_pos(start_pos: textmate_grammar.handler.POS, close_pos: textmate_grammar.handler.POS, skip_newline: bool = True) -> str - :canonical: textmate_grammar.handler.ContentHandler.read_pos - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.read_pos - - .. py:method:: read_line(pos: textmate_grammar.handler.POS) -> str - :canonical: textmate_grammar.handler.ContentHandler.read_line - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.read_line - - .. py:method:: read(start_pos: textmate_grammar.handler.POS, length: int = 1, skip_newline: bool = True) -> str - :canonical: textmate_grammar.handler.ContentHandler.read - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.read - - .. py:method:: search(pattern: onigurumacffi._Pattern, starting: textmate_grammar.handler.POS, boundary: textmate_grammar.handler.POS | None = None, greedy: bool = False, **kwargs) -> tuple[onigurumacffi._Match | None, tuple[textmate_grammar.handler.POS, textmate_grammar.handler.POS] | None] - :canonical: textmate_grammar.handler.ContentHandler.search - - .. autodoc2-docstring:: textmate_grammar.handler.ContentHandler.search diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.language.rst b/docs/apidocs/textmate_grammar/textmate_grammar.language.rst deleted file mode 100644 index f644977..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.language.rst +++ /dev/null @@ -1,76 +0,0 @@ -:py:mod:`textmate_grammar.language` -=================================== - -.. py:module:: textmate_grammar.language - -.. autodoc2-docstring:: textmate_grammar.language - :allowtitles: - -Module Contents ---------------- - -Classes -~~~~~~~ - -.. 
list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`DummyParser ` - - .. autodoc2-docstring:: textmate_grammar.language.DummyParser - :summary: - * - :py:obj:`LanguageParser ` - - .. autodoc2-docstring:: textmate_grammar.language.LanguageParser - :summary: - -Data -~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`LANGUAGE_PARSERS ` - - .. autodoc2-docstring:: textmate_grammar.language.LANGUAGE_PARSERS - :summary: - -API -~~~ - -.. py:data:: LANGUAGE_PARSERS - :canonical: textmate_grammar.language.LANGUAGE_PARSERS - :value: None - - .. autodoc2-docstring:: textmate_grammar.language.LANGUAGE_PARSERS - -.. py:class:: DummyParser() - :canonical: textmate_grammar.language.DummyParser - - Bases: :py:obj:`textmate_grammar.parser.GrammarParser` - - .. autodoc2-docstring:: textmate_grammar.language.DummyParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.language.DummyParser.__init__ - -.. py:class:: LanguageParser(grammar: dict, **kwargs) - :canonical: textmate_grammar.language.LanguageParser - - Bases: :py:obj:`textmate_grammar.parser.PatternsParser` - - .. autodoc2-docstring:: textmate_grammar.language.LanguageParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.language.LanguageParser.__init__ - - .. py:method:: parse_file(filePath: str | pathlib.Path, **kwargs) -> textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement | None - :canonical: textmate_grammar.language.LanguageParser.parse_file - - .. autodoc2-docstring:: textmate_grammar.language.LanguageParser.parse_file - - .. py:method:: parse_string(input: str, **kwargs) - :canonical: textmate_grammar.language.LanguageParser.parse_string - - .. 
autodoc2-docstring:: textmate_grammar.language.LanguageParser.parse_string diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.logger.rst b/docs/apidocs/textmate_grammar/textmate_grammar.logger.rst deleted file mode 100644 index d2b25d4..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.logger.rst +++ /dev/null @@ -1,183 +0,0 @@ -:py:mod:`textmate_grammar.logger` -================================= - -.. py:module:: textmate_grammar.logger - -.. autodoc2-docstring:: textmate_grammar.logger - :allowtitles: - -Module Contents ---------------- - -Classes -~~~~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`LogFormatter ` - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter - :summary: - * - :py:obj:`Logger ` - - .. autodoc2-docstring:: textmate_grammar.logger.Logger - :summary: - -Functions -~~~~~~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`track_depth ` - - .. autodoc2-docstring:: textmate_grammar.logger.track_depth - :summary: - -Data -~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`MAX_LENGTH ` - - .. autodoc2-docstring:: textmate_grammar.logger.MAX_LENGTH - :summary: - * - :py:obj:`LOGGER ` - - .. autodoc2-docstring:: textmate_grammar.logger.LOGGER - :summary: - -API -~~~ - -.. py:data:: MAX_LENGTH - :canonical: textmate_grammar.logger.MAX_LENGTH - :value: 79 - - .. autodoc2-docstring:: textmate_grammar.logger.MAX_LENGTH - -.. py:function:: track_depth(func) - :canonical: textmate_grammar.logger.track_depth - - .. autodoc2-docstring:: textmate_grammar.logger.track_depth - -.. py:class:: LogFormatter(fmt=None, datefmt=None, style='%', validate=True, *, defaults=None) - :canonical: textmate_grammar.logger.LogFormatter - - Bases: :py:obj:`logging.Formatter` - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter - - .. rubric:: Initialization - - .. 
autodoc2-docstring:: textmate_grammar.logger.LogFormatter.__init__ - - .. py:attribute:: green - :canonical: textmate_grammar.logger.LogFormatter.green - :value: '\x1b[32;32m' - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.green - - .. py:attribute:: grey - :canonical: textmate_grammar.logger.LogFormatter.grey - :value: '\x1b[38;20m' - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.grey - - .. py:attribute:: yellow - :canonical: textmate_grammar.logger.LogFormatter.yellow - :value: '\x1b[33;20m' - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.yellow - - .. py:attribute:: red - :canonical: textmate_grammar.logger.LogFormatter.red - :value: '\x1b[31;20m' - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.red - - .. py:attribute:: bold_red - :canonical: textmate_grammar.logger.LogFormatter.bold_red - :value: '\x1b[31;1m' - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.bold_red - - .. py:attribute:: reset - :canonical: textmate_grammar.logger.LogFormatter.reset - :value: '\x1b[0m' - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.reset - - .. py:attribute:: format_string - :canonical: textmate_grammar.logger.LogFormatter.format_string - :value: '%(name)s:%(message)s' - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.format_string - - .. py:attribute:: FORMATS - :canonical: textmate_grammar.logger.LogFormatter.FORMATS - :value: None - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.FORMATS - - .. py:method:: format(record) - :canonical: textmate_grammar.logger.LogFormatter.format - - .. autodoc2-docstring:: textmate_grammar.logger.LogFormatter.format - -.. py:class:: Logger(**kwargs) - :canonical: textmate_grammar.logger.Logger - - .. autodoc2-docstring:: textmate_grammar.logger.Logger - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.__init__ - - .. 
py:attribute:: long_msg_div - :canonical: textmate_grammar.logger.Logger.long_msg_div - :value: '\x1b[1;32m ... \x1b[0m' - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.long_msg_div - - .. py:method:: configure(parser: textmate_grammar.parser.GrammarParser, height: int, width: int, **kwargs) -> None - :canonical: textmate_grammar.logger.Logger.configure - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.configure - - .. py:method:: format_message(message: str, parser: typing.Optional[textmate_grammar.parser.GrammarParser] = None, position: tuple[int, int] | None = None, depth: int = 0) -> str - :canonical: textmate_grammar.logger.Logger.format_message - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.format_message - - .. py:method:: debug(*args, **kwargs) -> None - :canonical: textmate_grammar.logger.Logger.debug - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.debug - - .. py:method:: info(*args, **kwargs) -> None - :canonical: textmate_grammar.logger.Logger.info - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.info - - .. py:method:: warning(*args, **kwargs) -> None - :canonical: textmate_grammar.logger.Logger.warning - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.warning - - .. py:method:: error(*args, **kwargs) -> None - :canonical: textmate_grammar.logger.Logger.error - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.error - - .. py:method:: critical(*args, **kwargs) -> None - :canonical: textmate_grammar.logger.Logger.critical - - .. autodoc2-docstring:: textmate_grammar.logger.Logger.critical - -.. py:data:: LOGGER - :canonical: textmate_grammar.logger.LOGGER - :value: 'Logger(...)' - - .. 
autodoc2-docstring:: textmate_grammar.logger.LOGGER diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.parser.rst b/docs/apidocs/textmate_grammar/textmate_grammar.parser.rst deleted file mode 100644 index cd9e265..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.parser.rst +++ /dev/null @@ -1,128 +0,0 @@ -:py:mod:`textmate_grammar.parser` -================================= - -.. py:module:: textmate_grammar.parser - -.. autodoc2-docstring:: textmate_grammar.parser - :allowtitles: - -Module Contents ---------------- - -Classes -~~~~~~~ - -.. list-table:: - :class: autosummary longtable - :align: left - - * - :py:obj:`GrammarParser ` - - .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser - :summary: - * - :py:obj:`TokenParser ` - - .. autodoc2-docstring:: textmate_grammar.parser.TokenParser - :summary: - * - :py:obj:`MatchParser ` - - .. autodoc2-docstring:: textmate_grammar.parser.MatchParser - :summary: - * - :py:obj:`ParserHasPatterns ` - - - * - :py:obj:`PatternsParser ` - - .. autodoc2-docstring:: textmate_grammar.parser.PatternsParser - :summary: - * - :py:obj:`BeginEndParser ` - - .. autodoc2-docstring:: textmate_grammar.parser.BeginEndParser - :summary: - * - :py:obj:`BeginWhileParser ` - - .. autodoc2-docstring:: textmate_grammar.parser.BeginWhileParser - :summary: - -API -~~~ - -.. py:class:: GrammarParser(grammar: dict, language: textmate_grammar.language.LanguageParser | None = None, key: str = '', is_capture: bool = False, **kwargs) - :canonical: textmate_grammar.parser.GrammarParser - - Bases: :py:obj:`abc.ABC` - - .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.__init__ - - .. py:method:: initialize(grammar: dict, **kwargs) - :canonical: textmate_grammar.parser.GrammarParser.initialize - :staticmethod: - - .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.initialize - - .. 
py:method:: parse(handler: textmate_grammar.handler.ContentHandler, starting: textmate_grammar.handler.POS = (0, 0), boundary: textmate_grammar.handler.POS | None = None, **kwargs) -> tuple[bool, list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement], tuple[int, int] | None] - :canonical: textmate_grammar.parser.GrammarParser.parse - - .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.parse - - .. py:method:: match_and_capture(handler: textmate_grammar.handler.ContentHandler, pattern: textmate_grammar.handler.Pattern, starting: textmate_grammar.handler.POS, boundary: textmate_grammar.handler.POS, parsers: dict[int, textmate_grammar.parser.GrammarParser] | None = None, parent_capture: textmate_grammar.elements.Capture | None = None, **kwargs) -> tuple[tuple[textmate_grammar.handler.POS, textmate_grammar.handler.POS] | None, str, list[textmate_grammar.elements.Capture | textmate_grammar.elements.ContentElement]] - :canonical: textmate_grammar.parser.GrammarParser.match_and_capture - - .. autodoc2-docstring:: textmate_grammar.parser.GrammarParser.match_and_capture - -.. py:class:: TokenParser(grammar: dict, **kwargs) - :canonical: textmate_grammar.parser.TokenParser - - Bases: :py:obj:`textmate_grammar.parser.GrammarParser` - - .. autodoc2-docstring:: textmate_grammar.parser.TokenParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.parser.TokenParser.__init__ - -.. py:class:: MatchParser(grammar: dict, **kwargs) - :canonical: textmate_grammar.parser.MatchParser - - Bases: :py:obj:`textmate_grammar.parser.GrammarParser` - - .. autodoc2-docstring:: textmate_grammar.parser.MatchParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.parser.MatchParser.__init__ - -.. py:class:: ParserHasPatterns(grammar: dict, **kwargs) - :canonical: textmate_grammar.parser.ParserHasPatterns - - Bases: :py:obj:`textmate_grammar.parser.GrammarParser`, :py:obj:`abc.ABC` - -.. 
py:class:: PatternsParser(grammar: dict, **kwargs) - :canonical: textmate_grammar.parser.PatternsParser - - Bases: :py:obj:`textmate_grammar.parser.ParserHasPatterns` - - .. autodoc2-docstring:: textmate_grammar.parser.PatternsParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.parser.PatternsParser.__init__ - -.. py:class:: BeginEndParser(grammar: dict, **kwargs) - :canonical: textmate_grammar.parser.BeginEndParser - - Bases: :py:obj:`textmate_grammar.parser.ParserHasPatterns` - - .. autodoc2-docstring:: textmate_grammar.parser.BeginEndParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.parser.BeginEndParser.__init__ - -.. py:class:: BeginWhileParser(grammar: dict, **kwargs) - :canonical: textmate_grammar.parser.BeginWhileParser - - Bases: :py:obj:`textmate_grammar.parser.PatternsParser` - - .. autodoc2-docstring:: textmate_grammar.parser.BeginWhileParser - - .. rubric:: Initialization - - .. autodoc2-docstring:: textmate_grammar.parser.BeginWhileParser.__init__ diff --git a/docs/apidocs/textmate_grammar/textmate_grammar.rst b/docs/apidocs/textmate_grammar/textmate_grammar.rst deleted file mode 100644 index 76025e0..0000000 --- a/docs/apidocs/textmate_grammar/textmate_grammar.rst +++ /dev/null @@ -1,31 +0,0 @@ -:py:mod:`textmate_grammar` -========================== - -.. py:module:: textmate_grammar - -.. autodoc2-docstring:: textmate_grammar - :allowtitles: - -Subpackages ------------ - -.. toctree:: - :titlesonly: - :maxdepth: 3 - - textmate_grammar.grammars - -Submodules ----------- - -.. 
toctree:: - :titlesonly: - :maxdepth: 1 - - textmate_grammar.logger - textmate_grammar.exceptions - textmate_grammar.elements - textmate_grammar.handler - textmate_grammar.language - textmate_grammar.cache - textmate_grammar.parser diff --git a/docs/conf.py b/docs/conf.py index f163cd2..a6fcc3a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -3,7 +3,7 @@ # -- Project information ----------------------------------------------------- project = "Texmate Grammar Python" -version = "0.2.0" +version = "0.3.0" copyright = f"{date.today().year}, Mark Shui Hu" author = "Mark Shui Hu" @@ -15,6 +15,7 @@ "sphinx.ext.intersphinx", "sphinx.ext.viewcode", "sphinx.ext.todo", + "sphinxcontrib.mermaid", ] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] intersphinx_mapping = { diff --git a/docs/element.md b/docs/element.md new file mode 100644 index 0000000..e6cde9a --- /dev/null +++ b/docs/element.md @@ -0,0 +1,101 @@ +# Element operations + +The [`parse_file`](#textmate_grammar.language.LanguageParser.parse_file) and [`parse_string`](#textmate_grammar.language.LanguageParser.parse_string) methods of the [`LanguageParser`](#textmate_grammar.language.LanguageParser) both return either `None` if the content could not be parsed, or a [`ContentElement`](#textmate_grammar.elements.ContentElement) or [`ContentBlockElement`](#textmate_grammar.elements.ContentBlockElement) if parsing was successful. + +```mermaid +classDiagram + direction LR + class ContentElement{ + +token : str + +grammar : dict + +content : str + +charaters : dict + +children : list + +find() + +findall() + +to_dict() + +flatten() + +print() + } + + class ContentBlockElement{ + +begin : list + +end : list + } + +ContentElement <|-- ContentBlockElement +``` + +Every matching is defined by its `grammar` and identifier `token`. The matching is available under `content` as string and under `characters` as a dictionary of single charaters, with the position in the source file as the dictionary keys. 
+ +## Output standard types + +As illustrated on the [homepage](index.md), the [`print`](#textmate_grammar.elements.ContentElement.print) method prints the nested data structure in dictionary format on the command line. The dictionary output can also be requested through the [`to_dict`](#textmate_grammar.elements.ContentElement.to_dict) method. + + +Alternatively, with the [`flatten`](#textmate_grammar.elements.ContentElement.flatten) method, a list of tuples is returned with every entry representing the unique tokenized elements from start to finish: + +- tuple of line number, position on line +- string of the content +- list of tokens + +This representation is more akin to the output of [vscode-textmate](https://github.com/microsoft/vscode-textmate). The "flattened" output can be printed in the command line by the `flatten` keyword. + +```python +>>> element.print(flatten=True) + +[[(0, 0), 'value', ['source.matlab', 'meta.assignment.variable.single.matlab', 'variable.other.readwrite.matlab']], + [(0, 5), ' ', ['source.matlab']], + [(0, 6), '=', ['source.matlab', 'keyword.operator.assignment.matlab']], + [(0, 7), ' ', ['source.matlab']], + [(0, 8), 'num2str', ['source.matlab', 'meta.function-call.parens.matlab', 'entity.name.function.matlab']], + [(0, 15), '(', ['source.matlab', 'meta.function-call.parens.matlab', 'punctuation.section.parens.begin.matlab']], + [(0, 16), '10', ['source.matlab', 'meta.function-call.parens.matlab', 'constant.numeric.decimal.matlab']], + [(0, 18), ')', ['source.matlab', 'meta.function-call.parens.matlab', 'punctuation.section.parens.end.matlab']], + [(0, 19), ';', ['source.matlab', 'punctuation.terminator.semicolon.matlab']]] +``` + +## Accessing descendent elements + +To find specific descendent elements, instead of indexing manually through the `children` (or `begin` and `end`) attribute, use the provided methods [`find`](#textmate_grammar.elements.ContentElement.find), which yields the found descendent elements one by one, and 
[`findall`](#textmate_grammar.elements.ContentElement.findall), which returns all descendents as a list. + + +```mermaid +flowchart LR + style root stroke-width:0,fill:#F94144 + style ca stroke-width:0,fill:#577590 + style cb stroke-width:0,fill:#F3722C + style cc stroke-width:0,fill:#1372fC + classDef gca stroke-width:0,fill:#43AA8B + classDef gcb stroke-width:0,fill:#D65780 + root[root] + ca[child.a] + cb[child.b] + cc[child.c] + gcaa["grandchild.a (a)"]:::gca + gcab["grandchild.b (a)"]:::gcb + gcba["grandchild.a (b)"]:::gca + gcbb["grandchild.b (b)"]:::gcb + gcca["grandchild.a (c)"]:::gca + gccb["grandchild.b (c)"]:::gcb + root ----|0| ca + root ----|1| cb + root ----|2| cc + ca ----|0| gcaa + ca ----|1| gcab + cb ----|0| gcba + cb ----|1| gcbb + cc ----|0| gcca + cc ----|1| gccb +``` + +For example, for an example element tree above, the table below shows the possible return values of `findall` on the root element with different criteria. + +| Input | Output | +| ----- | ------ | +| `root.findall(tokens='grandchild.a')` | grandchild.a (a, b, c) | +| `root.findall(tokens='grandchild.a', depth=1)` | | +| `root.findall(tokens='grandchild.a', start_tokens='child.b')` | grandchild.a (b,c) | +| `root.findall(tokens='grandchild.a', stop_tokens='child.c')` | grandchild.a (a,b) | +| `root.findall(tokens=['child.a', 'child.b'])` | child.a, child.b | +| `root.findall(tokens='*', hide_tokens='child.c', depth=1)` | child.a, child.b | diff --git a/docs/index.md b/docs/index.md index f363f1e..86a7eec 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,25 +1,88 @@ -[![PyPI - Version](https://img.shields.io/pypi/v/textmate-grammar-python.svg)](https://pypi.python.org/pypi/textmate-grammar-python) -[![PyPI - License](https://img.shields.io/pypi/l/textmate-grammar-python.svg)](https://github.com/watermarkhu/textmate-grammar-python/tree/main?tab=MIT-1-ov-file) 
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![Checked with mypy](https://img.shields.io/badge/mypy-checked-blue)](http://mypy-lang.org/) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) -[![Python versions](https://img.shields.io/pypi/pyversions/textmate-grammar-python.svg)](https://pypi.python.org/pypi/textmate-grammar-python) [![CI/CD](https://github.com/watermarkhu/textmate-grammar-python/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/watermarkhu/textmate-grammar-python/blob/main/.github/workflows/ci.yml) +[![readthedocs](https://readthedocs.org/projects/textmate-grammar-python/badge/?version=latest)](https://textmate-grammar-python.readthedocs.io) +A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python. -# textmate-grammar-python +Textmate grammars are made for [vscode-texmate](https://github.com/microsoft/vscode-textmate), allowing for syntax highlighting in VSCode after tokenization. This presents textmate-grammar-python with a large list of potentially supported languages. -A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python. TextMate grammars use the oniguruma dialect (https://github.com/kkos/oniguruma). Supports loading grammar files from JSON, PLIST, or YAML format. 
+```mermaid +flowchart TD + A[grammar file] + Z[code] + B("`vscode-textmate **js**`") + C("`textmate-grammar-**python**`") + D[tokens] -## Sources -- [Textmate guide](https://www.apeth.com/nonblog/stories/textmatebundle.html) -- [VSCode Syntax Highlighting guide](https://code.visualstudio.com/api/language-extensions/syntax-highlight-guide) -- [vscode-textmate](https://github.com/microsoft/vscode-textmate) -- [Macromates texmate](https://macromates.com/textmate/manual/) + click C "https://github.com/microsoft/vscode-textmate" + + Z --> B + Z --> C + A -.-> B --> D + A -.-> C --> D +``` + +## Installation +Install the module with: +```bash +pip install textmate-grammar-python +``` + +Or, for development purposes, clone the repository and install locally with [poetry](https://python-poetry.org/), and setup [pre-commit](https://pre-commit.com/) such that code is linted and formatted with [Ruff](https://docs.astral.sh/ruff/) and checked with [mypy](https://mypy-lang.org/). + +```bash +pip install poetry +git clone https://github.com/watermarkhu/textmate-grammar-python +cd textmate-grammar-python +poetry install +pre-commit install +``` +For instructions on running the unit and regression tests see [CONTRIBUTING.md](https://github.com/watermarkhu/textmate-grammar-python/blob/main/CONTRIBUTING.md) + + +## Usage +Before tokenization is possible, a [`LanguageParser`](#textmate_grammar.language.LanguageParser) needs to be initialized using a loaded grammar. + +```python +from textmate_grammar.language import LanguageParser +from textmate_grammar.grammars import matlab +parser = LanguageParser(matlab.GRAMMAR) +``` + +After this, one can either choose to call [`parser.parsing_string`](#textmate_grammar.language.LanguageParser.parse_string) to parse a input string directly, or call [`parser.parse_file`](#textmate_grammar.language.LanguageParser.parse_file) with the path to the appropiate source file as the first argument, such as in the example [`example.py`](../example.py). 
+ +The parsed `element` object can be displayed directly by calling the [`print`](#textmate_grammar.elements.ContentElement.print) method. By default the element is printed as an element tree in a dictionary format. + +```python +>>> element = parser.parse_string("value = num2str(10);") +>>> element.print() + +{'token': 'source.matlab', + 'children': [{'token': 'meta.assignment.variable.single.matlab', + 'children': [{'token': 'variable.other.readwrite.matlab', 'content': 'value'}]}, + {'token': 'keyword.operator.assignment.matlab', 'content': '='}, + {'token': 'meta.function-call.parens.matlab', + 'begin': [{'token': 'entity.name.function.matlab', 'content': 'num2str'}, + {'token': 'punctuation.section.parens.begin.matlab', 'content': '('}], + 'end': [{'token': 'punctuation.section.parens.end.matlab', 'content': ')'}], + 'children': [{'token': 'constant.numeric.decimal.matlab', 'content': '10'}]}, + {'token': 'punctuation.terminator.semicolon.matlab', 'content': ';'}]} + +``` ```{toctree} :maxdepth: 2 +languages +element apidocs/index ``` + +## Sources +- [Textmate guide](https://www.apeth.com/nonblog/stories/textmatebundle.html) +- [VSCode Syntax Highlighting guide](https://code.visualstudio.com/api/language-extensions/syntax-highlight-guide) +- [vscode-textmate](https://github.com/microsoft/vscode-textmate) +- [Macromates texmate](https://macromates.com/textmate/manual/) diff --git a/docs/languages.md b/docs/languages.md new file mode 100644 index 0000000..0fc657a --- /dev/null +++ b/docs/languages.md @@ -0,0 +1,5 @@ +# Supported languages + +| language | unit test | regression test | +|-----------|-----------|-----------------| +| [MATLAB](https://github.com/mathworks/MATLAB-Language-grammar) | ✅ | ✅ | \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index 0b5315c..04fa4dc 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -283,6 +283,9 @@ sphinxcontrib-htmlhelp==2.0.5 ; python_version >= "3.11" and 
python_version < "4 sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 \ --hash=sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8 +sphinxcontrib-mermaid==0.9.2 ; python_version >= "3.11" and python_version < "4.0" \ + --hash=sha256:252ef13dd23164b28f16d8b0205cf184b9d8e2b714a302274d9f59eb708e77af \ + --hash=sha256:6795a72037ca55e65663d2a2c1a043d636dc3d30d418e56dd6087d1459d98a5d sphinxcontrib-qthelp==1.0.7 ; python_version >= "3.11" and python_version < "4.0" \ --hash=sha256:053dedc38823a80a7209a80860b16b722e9e0209e32fea98c90e4e6624588ed6 \ --hash=sha256:e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182 diff --git a/example.py b/example.py index ca03a37..173625c 100644 --- a/example.py +++ b/example.py @@ -1,16 +1,19 @@ import logging from pathlib import Path +from pprint import pprint -from textmate_grammar.cache import init_cache from textmate_grammar.grammars import matlab from textmate_grammar.language import LanguageParser +from textmate_grammar.utils.cache import init_cache # Initialize shelved cache init_cache("shelve") -# Initialize language parser +# Enable debug logging logging.getLogger().setLevel(logging.DEBUG) logging.getLogger("textmate_grammar").setLevel(logging.INFO) + +# Initialize language parser parser = LanguageParser(matlab.GRAMMAR) # Parse file @@ -19,3 +22,7 @@ # Print element element.print() + +# Find all enum members +enum_members = element.findall('variable.other.enummember.matlab') +pprint(enum_members) \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index fee08d2..d92e409 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1863,6 +1863,17 @@ files = [ [package.extras] test = ["flake8", "mypy", "pytest"] +[[package]] +name = "sphinxcontrib-mermaid" +version = "0.9.2" +description = "Mermaid diagrams in yours Sphinx powered docs" +optional = false +python-versions = ">=3.7" +files 
= [ + {file = "sphinxcontrib-mermaid-0.9.2.tar.gz", hash = "sha256:252ef13dd23164b28f16d8b0205cf184b9d8e2b714a302274d9f59eb708e77af"}, + {file = "sphinxcontrib_mermaid-0.9.2-py3-none-any.whl", hash = "sha256:6795a72037ca55e65663d2a2c1a043d636dc3d30d418e56dd6087d1459d98a5d"}, +] + [[package]] name = "sphinxcontrib-qthelp" version = "1.0.7" @@ -2093,4 +2104,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "dfc5b6bc41f950531f8739454ce6b1acb5f42ee466e9166adf6650e263d88500" +content-hash = "eddc383a3ade90430d3585c91d929646fc306a1500744a1b5956c81bb5004ab1" diff --git a/pyproject.toml b/pyproject.toml index f0abd7c..3c53198 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "textmate-grammar-python" -version = "0.2.0" +version = "0.3.0" description = "A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python." 
authors = ["Mark Shui Hu "] license = "MIT" readme = "README.md" repository = "https://github.com/watermarkhu/textmate-grammar-python" -documentation = "https://watermarkhu.nl/textmate-grammar-python" +documentation = "https://textmate-grammar-python.readthedocs.io" keywords = ["textmate", "tokenization"] packages = [{include = "textmate_grammar", from = "src"}] @@ -36,6 +36,7 @@ sphinx = "^7.2.6" sphinx-autodoc2 = "^0.5.0" myst-parser = "^2.0.0" furo = "^2024.1.29" +sphinxcontrib-mermaid = "^0.9.2" [tool.poetry_bumpversion.file."src/textmate_grammar/__init__.py"] diff --git a/src/textmate_grammar/__init__.py b/src/textmate_grammar/__init__.py index d3ec452..493f741 100644 --- a/src/textmate_grammar/__init__.py +++ b/src/textmate_grammar/__init__.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/src/textmate_grammar/elements.py b/src/textmate_grammar/elements.py index c4bd5f6..bdcfb41 100644 --- a/src/textmate_grammar/elements.py +++ b/src/textmate_grammar/elements.py @@ -6,8 +6,8 @@ from pprint import pprint from typing import TYPE_CHECKING, Generator -from .handler import POS, ContentHandler, Match, Pattern -from .logger import LOGGER +from .utils.handler import POS, ContentHandler, Match, Pattern +from .utils.logger import LOGGER if TYPE_CHECKING: from .parser import GrammarParser @@ -194,6 +194,8 @@ def _dispatch(self, nested: bool = False): :type nested: bool :return: None """ + if self._dispatched: + return self._dispatched = True self._children: list[ContentElement] = _dispatch_list(self._children_captures, parent=self) self._children_captures = [] @@ -206,7 +208,7 @@ def __eq__(self, other): return False return bool(self.grammar == other.grammar and self.characters == other.characters) - def find( + def _find( self, tokens: str | list[str], start_tokens: str | list[str] = "", @@ -216,26 +218,6 @@ def find( attribute: str = "_subelements", stack: list[str] | None = None, ) -> Generator[tuple[ContentElement, list[str]], None, None]: - 
""" - Find content elements based on the given criteria. - - The find method will return a generator that globs though the element-tree, searching for the next - subelement that matches the given token. - - :param tokens: The tokens to search for. Can be a single token or a list of tokens. - :param start_tokens: The tokens that mark the start of the search. Can be a single token or a list of tokens. - :param hide_tokens: The tokens to hide from the search results. Can be a single token or a list of tokens. - :param stop_tokens: The tokens that mark the end of the search. Can be a single token or a list of tokens. - :param depth: The maximum depth to search. Defaults to -1 (unlimited depth). - :param attribute: The attribute name to access the subelements. Defaults to "_subelements". - :param stack: The stack of tokens encountered during the search. Defaults to None. - - :yield: A tuple containing the found content element and the stack of tokens encountered. - - :raises ValueError: If the input tokens and stop_tokens are not disjoint. - - :return: None if no matching content elements are found. - """ tokens = _str_to_list(tokens) start_tokens = _str_to_list(start_tokens) hide_tokens = _str_to_list(hide_tokens) @@ -271,7 +253,7 @@ def find( ): yield child, [e for e in stack] if depth: - nested_generator = child.find( + nested_generator = child._find( tokens, start_tokens=start_tokens, hide_tokens=hide_tokens, @@ -282,6 +264,43 @@ def find( yield from nested_generator return None + def find( + self, + tokens: str | list[str], + start_tokens: str | list[str] = "", + hide_tokens: str | list[str] = "", + stop_tokens: str | list[str] = "", + depth: int = -1, + attribute: str = "_subelements", + ) -> Generator[tuple[ContentElement, list[str]], None, None]: + """ + Find content elements based on the given criteria. + + The find method will return a generator that globs though the element-tree, searching for the next + subelement that matches the given token. 
+ + :param tokens: The tokens to search for. Can be a single token or a list of tokens. + :param start_tokens: The tokens that mark the start of the search. Can be a single token or a list of tokens. + :param hide_tokens: The tokens to hide from the search results. Can be a single token or a list of tokens. + :param stop_tokens: The tokens that mark the end of the search. Can be a single token or a list of tokens. + :param depth: The maximum depth to search. Defaults to -1 (unlimited depth). + :param attribute: The attribute name to access the subelements. Defaults to "_subelements". + + :yield: A tuple containing the found content element and the stack of tokens encountered. + + :raises ValueError: If the input tokens and stop_tokens are not disjoint. + + :return: None if no matching content elements are found. + """ + return self._find( + tokens, + start_tokens=start_tokens, + hide_tokens=hide_tokens, + stop_tokens=stop_tokens, + depth=depth, + attribute=attribute, + ) + def findall( self, tokens: str | list[str], @@ -304,7 +323,7 @@ def findall( :return: A list of tuples containing the content element and the found tokens. 
""" return list( - self.find( + self._find( tokens, start_tokens=start_tokens, hide_tokens=hide_tokens, @@ -425,6 +444,7 @@ class ContentBlockElement(ContentElement): def __init__( self, + *args, begin: list[Capture | ContentElement] | None = None, end: list[Capture | ContentElement] | None = None, **kwargs, @@ -442,7 +462,7 @@ def __init__( end = [] if begin is None: begin = [] - super().__init__(**kwargs) + super().__init__(*args, **kwargs) self._begin_captures = begin self._end_captures = end @@ -477,6 +497,8 @@ def end(self) -> list[ContentElement]: return self._end def _dispatch(self, nested: bool = False): + if self._dispatched: + return super()._dispatch(nested) self._begin: list[ContentElement] = _dispatch_list(self._begin_captures, parent=self) self._end: list[ContentElement] = _dispatch_list(self._end_captures, parent=self) diff --git a/src/textmate_grammar/language.py b/src/textmate_grammar/language.py index 539ea93..f1f3edb 100644 --- a/src/textmate_grammar/language.py +++ b/src/textmate_grammar/language.py @@ -1,11 +1,11 @@ from pathlib import Path -from .cache import TextmateCache, init_cache from .elements import Capture, ContentElement -from .exceptions import IncompatibleFileType -from .handler import POS, ContentHandler -from .logger import LOGGER from .parser import GrammarParser, PatternsParser +from .utils.cache import TextmateCache, init_cache +from .utils.exceptions import IncompatibleFileType +from .utils.handler import POS, ContentHandler +from .utils.logger import LOGGER LANGUAGE_PARSERS = {} @@ -96,7 +96,7 @@ def _initialize_repository(self): super()._initialize_repository() - def parse_file(self, filePath: str | Path, **kwargs) -> Capture | ContentElement | None: + def parse_file(self, filePath: str | Path, **kwargs) -> ContentElement | None: """ Parses an entire file with the current grammar. 
@@ -122,11 +122,10 @@ def parse_file(self, filePath: str | Path, **kwargs) -> Capture | ContentElement element = self._parse_language(handler, **kwargs) # type: ignore if element is not None: - element._dispatch(nested=True) self._cache.save(filePath, element) return element - def parse_string(self, input: str, **kwargs): + def parse_string(self, input: str, **kwargs) -> ContentElement | None: """ Parses an input string. @@ -137,13 +136,22 @@ def parse_string(self, input: str, **kwargs): handler = ContentHandler(input) # Configure logger LOGGER.configure(self, height=len(handler.lines), width=max(handler.line_lengths)) - return self._parse_language(handler, **kwargs) - def _parse_language(self, handler: ContentHandler, **kwargs) -> Capture | ContentElement | None: + element = self._parse_language(handler, **kwargs) + + return element + + def _parse_language(self, handler: ContentHandler, **kwargs) -> ContentElement | None: """Parses the current stream with the language scope.""" parsed, elements, _ = self.parse(handler, (0, 0), **kwargs) - return elements[0] if parsed else None + + if parsed: + element = elements[0] + element._dispatch(nested=True) # type: ignore + else: + element = None + return element # type: ignore def _parse( self, handler: ContentHandler, starting: POS, **kwargs diff --git a/src/textmate_grammar/parser.py b/src/textmate_grammar/parser.py index 1262495..f1c08e8 100644 --- a/src/textmate_grammar/parser.py +++ b/src/textmate_grammar/parser.py @@ -6,9 +6,9 @@ import onigurumacffi as re from .elements import Capture, ContentBlockElement, ContentElement -from .exceptions import IncludedParserNotFound -from .handler import POS, ContentHandler, Pattern -from .logger import LOGGER, track_depth +from .utils.exceptions import IncludedParserNotFound +from .utils.handler import POS, ContentHandler, Pattern +from .utils.logger import LOGGER, track_depth if TYPE_CHECKING: from .language import LanguageParser @@ -62,12 +62,18 @@ def __init__( self.language = 
language self.key = key self.token = grammar.get("name", "") - self.comment = grammar.get("comment", "") - self.disabled = grammar.get("disabled", False) self.is_capture = is_capture self.initialized = False self.anchored = False + @property + def comment(self) -> str: + return self.grammar.get("comment", "") + + @property + def disabled(self) -> bool: + return self.grammar.get("disabled", False) + def __repr__(self) -> str: return f"{self.__class__.__name__}:<{self.key}>" diff --git a/src/textmate_grammar/utils/__init__.py b/src/textmate_grammar/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/textmate_grammar/cache.py b/src/textmate_grammar/utils/cache.py similarity index 99% rename from src/textmate_grammar/cache.py rename to src/textmate_grammar/utils/cache.py index 16fd96e..7c42f6d 100644 --- a/src/textmate_grammar/cache.py +++ b/src/textmate_grammar/utils/cache.py @@ -3,7 +3,7 @@ from pickle import UnpicklingError from typing import Protocol -from .elements import ContentElement +from ..elements import ContentElement CACHE_DIR = (Path() / ".textmate_cache").resolve() CACHE_DIR.mkdir(parents=True, exist_ok=True) diff --git a/src/textmate_grammar/exceptions.py b/src/textmate_grammar/utils/exceptions.py similarity index 100% rename from src/textmate_grammar/exceptions.py rename to src/textmate_grammar/utils/exceptions.py diff --git a/src/textmate_grammar/handler.py b/src/textmate_grammar/utils/handler.py similarity index 100% rename from src/textmate_grammar/handler.py rename to src/textmate_grammar/utils/handler.py diff --git a/src/textmate_grammar/logger.py b/src/textmate_grammar/utils/logger.py similarity index 99% rename from src/textmate_grammar/logger.py rename to src/textmate_grammar/utils/logger.py index 3f319e1..cec0a0b 100644 --- a/src/textmate_grammar/logger.py +++ b/src/textmate_grammar/utils/logger.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: - from .parser import GrammarParser + from 
..parser import GrammarParser MAX_LENGTH = 79 diff --git a/test/regression/test_matlab.py b/test/regression/test_matlab.py index 2314124..0a18100 100755 --- a/test/regression/test_matlab.py +++ b/test/regression/test_matlab.py @@ -1,7 +1,7 @@ import logging +import pytest from pathlib import Path -import pytest from textmate_grammar.grammars import matlab from textmate_grammar.language import LanguageParser diff --git a/test/unit/matlab/__init__.py b/test/unit/matlab/__init__.py index 2020767..8342b0e 100644 --- a/test/unit/matlab/__init__.py +++ b/test/unit/matlab/__init__.py @@ -1,7 +1,7 @@ import logging -from textmate_grammar.language import LanguageParser -from textmate_grammar.grammars import matlab +from textmate_grammar.grammars import matlab +from textmate_grammar.language import LanguageParser logging.getLogger().setLevel(logging.DEBUG) logging.getLogger("textmate_grammar").setLevel(logging.INFO) diff --git a/test/unit/matlab/test_anonymous_function.py b/test/unit/matlab/test_anonymous_function.py index 62715cf..91b69c5 100644 --- a/test/unit/matlab/test_anonymous_function.py +++ b/test/unit/matlab/test_anonymous_function.py @@ -1,7 +1,7 @@ import pytest -from ...unit import MSG_NO_MATCH, MSG_NOT_PARSED -from . import parser +from . import parser +from ...unit import MSG_NO_MATCH, MSG_NOT_PARSED test_vector = {} diff --git a/test/unit/matlab/test_validators.py b/test/unit/matlab/test_validators.py index e5a8375..a757ae6 100644 --- a/test/unit/matlab/test_validators.py +++ b/test/unit/matlab/test_validators.py @@ -1,9 +1,9 @@ import pytest -from textmate_grammar.handler import ContentHandler +from textmate_grammar.utils.handler import ContentHandler + from ...unit import MSG_NO_MATCH, MSG_NOT_PARSED from . import parser as matlabParser - parser = matlabParser.repository["validators"] test_vector = {}