deps: poetry update (#208)
* deps: poetry update

* fix: ruff format

* misc: poetry lock

* misc: update ruff

* misc: ruff format with latest ruff
himkt authored Mar 13, 2024
1 parent 839ce44 commit 4ad2139
Showing 10 changed files with 335 additions and 328 deletions.
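
The exact commands behind this commit are not recorded on this page; the sketch below is a plausible reconstruction based only on the commit message above and the pyproject.toml change further down. The poetry add line in particular is an assumption, since the ruff constraint (^0.1.14 to ^0.3.0) may simply have been edited by hand before locking.

    poetry update                          # deps: poetry update (refresh locked versions within existing constraints)
    poetry add --group dev "ruff@^0.3.0"   # misc: update ruff (assumed; the constraint may have been edited by hand)
    poetry lock                            # misc: poetry lock (regenerate poetry.lock)
    poetry run ruff format                 # fix/misc: ruff format (re-format the code base with the new ruff)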
16 changes: 8 additions & 8 deletions docs/source/conf.py
@@ -15,13 +15,13 @@

import os

- on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+ on_rtd = os.environ.get("READTHEDOCS", None) == "True"

# -- Project information -----------------------------------------------------

- project = 'konoha'
- copyright = '2020, himkt'
- author = 'himkt'
+ project = "konoha"
+ copyright = "2020, himkt"
+ author = "himkt"


# -- General configuration ---------------------------------------------------
@@ -30,11 +30,11 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
- 'sphinx.ext.autodoc',
+ "sphinx.ext.autodoc",
]

# Add any paths that contain templates here, relative to this directory.
- templates_path = ['_templates']
+ templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@@ -48,9 +48,9 @@
# a list of builtin themes.
#
if not on_rtd:
- html_theme = 'pydata_sphinx_theme'
+ html_theme = "pydata_sphinx_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
- html_static_path = ['_static']
+ html_static_path = ["_static"]
4 changes: 1 addition & 3 deletions example/tokenize_demo.py
@@ -22,9 +22,7 @@
print("Skip: ", word_tokenizer_name)

try:
- _tokenizer = WordTokenizer(
- "Sentencepiece", model_path="./data/model.spm"
- ) # NOQA
+ _tokenizer = WordTokenizer("Sentencepiece", model_path="./data/model.spm") # NOQA
word_tokenizers.append(_tokenizer)

except (ImportError, OSError, RuntimeError):
592 changes: 296 additions & 296 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -39,7 +39,7 @@ all = ["janome", "natto-py", "kytea", "sudachipy", "sudachidict-core", "nagisa",
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.4"
httpx = "^0.26.0"
- ruff = "^0.1.14"
+ ruff = "^0.3.0"
mypy = "^1.8.0"
sphinx = "<7.2.0"
pydata-sphinx-theme = "<0.15.0"
1 change: 1 addition & 0 deletions src/konoha/__init__.py
@@ -1,4 +1,5 @@
"""__init__.py."""
+
from importlib.metadata import version

from konoha.sentence_tokenizer import SentenceTokenizer # NOQA
1 change: 1 addition & 0 deletions src/konoha/data/token.py
@@ -1,4 +1,5 @@
"""Token class."""
+
from typing import Dict
from typing import List
from typing import Optional
1 change: 1 addition & 0 deletions src/konoha/word_tokenizer.py
@@ -1,4 +1,5 @@
"""Word Level Tokenizer."""
+
import warnings
from typing import Dict
from typing import List
10 changes: 6 additions & 4 deletions tests/api/v1/test_batch_tokenization.py
@@ -12,7 +12,8 @@


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab"},
{"tokenizer": "sudachi", "mode": "A"},
{"tokenizer": "sudachi", "mode": "B"},
@@ -22,7 +23,7 @@
{"tokenizer": "character"},
{"tokenizer": "nagisa"},
{"tokenizer": "janome"},
- ]
+ ],
)
def test_tokenization(tokenizer_params: Dict):
if tokenizer_params["tokenizer"] == "kytea" and sys.version_info < (3, 7):
@@ -36,9 +37,10 @@ def test_tokenization(tokenizer_params: Dict):


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
- ]
+ ],
)
def test_tokenization_with_remote_resource(tokenizer_params: Dict):
if "AWS_ACCESS_KEY_ID" not in os.environ and tokenizer_params["system_dictionary_path"].startswith("s3://"):
10 changes: 6 additions & 4 deletions tests/api/v1/test_tokenization.py
@@ -12,7 +12,8 @@


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab"},
{"tokenizer": "mecab", "with_postag": True},
{"tokenizer": "sudachi", "mode": "A"},
@@ -23,7 +24,7 @@
{"tokenizer": "character"},
{"tokenizer": "nagisa"},
{"tokenizer": "janome"},
- ]
+ ],
)
def test_tokenization(tokenizer_params: Dict):
if tokenizer_params["tokenizer"] == "kytea" and sys.version_info < (3, 7):
@@ -37,9 +38,10 @@ def test_tokenization(tokenizer_params: Dict):


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
- ]
+ ],
)
def test_tokenization_with_remote_resoruce(tokenizer_params: Dict):
if "AWS_ACCESS_KEY_ID" not in os.environ and tokenizer_params["system_dictionary_path"].startswith("s3://"):
26 changes: 14 additions & 12 deletions tests/test_word_tokenizer.py
@@ -26,7 +26,8 @@ def read_lines(tokenizer: str):


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab"},
{"tokenizer": "sudachi", "mode": "A"},
{"tokenizer": "sudachi", "mode": "A"},
@@ -36,7 +37,7 @@ def read_lines(tokenizer: str):
{"tokenizer": "character"},
{"tokenizer": "whitespace"},
{"tokenizer": "sentencepiece", "model_path": "data/model.spm"},
- ]
+ ],
)
def test_tokenize_with_character(raw_texts: List[str], tokenizer_params: Dict):
if tokenizer_params["tokenizer"] == "kytea" and sys.version_info < (3, 7):
@@ -50,9 +51,10 @@ def test_tokenize_with_character(raw_texts: List[str], tokenizer_params: Dict):


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
- ]
+ ],
)
def test_tokenize(raw_texts: List[str], tokenizer_params: Dict):
tokenizer_name = tokenizer_params["tokenizer"]
@@ -63,7 +65,8 @@ def test_tokenize(raw_texts: List[str], tokenizer_params: Dict):


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab"},
{"tokenizer": "sudachi", "mode": "A"},
{"tokenizer": "sudachi", "mode": "A"},
@@ -73,7 +76,7 @@ def test_tokenize(raw_texts: List[str], tokenizer_params: Dict):
{"tokenizer": "character"},
{"tokenizer": "whitespace"},
{"tokenizer": "sentencepiece", "model_path": "data/model.spm"},
- ]
+ ],
)
def test_batch_tokenize_with_character(raw_texts: List[str], tokenizer_params: Dict):
if tokenizer_params["tokenizer"] == "kytea" and sys.version_info < (3, 7):
@@ -82,17 +85,17 @@ def test_batch_tokenize_with_character(raw_texts: List[str], tokenizer_params: Dict):
tokenizer_name = tokenizer_params["tokenizer"]
tokenizer = WordTokenizer(**tokenizer_params)
expect = [
- [Token.from_dict(token_param) for token_param in token_params]
- for token_params in read_lines(tokenizer_name)
+ [Token.from_dict(token_param) for token_param in token_params] for token_params in read_lines(tokenizer_name)
]
result = tokenizer.batch_tokenize(raw_texts)
assert expect == result


@pytest.mark.parametrize(
- "tokenizer_params", [
+ "tokenizer_params",
+ [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
- ]
+ ],
)
def test_batch_tokenize(raw_texts: List[str], tokenizer_params: Dict):
if "AWS_ACCESS_KEY_ID" not in os.environ and tokenizer_params["system_dictionary_path"].startswith("s3://"):
@@ -101,8 +104,7 @@ def test_batch_tokenize(raw_texts: List[str], tokenizer_params: Dict):
tokenizer_name = tokenizer_params["tokenizer"]
tokenizer = WordTokenizer(**tokenizer_params)
expect = [
- [Token.from_dict(token_param) for token_param in token_params]
- for token_params in read_lines(tokenizer_name)
+ [Token.from_dict(token_param) for token_param in token_params] for token_params in read_lines(tokenizer_name)
]
result = tokenizer.batch_tokenize(raw_texts)
assert expect == result
