Skip to content

Commit

Permalink
Minor Refactoring. Published package to PYPI.
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-oluwa committed Dec 26, 2023
1 parent 65060b5 commit 5308ac8
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 52 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
**/__pycache__/**
**/tranzlate.egg-info
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ import tranzlate
translator = tranzlate.Translator()

markup = '<p>Good Morning!</p>'
translated_markup = translator.translate(markup, target_lang='yo')
translated_markup = translator.translate(markup, target_lang='yo', is_markup=True)
print(translated_markup)

# Output: <p>Eku ojumo!</p>
Expand Down
Binary file added dist/tranzlate-0.0.1-py3-none-any.whl
Binary file not shown.
Binary file added dist/tranzlate-0.0.1.tar.gz
Binary file not shown.
54 changes: 54 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Packaging metadata and build configuration for the `tranzlate` distribution.

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[tool.setuptools.dynamic]
# The version is not hard-coded here; it is read from `tranzlate.__version__`.
version = {attr = "tranzlate.__version__"}

[project]
name = "tranzlate"
dynamic = ["version"]
authors = [
    { name = "ti-oluwa", email = "tioluwa.dev@gmail.com" },
]
maintainers = [
    { name = "ti-oluwa", email = "tioluwa.dev@gmail.com" },
]
description = "Multilingual translation of text, files, markup and BeautifulSoup objects."
keywords = [
    "translate",
    "translation",
    "google translate",
    "bing translate",
    "yandex translate",
    "translate text",
    "translate files",
    "translate markup",
    "translate html",
    "translate xml",
    "translate BeautifulSoup",
    "detect language",
]
readme = "README.md"
# `tranzlate/translator.py` annotates its signatures with PEP 604 unions
# (e.g. `str | bytes`). Those annotations are evaluated when the module is
# imported, and `type | type` only works on Python 3.10 and newer, so older
# interpreters cannot import the package.
requires-python = ">=3.10"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    "Operating System :: OS Independent",
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Utilities",
    "Natural Language :: English",
    "Typing :: Typed",
]
# Keep in sync with requirements.txt.
dependencies = [
    "translators==5.8.9",
    "beautifulsoup4==4.12.2",
    "simple_file_handler>=0.0.1",
]

[project.urls]
"Homepage" = "https://github.com/ti-oluwa/tranzlate"
"Bug Tracker" = "https://github.com/ti-oluwa/tranzlate/issues"
"Repository" = "https://github.com/ti-oluwa/tranzlate"
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
translators
bs4
bs4_web_scraper
translators==5.8.9
beautifulsoup4==4.12.2
simple_file_handler>=0.0.1
2 changes: 1 addition & 1 deletion tests/fixtures/test_file.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Awọn ohun elo ti o wa ni ti o wa ni ti o ti o ti o ti o ti o ti o ti o ba ti o ba ti o ba ti o ba nífẹẹẹ si ipa yìí, jọwọwò béèrè fun àgbéyẹwò lẹyìn.
Awọn ohun elo ti o wa ni ti o wa ni ti o ti o ti o ti o ti o ti o ti o ti o ti o ba ti o ba ti o ba ti o ba ti o ba ti o ba fẹ baẹẹ si ipa yìí, jọwọ béèrè fun àgbégbégbé lẹyìn.
2 changes: 1 addition & 1 deletion tests/test_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,4 @@ def test_translate_soup(self):
# Run the test suite when this file is executed directly as a script.
# The guard must compare the module variable `__name__`; the quoted literal
# "__name__" can never equal "__main__", so the suite would never start.
if __name__ == "__main__":
    unittest.main()

# RUN WITH 'python -m unittest discover tests "test_*.py"' from project directory
# RUN WITH 'python -m unittest discover tests "test_*.py"' from project's root directory
5 changes: 2 additions & 3 deletions tranzlate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
@Author: Daniel T. Afolayan (ti-oluwa.github.io)
"""

__version__ = "0.0.1"
__author__ = "Daniel T. Afolayan"

from .translator import Translator

__all__ = ["Translator"]
__version__ = "0.0.1"
__author__ = "Daniel T. Afolayan"
91 changes: 48 additions & 43 deletions tranzlate/translator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"""
Translate text, markup content, BeautifulSoup objects and files using the `translators` package.
"""

import functools
import sys
from typing import Callable, Dict, List, Tuple
from typing import Callable, Dict, List, Tuple, IO
from array import array
import time
import copy
Expand All @@ -13,8 +12,8 @@
from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor
from translators.server import TranslatorsServer, tss, Tse
import simple_file_handler as sfh

from bs4_web_scraper.file_handler import FileHandler
from .exceptions import TranslationError, UnsupportedLanguageError


Expand Down Expand Up @@ -86,7 +85,7 @@ def __init__(self, engine: str = "bing"):


@property
def server(self):
def server(self) -> TranslatorsServer:
"""The translation server used by the Translator instance"""
if not isinstance(self._server, TranslatorsServer):
raise TypeError("Invalid type for `_server`")
Expand Down Expand Up @@ -122,7 +121,7 @@ def language_map(self) -> Dict:
return {}

@property
def supported_languages(self):
def supported_languages(self) -> List:
"""
Returns a list of language codes for source
languages supported by the translator's engine.
Expand All @@ -131,12 +130,12 @@ def supported_languages(self):


@classmethod
def engines(cls):
def engines(cls) -> List[str]:
"""Returns a list of supported translation engines"""
return cls._server.translators_pool


def is_supported_language(self, lang_code: str):
def is_supported_language(self, lang_code: str) -> bool:
'''
Check if the source language with the specified
language code is supported by the translator's engine.
Expand Down Expand Up @@ -252,8 +251,9 @@ def translate(
src_lang: str = "auto",
target_lang: str = "en",
is_markup: bool = False,
encoding: str = "utf-8",
**kwargs
):
) -> str | bytes | BeautifulSoup:
'''
Translate content from source language to target language.
Expand All @@ -262,6 +262,7 @@ def translate(
It is advisable to provide a source language to get more accurate translations.
:param target_lang (str, optional): Target language. Defaults to "en".
:param is_markup (bool, optional): Whether `content` is markup. Defaults to False.
:param encoding (str, optional): The encoding of the content (for bytes content only). Defaults to "utf-8".
:param **kwargs: Keyword arguments to be passed to required translation method.
:return: Translated content.
Expand All @@ -276,11 +277,13 @@ def translate(
# Output: "Yorùbá jẹ́ èdè tí ó ń ṣe àwọn èdè ní ìlà oòrùn Áfríkà, tí ó wà ní orílẹ̀-èdè Gúúsù Áfríkà."
'''
is_bytes = isinstance(content, bytes)
if is_markup:
return self.translate_markup(
markup=content,
src_lang=src_lang,
target_lang=target_lang,
target_lang=target_lang,
encoding=encoding,
**kwargs
)
elif isinstance(content, BeautifulSoup):
Expand All @@ -290,12 +293,14 @@ def translate(
target_lang=target_lang,
**kwargs
)
return self.translate_text(
text=content,

translation = self.translate_text(
text=content.decode(encoding) if is_bytes else content,
src_lang=src_lang,
target_lang=target_lang,
**kwargs
)
return translation.encode(encoding) if is_bytes else translation


# @functools.cache
Expand All @@ -305,7 +310,7 @@ def translate_text(
src_lang: str="auto",
target_lang: str="en",
**kwargs
) -> str | Dict:
) -> str:
'''
Translate text from `src_lang` to `target_lang`.
Expand Down Expand Up @@ -337,6 +342,7 @@ def translate_text(

self._check_lang_codes(src_lang, target_lang)
kwargs_ = {'if_ignore_empty_query': True}
kwargs.pop('is_detail_result', None)
kwargs_.update(kwargs)

def _translate(text: str):
Expand All @@ -363,7 +369,7 @@ def translate_file(
src_lang: str="auto",
target_lang: str="en",
**kwargs
):
) -> IO:
'''
Translates file from `src_lang` to `target_lang`.
Expand Down Expand Up @@ -392,27 +398,25 @@ def translate_file(
self._check_lang_codes(src_lang, target_lang)
kwargs_ = {'if_ignore_empty_query': True}
kwargs.pop('is_detail_result', None)

kwargs_.update(kwargs)
try:
file_handler = FileHandler(filepath, exists_ok=True, not_found_ok=False)
except Exception as exc:
raise TranslationError(f"Could not translate file") from exc

if not file_handler.file_content:
return file_handler.file

try:
if file_handler.filetype in ['xhtml', 'htm', 'shtml', 'html', 'xml']:
translated_content = self.translate_markup(file_handler.file_content, src_lang, target_lang, **kwargs_)
else:
translated_content = self.translate_text(file_handler.file_content, src_lang, target_lang, **kwargs_)

file_handler.write_to_file(translated_content, write_mode='w+')
file_handler.close_file()
return file_handler.file
try:
with sfh.FileHandler(filepath, exists_ok=True, not_found_ok=False) as file_handler:
content = file_handler.file_content
if not content:
return file_handler.file

if file_handler.filetype in ['xhtml', 'htm', 'shtml', 'html', 'xml']:
translation = self.translate_markup(content, src_lang, target_lang, **kwargs_)
else:
translation = self.translate_text(content, src_lang, target_lang, **kwargs_)

file_handler.write_to_file(translation, write_mode='w+')
return file_handler.file
except Exception as exc:
raise TranslationError(f"File cannot be translated. {exc}")
raise TranslationError(
"File cannot be translated."
) from exc


def _translate_soup_tag(
Expand Down Expand Up @@ -464,7 +468,7 @@ def _translate_soup_tag(
# try again
_ct += 1
# prevents the translation engine from blocking our IP address
time.sleep(random.random(2, 5) * _ct)
time.sleep(random.random(2, 4) * _ct)
if _ct <= 3:
return self._translate_soup_tag(tag, src_lang, target_lang, _ct, **kwargs)
finally:
Expand All @@ -479,7 +483,7 @@ def translate_soup(
target_lang: str = "en",
thread: bool = True,
**kwargs
):
) -> BeautifulSoup:
'''
Translates the text of a BeautifulSoup object.
Expand Down Expand Up @@ -512,17 +516,21 @@ def translate_soup(
def translate_markup(
self,
markup: str | bytes,
src_lang: str="auto",
target_lang: str="en",
src_lang: str = "auto",
target_lang: str = "en",
markup_parser: str = "lxml",
encoding: str = "utf-8",
**kwargs
):
) -> str | bytes:
'''
Translates markup.
Translates markup (html, xml, etc.)
:param markup (str | bytes): markup content to be translated
:param src_lang (str, optional): Source language. Defaults to "auto".
It is advisable to provide a source language to get more accurate translations.
:param target_lang (str, optional): Target language. Defaults to "en".
:param markup_parser (str, optional): The (beautifulsoup) markup parser to use. Defaults to "lxml".
:param encoding (str, optional): The encoding of the markup (for bytes markup only). Defaults to "utf-8".
:param **kwargs: Keyword arguments to be passed to the `translate_soup` method.
:kwarg thread: bool, default True.
:kwarg professional_field: str, support baidu(), caiyun(), alibaba() only.
Expand All @@ -542,12 +550,9 @@ def translate_markup(
'''
if not isinstance(markup, (str, bytes)):
raise TypeError("Invalid type for `markup`")

is_bytes = isinstance(markup, bytes)
kwargs.pop('is_detail_result', None)
soup = BeautifulSoup(markup, 'lxml')
soup = BeautifulSoup(markup, markup_parser, from_encoding=encoding if is_bytes else None)
translated_markup = self.translate_soup(soup, src_lang, target_lang, **kwargs).prettify()

# re-encode the markup if the initial markup was in bytes
if is_bytes:
translated_markup = translated_markup.encode('utf-8')
return translated_markup
return translated_markup.encode('utf-8') if is_bytes else translated_markup

0 comments on commit 5308ac8

Please sign in to comment.