Skip to content

Commit

Permalink
Merge pull request #2 from ti-oluwa/development
Browse files Browse the repository at this point in the history
Some code refactoring, bug fixes. Updated documentation
  • Loading branch information
ti-oluwa authored Dec 24, 2023
2 parents ae8fd77 + 65060b5 commit 9cc8c5d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 23 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
### tranzlate
## tranzlate

A wrapper around the translators package by UlionTse that enables multilingual translation of text, files, markup and BeautifulSoup objects.

Expand Down Expand Up @@ -125,6 +125,10 @@ To use a proxy, simply pass the proxy to the `Translator` class on instantiation
```python
import tranzlate

translator = tranzlate.Translator()
text = 'Good Morning!'
translation = translator.translate(text, target_lang='yo', proxies={'https': 'https://<proxy>:<port>'})
print(translation)
```

### Other Methods
Expand Down Expand Up @@ -162,7 +166,7 @@ To check if a language pair is supported by the translator's engine:
import tranzlate

translator = tranzlate.Translator()
is_supported = translator.is_supported_pair('en', 'yo')
is_supported = translator.is_supported_pair(src_lang='en', target_lang='yo')
print(is_supported)

# Output: True
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/test_file.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Awọn ohun elo ti o wa ni ti o wa ni ti o ti o ti o ti o ti o ba ti o ba nífẹẹẹ si ipa yìí, jọwọ́wò béèrè fún àgbéyẹ̀wò lẹ́yìn náà.
Awọn ohun elo ti o wa ni ti o wa ni ti o ti o ti o ti o ti o ti o ti o ba ti o ba ti o ba ti o ba nífẹẹẹ si ipa yìí, jọwọwò béèrè fun àgbéyẹwò lẹyìn.
52 changes: 32 additions & 20 deletions tranzlate/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
from bs4 import BeautifulSoup
from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor
try:
from translators.server import TranslatorsServer, tss, Tse
except Exception as exc:
raise ConnectionError(f"Could not import `translators` module: {exc}")
from translators.server import TranslatorsServer, tss, Tse

from bs4_web_scraper.file_handler import FileHandler
from .exceptions import TranslationError, UnsupportedLanguageError
Expand Down Expand Up @@ -280,13 +277,28 @@ def translate(
# Output: "Yorùbá jẹ́ èdè tí ó ń ṣe àwọn èdè ní ìlà oòrùn Áfríkà, tí ó wà ní orílẹ̀-èdè Gúúsù Áfríkà."
'''
if is_markup:
return self.translate_markup(content, src_lang, target_lang, **kwargs)
return self.translate_markup(
markup=content,
src_lang=src_lang,
target_lang=target_lang,
**kwargs
)
elif isinstance(content, BeautifulSoup):
return self.translate_soup(content, src_lang, target_lang, **kwargs)
return self.translate_text(content, src_lang, target_lang, **kwargs)
return self.translate_soup(
soup=content,
src_lang=src_lang,
target_lang=target_lang,
**kwargs
)
return self.translate_text(
text=content,
src_lang=src_lang,
target_lang=target_lang,
**kwargs
)


@functools.cache
# @functools.cache
def translate_text(
self,
text: str,
Expand Down Expand Up @@ -405,7 +417,7 @@ def translate_file(

def _translate_soup_tag(
self,
element: Tag,
tag: Tag,
src_lang: str = "auto",
target_lang: str = "en",
_ct: int = 0,
Expand All @@ -420,41 +432,41 @@ def _translate_soup_tag(
* This function modifies the element in place.
* Translations are cached by default to avoid repeated translations which can be costly.
:param element (bs4.element.Tag): The element whose text is to be translated.
:param tag (bs4.element.Tag): The tag whose text content is to be translated.
:param src_lang (str, optional): Source language. Defaults to "auto".
:param target_lang (str, optional): Target language. Defaults to "en".
:param _ct (int, optional): The number of times the function has been called recursively. Defaults to 0.
Do not pass this argument manually.
'''
if not isinstance(element, Tag):
raise TypeError("Invalid type for `element`")
if not isinstance(tag, Tag):
raise TypeError("Invalid type for `tag`")
if not isinstance(_ct, int):
raise TypeError("Invalid type for `_ct`")

if element.string and element.string.strip():
initial_string = copy.copy(element.string)
cached_translation = self._cache.get(element.string, None)
if tag.string and tag.string.strip():
initial_string = copy.copy(tag.string)
cached_translation = self._cache.get(tag.string, None)
if cached_translation:
element.string.replace_with(cached_translation)
tag.string.replace_with(cached_translation)
else:
try:
translation = self.translate_text(
text=element.string,
text=tag.string,
src_lang=src_lang,
target_lang=target_lang,
**kwargs
)
element.string.replace_with(translation)
tag.string.replace_with(translation)

except Exception as exc:
error_ = TranslationError(f"Error translating element: {exc}")
error_ = TranslationError(f"Error translating tag: {exc}")
sys.stderr.write(f"{error_}\n")
# try again
_ct += 1
# prevents the translation engine from blocking our IP address
time.sleep(random.random(2, 5) * _ct)
if _ct <= 3:
return self._translate_soup_tag(element, src_lang, target_lang, _ct, **kwargs)
return self._translate_soup_tag(tag, src_lang, target_lang, _ct, **kwargs)
finally:
self._cache[initial_string] = translation
return None
Expand Down

0 comments on commit 9cc8c5d

Please sign in to comment.