Skip to content

Commit

Permalink
Merge pull request #25 from yaniv-golan/is_valid_language_code
Browse files Browse the repository at this point in the history
Added is_language to check code validity without raising an exception
  • Loading branch information
LBeaudoux authored Oct 23, 2024
2 parents 3c17054 + f5095df commit 5e0d029
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 4 deletions.
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,31 @@ When a deprecated language value is passed to `Lang`, a `DeprecatedLanguageValue
'Gascon replaced by Occitan (post 1500).'
```

Note that you can use the `is_language` checker if you would rather not handle exceptions.

### Checker

The `is_language` function checks if a language value is valid according to ISO 639.

```python
>>> from iso639 import is_language
>>> is_language("fr")
True
>>> is_language("French")
True
```

You can restrict the check to certain identifiers or names by passing an additional argument.
```python
>>> is_language("fr", "pt3") # only 639-3
False
>>> is_language("fre", ("pt2b", "pt2t")) # only 639-2/B or 639-2/T
True
```

## Speed

`iso639-lang` loads its mappings into memory to process calls much [faster](https://github.com/LBeaudoux/benchmark-iso639) than libraries that rely on an embedded database.
`iso639-lang` loads its mappings into memory to process calls much [faster](https://github.com/LBeaudoux/benchmark-iso639) than Python libraries that rely on an embedded database.


## Sources
Expand Down
3 changes: 2 additions & 1 deletion iso639/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .iso639 import Lang, iter_langs
from .iso639 import Lang, iter_langs, is_language

# Names re-exported as the public API of the package.
__all__ = ["Lang", "iter_langs", "is_language"]
93 changes: 92 additions & 1 deletion iso639/iso639.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from operator import itemgetter
from typing import Dict, Iterator, List, Optional, Union
from typing import Dict, Iterator, List, Optional, Union, Set, Tuple
from functools import lru_cache

from .datafile import load_file
from .exceptions import DeprecatedLanguageValue, InvalidLanguageValue
Expand Down Expand Up @@ -359,3 +360,93 @@ def iter_langs() -> Iterator[Lang]:
sorted_lang_names = load_file("list_langs")

return (Lang(lang_name) for lang_name in sorted_lang_names)


# Called with parentheses so the decorator also works on Python < 3.8,
# where the bare ``@lru_cache`` form is not supported.
@lru_cache()
def _get_language_values(identifiers_or_names: Tuple[str, ...]) -> Set[str]:
    """Collect the language values registered under the given tags.

    Results are memoized per argument tuple. The returned set is the cached
    object itself, so callers must treat it as read-only.

    Parameters
    ----------
    identifiers_or_names : Tuple[str, ...]
        Tags selecting which values to collect. Each one must be 'pt1',
        'pt2b', 'pt2t', 'pt3', 'pt5', 'name' or 'other_name'.

    Returns
    -------
    Set[str]
        The keys of the "mapping_ref_name" data file when 'other_name' is
        requested, plus the keys of the "mapping_data" entry of every other
        requested tag.

    Raises
    ------
    ValueError
        When at least one tag is not a supported identifier or name.
    """
    tags = set(identifiers_or_names)
    all_tags = {"pt1", "pt2b", "pt2t", "pt3", "pt5", "name", "other_name"}
    invalid_tags = tags - all_tags
    if invalid_tags:
        # Sets are unordered: sort so the message is stable between runs.
        invalid = ", ".join(sorted(invalid_tags))
        raise ValueError(
            f"Invalid identifiers or names: {invalid}. "
            f"Valid options are: {', '.join(sorted(all_tags))}."
        )
    language_values = set()
    if "other_name" in tags:
        # 'other_name' values live in their own data file.
        mapping = load_file("mapping_ref_name")
        language_values.update(mapping.keys())
        tags.remove("other_name")
    if tags:
        # Only load the main mapping when another tag still needs it.
        mapping = load_file("mapping_data")
        for tag in tags:
            language_values.update(mapping[tag].keys())
    return language_values


def is_language(
    value: str,
    identifiers_or_names: Union[str, Tuple[str, ...]] = (
        "pt1",
        "pt2b",
        "pt2t",
        "pt3",
        "pt5",
        "name",
        "other_name",
    ),
) -> bool:
    """Check if a given value corresponds to a valid ISO 639 language
    identifier or name.

    Parameters
    ----------
    value : str
        The language value to validate. Values that cannot be a language
        value at all (e.g. None or an unhashable object) yield False.
    identifiers_or_names : Union[str, Tuple[str, ...]], optional
        The ISO 639 identifiers or names to check against, given as a
        single string or as a tuple, list or set of strings. Defaults to
        all available identifiers and names.

    Returns
    -------
    bool
        True if the value is valid for the given identifiers and names, False
        otherwise.

    Raises
    ------
    TypeError
        When `identifiers_or_names` is neither a string nor a tuple, list
        or set made only of strings.
    ValueError
        When string(s) of `identifiers_or_names` are not 'pt1', 'pt2b', 'pt2t',
        'pt3', 'pt5', 'name' or 'other_name'.

    Examples
    --------
    >>> is_language("fr")
    True
    >>> is_language("French")
    True
    >>> is_language("fr", "pt3")
    False
    >>> is_language("fre", ("pt2b", "pt2t"))
    True
    """
    if isinstance(identifiers_or_names, str):
        tags = (identifiers_or_names,)
    elif not isinstance(identifiers_or_names, (list, set, tuple)):
        raise TypeError(
            "'identifiers_or_names' must be a string or an iterable of "
            f"strings, got {type(identifiers_or_names).__name__}."
        )
    elif all(isinstance(s, str) for s in identifiers_or_names):
        tags = tuple(identifiers_or_names)
    elif isinstance(identifiers_or_names, tuple):
        all_types = (type(v).__name__ for v in identifiers_or_names)
        raise TypeError(
            "'identifiers_or_names' must be a string or an iterable of "
            f"strings, got tuple of {' and '.join(all_types)}."
        )
    else:
        # A list or set holding at least one non-string element.
        raise TypeError(
            "'identifiers_or_names' must be a string or an iterable of "
            f"strings, got {type(identifiers_or_names).__name__}."
        )
    # Canonical (sorted, de-duplicated) key: the same selection of tags
    # always maps to the same cache entry, whatever order it was given in.
    language_values = _get_language_values(tuple(sorted(set(tags))))
    try:
        return value in language_values
    except TypeError:
        # Unhashable values cannot be members of the set of valid values.
        return False
37 changes: 36 additions & 1 deletion tests/test_iso639.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from iso639 import Lang, iter_langs
from iso639 import Lang, iter_langs, is_language
from iso639.exceptions import InvalidLanguageValue


Expand Down Expand Up @@ -120,3 +120,38 @@ def test_iter_langs():
assert all(isinstance(lg, Lang) for lg in lgs)
assert lg1 == lgs[0]
assert len(set(lgs)) == len(lgs)


class TestChecker:
    """Tests for the `is_language` checker."""

    def test_valid_language(self):
        # One sample per kind of identifier or name.
        samples = (
            "fr",  # 639-1
            "fra",  # 639-3 and 639-2/T
            "fre",  # 639-2/B
            "ber",  # 639-5
            "French",  # name
            "Chinese, Mandarin",  # other name
        )
        for sample in samples:
            assert is_language(sample) is True

    def test_invalid_language(self):
        for sample in ("xx", "xyz", ""):
            assert is_language(sample) is False

    def test_valid_language_with_identifier(self):
        assert is_language("fr", "pt1") is True
        # Both the bibliographic and the terminologic 639-2 codes match.
        for code in ("fre", "fra"):
            assert is_language(code, ("pt2b", "pt2t")) is True

    def test_invalid_language_with_identifier(self):
        # "fr" is a 639-1 code, so a 639-3-only check rejects it.
        result = is_language("fr", "pt3")
        assert result is False

    def test_none_input(self):
        result = is_language(None)
        assert result is False

    def test_wrong_indentifiers_or_names_type(self):
        not_a_tag_container = 42
        with pytest.raises(TypeError):
            is_language("fr", not_a_tag_container)

    def test_wrong_indentifiers_or_names_value(self):
        unknown_tag = "foobar"
        with pytest.raises(ValueError):
            is_language("fr", unknown_tag)

0 comments on commit 5e0d029

Please sign in to comment.