diff --git a/lara/__init__.py b/lara/__init__.py index 4139f1a..5302b8b 100644 --- a/lara/__init__.py +++ b/lara/__init__.py @@ -3,7 +3,7 @@ # Lara - Lingusitic Aim Recognizer API __all__ = 'nlp','parser','stemmer','entities' -__version__ = '1.0.7' +__version__ = '1.0.8' __version_info__ = tuple(int(num) for num in __version__.split('.')) import sys diff --git a/lara/parser.py b/lara/parser.py index 470879b..17e3a8c 100644 --- a/lara/parser.py +++ b/lara/parser.py @@ -8,13 +8,13 @@ class Intents: # STATIC REGULAR EXPRESSIONS - prefixes = r'(?:(?i)'+('|'.join(["abba","alá","át","be","bele","benn","el","ellen","elő","fel","föl","hátra","hozzá","ide","ki","körül","le","meg","mellé","neki","oda","össze","rá","szét","túl","utána","vissza"]))+')?' - typo_prefixes = r'(?:(?i)'+('|'.join(["aba","ala","at","be","bele","ble","ben","el","elen","eln","elo","fel","fol","hatra","htara","harta","hoza","hzoa","ide","ki","korul","kroul","kourl","le","meg","mele","mle","neki","nkei","oda","osze","ozse","ra","szet","sezt","tul","utana","uatna","utna","visza","vsiza","vizsa"]))+')?' - pattern_noun = r'(?i)a?i?n?(?:[aáeéioóöőuúü]?[djknmrst])?(?:[abjhkntv]?[aáeéioóöőuúü]?[lgkntz]?)?(?:[ae][kt])?' + prefixes = r'(?:(?i)'+('|'.join(["abba","alá","át","be","bele","benn","el","ellen","elő","fel","föl","hátra","hozzá","ide","ki","körül","le","meg","mellé","neki","oda","össze","rá","szét","túl","utána","vissza"]))+')?' + typo_prefixes = r'(?:(?i)'+('|'.join(["aba","ala","at","be","bele","ble","ben","el","elen","eln","elo","fel","fol","hatra","htara","harta","hoza","hzoa","ide","ki","korul","kroul","kourl","le","meg","mele","mle","neki","nkei","oda","osze","ozse","ra","szet","sezt","tul","utana","uatna","utna","visza","vsiza","vizsa"]))+')?' + pattern_noun = r'(?i)a?i?n?(?:[aáeéioóöőuúü]?[djknmrst])?(?:[abjhkntv]?[aáeéioóöőuúü]?[lgkntz]?)?(?:[ae][kt])?' typo_pattern_noun = r'(?i)a?i?n?(?:[aeiou]?[djknmrst])?(?:[abjhkntv]?[aeiou]?[lgkntz]?)?(?:[ae][kt])?' - pattern_adj = r'(?i)(?:[aeoóöő]?s)?(?:[aáeéoó]?b{0,2})(?:[ae]?[nk])?(?:(?:[aáeéioóöőuúü]?[dklmnt])?(?:[aáeéioóöőuúü]?[klnt]?)?)' - typo_pattern_adj = r'(?i)(?:[aeo]?s)?(?:[aeo]?b?)(?:[ae]?[nk])?(?:(?:[aeiou]?[dklmnt])?(?:[aeiou]?[klnt]?)?)' - pattern_verb = r'(?i)(?:h[ae][st]+e?)?(?:j?[ae])?(?:[eaá]?s{0,2}e?d?|[aáeéo]tt)?(?:(?:[jntv]|[eo]?g[ae]t+)?(?:[aeioöuü]n?[dklmt]|n[aáeéi]k?|sz|[aái])?(?:t[aáeéou][dkmt]?(?:ok)?)?)?(?:(?:t[ae]t)?(?:h[ae]t(?:[jnt]?[aáeéou](?:[dkm]|t[eéo]k)?)?t*)|[aáeé]?z?ni)?' + pattern_adj = r'(?i)(?:[aeoóöő]?s)?(?:[aáeéoó]?b{0,2})(?:[ae]?[nk])?(?:j?[ae])?(?:(?:[aáeéioóöőuúü]?[dklmnt])?(?:[aáeéioóöőuúü]?[klnt]?)?)(?:s[aáeé]g[ae]?(?:i\w*)?)?' + typo_pattern_adj = r'(?i)(?:[aeo]?s)?(?:[aeo]?b?)(?:[ae]?[nk])?(?:j?[ae])?(?:(?:[aeiou]?[dklmnt])?(?:[aeiou]?[klnt]?)?)(?:s[ae]g[ae]?(?:i\w*)?)?' + pattern_verb = r'(?i)(?:h[ae][st]+e?)?(?:j?[ae])?(?:[eaá]?s{0,2}e?d?|[aáeéo]tt)?(?:(?:[jntv]|[eo]?g[ae]t+)?(?:[aeioöuü]n?[dklmt]|n[aáeéi]k?|sz|[aái])?(?:t[aáeéou][dkmt]?(?:ok)?)?)?(?:(?:t[ae]t)?(?:h[ae]t(?:[jnt]?[aáeéou](?:[dkm]|t[eéo]k)?)?t*)|[aáeé]?z?ni)?' typo_pattern_verb = r'(?i)(?:h[ae][st]e?)?(?:j?[ae])?(?:[eaá]?s?e?d?|[aeo]t)?(?:(?:[jntv]|[eo]?g[ae]t)?(?:[aeiou]n?[dklmt]|n[aei]k?|sz|[ai])?(?:t[aeou][dkmt]?(?:ok)?)?)?(?:(?:t[ae]t)?(?:h[ae]t(?:[jnt]?[aeou](?:[dkm]|t[eo]k)?)?t?)|[ae]?z?ni)?' ##### CONSTRUCTOR #####