Skip to content

Commit

Permalink
updated adjective parsing
Browse files Browse the repository at this point in the history
- updated adjective parsing rules
- fixed formatting for monotype characters
- increased version number to 1.0.8
  • Loading branch information
sedthh committed May 3, 2018
1 parent 98eb9fb commit 43aeb77
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion lara/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Lara - Linguistic Aim Recognizer API

__all__ = 'nlp','parser','stemmer','entities'
__version__ = '1.0.7'
__version__ = '1.0.8'
__version_info__ = tuple(int(num) for num in __version__.split('.'))

import sys
Expand Down
12 changes: 6 additions & 6 deletions lara/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
class Intents:

# STATIC REGULAR EXPRESSIONS
prefixes = r'(?:(?i)'+('|'.join(["abba","alá","át","be","bele","benn","el","ellen","elő","fel","föl","hátra","hozzá","ide","ki","körül","le","meg","mellé","neki","oda","össze","rá","szét","túl","utána","vissza"]))+')?'
typo_prefixes = r'(?:(?i)'+('|'.join(["aba","ala","at","be","bele","ble","ben","el","elen","eln","elo","fel","fol","hatra","htara","harta","hoza","hzoa","ide","ki","korul","kroul","kourl","le","meg","mele","mle","neki","nkei","oda","osze","ozse","ra","szet","sezt","tul","utana","uatna","utna","visza","vsiza","vizsa"]))+')?'
pattern_noun = r'(?i)a?i?n?(?:[aáeéioóöőuúü]?[djknmrst])?(?:[abjhkntv]?[aáeéioóöőuúü]?[lgkntz]?)?(?:[ae][kt])?'
prefixes = r'(?:(?i)'+('|'.join(["abba","alá","át","be","bele","benn","el","ellen","elő","fel","föl","hátra","hozzá","ide","ki","körül","le","meg","mellé","neki","oda","össze","rá","szét","túl","utána","vissza"]))+')?'
typo_prefixes = r'(?:(?i)'+('|'.join(["aba","ala","at","be","bele","ble","ben","el","elen","eln","elo","fel","fol","hatra","htara","harta","hoza","hzoa","ide","ki","korul","kroul","kourl","le","meg","mele","mle","neki","nkei","oda","osze","ozse","ra","szet","sezt","tul","utana","uatna","utna","visza","vsiza","vizsa"]))+')?'
pattern_noun = r'(?i)a?i?n?(?:[aáeéioóöőuúü]?[djknmrst])?(?:[abjhkntv]?[aáeéioóöőuúü]?[lgkntz]?)?(?:[ae][kt])?'
typo_pattern_noun = r'(?i)a?i?n?(?:[aeiou]?[djknmrst])?(?:[abjhkntv]?[aeiou]?[lgkntz]?)?(?:[ae][kt])?'
pattern_adj = r'(?i)(?:[aeoóöő]?s)?(?:[aáeéoó]?b{0,2})(?:[ae]?[nk])?(?:(?:[aáeéioóöőuúü]?[dklmnt])?(?:[aáeéioóöőuúü]?[klnt]?)?)'
typo_pattern_adj = r'(?i)(?:[aeo]?s)?(?:[aeo]?b?)(?:[ae]?[nk])?(?:(?:[aeiou]?[dklmnt])?(?:[aeiou]?[klnt]?)?)'
pattern_verb = r'(?i)(?:h[ae][st]+e?)?(?:j?[ae])?(?:[eaá]?s{0,2}e?d?|[aáeéo]tt)?(?:(?:[jntv]|[eo]?g[ae]t+)?(?:[aeioöuü]n?[dklmt]|n[aáeéi]k?|sz|[aái])?(?:t[aáeéou][dkmt]?(?:ok)?)?)?(?:(?:t[ae]t)?(?:h[ae]t(?:[jnt]?[aáeéou](?:[dkm]|t[eéo]k)?)?t*)|[aáeé]?z?ni)?'
pattern_adj = r'(?i)(?:[aeoóöő]?s)?(?:[aáeéoó]?b{0,2})(?:[ae]?[nk])?(?:j?[ae])?(?:(?:[aáeéioóöőuúü]?[dklmnt])?(?:[aáeéioóöőuúü]?[klnt]?)?)(?:s[aáeé]g[ae]?(?:i\w*)?)?'
typo_pattern_adj = r'(?i)(?:[aeo]?s)?(?:[aeo]?b?)(?:[ae]?[nk])?(?:j?[ae])?(?:(?:[aeiou]?[dklmnt])?(?:[aeiou]?[klnt]?)?)(?:s[ae]g[ae]?(?:i\w*)?)?'
pattern_verb = r'(?i)(?:h[ae][st]+e?)?(?:j?[ae])?(?:[eaá]?s{0,2}e?d?|[aáeéo]tt)?(?:(?:[jntv]|[eo]?g[ae]t+)?(?:[aeioöuü]n?[dklmt]|n[aáeéi]k?|sz|[aái])?(?:t[aáeéou][dkmt]?(?:ok)?)?)?(?:(?:t[ae]t)?(?:h[ae]t(?:[jnt]?[aáeéou](?:[dkm]|t[eéo]k)?)?t*)|[aáeé]?z?ni)?'
typo_pattern_verb = r'(?i)(?:h[ae][st]e?)?(?:j?[ae])?(?:[eaá]?s?e?d?|[aeo]t)?(?:(?:[jntv]|[eo]?g[ae]t)?(?:[aeiou]n?[dklmt]|n[aei]k?|sz|[ai])?(?:t[aeou][dkmt]?(?:ok)?)?)?(?:(?:t[ae]t)?(?:h[ae]t(?:[jnt]?[aeou](?:[dkm]|t[eo]k)?)?t?)|[ae]?z?ni)?'

##### CONSTRUCTOR #####
Expand Down

0 comments on commit 43aeb77

Please sign in to comment.