Skip to content

Commit

Permalink
updated entities based on user feedback
Browse files Browse the repository at this point in the history
- updated entities based on user feedback
 - increased version number to 1.1.15
  • Loading branch information
sedthh committed Jun 19, 2018
1 parent 8ab124c commit cdc3637
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
2 changes: 1 addition & 1 deletion lara/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Lara - Linguistic Aim Recognizer API

# Names listed for the package's public interface (a tuple of strings).
__all__ = 'nlp','parser','stemmer','entities'
# NOTE(review): two consecutive assignments appear here; this looks like the
# removed and added lines of a diff view. As written, the later value wins.
__version__ = '1.1.14'
__version__ = '1.1.15'
# Integer components of __version__, e.g. '1.1.15' -> (1, 1, 15).
__version_info__ = tuple(int(num) for num in __version__.split('.'))

import sys
Expand Down
23 changes: 12 additions & 11 deletions lara/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,19 @@ def smalltalk():
return {
"user_love" : [{"stem":"szeretlek","exc":[{"stem":"nem"}]},{"stem":"szeretsz engem","exc":[{"stem":"nem"}]},{"stem":"tetszek neked","exc":[{"stem":"nem"}]},{"stem":"tetszel nekem","exc":[{"stem":"nem"}]},{"stem":"szerelmes.+?bel[eé]d","wordclass":"regex","exc":[{"stem":"nem"}]},{"stem":"bel[eé]d.+?(szeret|es)tem","wordclass":"regex"},{"stem":"tal([aá]lko|i)z+(hat)?(unk|n[aá]nk)","wordclass":"regex"},{"stem":"([oö]le|karo)[lj]j([aáeé]l)?\s([aá]t|meg|bel[eé]m)","wordclass":"regex"},{"stem":"(meg|[aá]t|bel[eé]m)?([oö]lel|karol)(h[ae]t)?(sz|n[aáeé]l|j)","wordclass":"regex"},{"stem":"(meg)?(cs[oó]kol|puszil)(j([aá]l)?\smeg|sz|hat(sz|n[aá]l)|[oó]z+(hat)?(unk|n[aáeé]n?k))","wordclass":"regex"},{"stem":"(ad|dob|k[uü]ld)([jn]([aáeé]l)?|e?sz)(\segy)?(\snagy)?\s(puszi(k[aá])?t|cs[oó]kot)","wordclass":"regex"},{"stem":"le(szel|n+[eé]l|gy[eé]l)\sa\s(bar[aá]t(om|n[oöő]m)|fi[uú]m|csajom|szerelmem|valent[ií]n\w+)","wordclass":"regex"},{"stem":"ismerkedn","prefix":["meg"],"affix":["i","ék"],"inc":[{"stem":"veled"}]},{"stem":"szeretem","inc":[{"stem":"önt"},{"stem":"magát"}],"exc":[{"stem":"nem"}]}],
"user_flirting" : [{"stem":"mi(lyen)?\s(ruha\s)?van\s?(most\s?)?rajtad","wordclass":"regex"},{"stem":"(meg)?(basz|dug)(unk|n[aá]lak|lak)","wordclass":"regex"},{"stem":"sz?exi?(e[lt]\w*)?","wordclass":"regex"},{"stem":"folyt(ogas+([aá]?[dl])?|s([aá]l)?\smeg)\s(a\snyakam(at)?\s)?(a\s|egy\s)?(d[oö]gl[oö]t+|halot+)\smacsk[aá]val","wordclass":"regex"},{"stem":"(le)?szop(sz|ol|(hat)?n[aá]l)","wordclass":"regex"},{"stem":"van barátod","max_words":4}],
"user_bored" : [{"stem":"un(atkoz)?(om|unk)","wordclass":"regex","exc":[{"stem":"nem"}]}],
"user_bored" : [{"stem":"un(atkoz)?(om|unk)","wordclass":"regex","exc":[{"stem":"nem"}]},{"stem":"szórakoztass"}],
"user_happy" : [{"stem":"j[oó]\s(a\s)?kedvem(\svan)?","wordclass":"regex","exc":[{"stem":"nincs"},{"stem":"nem"}]},{"stem":"jól vagyok","exc":[{"stem":"nincs"},{"stem":"nem"}]},{"stem":"boldog","exc":[{"stem":"(sz[uü]l(i|t[eé]s\w*)|[uü]n+ep\w*|kar[aá]csony\w*|[eé]vfordul\w|([uú]j)?[eé]v\w*|h[uú]sv[eé]t\w*|n[eé]v\s?nap\w*|[ns]em)","wordclass":"regex"}]}],
"user_sad" : [{"stem":"j[oó]\s(a\s)?kedvem","wordclass":"regex","inc":[{"stem":"nincs"},{"stem":"nem"}]},{"stem":"szomorú","wordclass":"adjective","inc":[{"stem":"vagyok"}]},{"stem":"nem\s+(vagyok|[eé]rzem).+?j[oó]l","wordclass":"regex"}],
"user_angry_at_you": [{"stem":"ne\s((h[uú]z+|bas+z|d[uü]h[ií])\w*\s?fel|idege(s[ií]ts|lj([eé]l)?\s?(ki)?))","wordclass":"regex"},{"stem":"(ideges|m[eé]rges|d[uü]h[oö]s)\s(vagyok|voltam)","wordclass":"regex"},{"stem":"haragszom","exc":[{"stem":"nem"}]},{"stem":"(mi([eé]r)?t?\s)?nem\s(hal+|[eé]rt)([ae]sz|[eo]d)","wordclass":"regex"},{"stem":"nem?\sbesz[eé]l(j|het(sz)?)\s[ií]gy","wordclass":"regex"},{"stem":"megbántott","affix":["ál"]},{"stem":"ez nem volt szép"}],
"user_sick" : [{"stem":"((beteg|ros+zul)\s(vagyok|[eé]rzem)|(meg|le)betegedtem|nem\s[eé]rzem\s(magam(at)?\sj[oó]l|j[oó]l\smagam(at)?)|nem\svagyok\s(t[uú]l\s)?j[oó]l)","wordclass":"regex"}],
"user_angry_at_you": [{"stem":"ne\s((h[uú]z+|bas+z|d[uü]h[ií])\w*\s?fel|idege(s[ií]ts|lj([eé]l)?\s?(ki)?))","wordclass":"regex"},{"stem":"(ideges|m[eé]rges|d[uü]h[oö]s)\s(vagyok|voltam)","wordclass":"regex"},{"stem":"haragszom","exc":[{"stem":"nem"}]},{"stem":"(mi([eé]r)?t?\s)?nem\s(hal+|[eé]rt)([ae]sz|[eo]d)","wordclass":"regex"},{"stem":"nem?\sbesz[eé]l(j|het(sz)?)\s[ií]gy","wordclass":"regex"},{"stem":"megbántott","affix":["ál"]},{"stem":"ez nem volt szép"},{"stem":"buta vagy"}],
"user_forgiving_you": [{"stem":"meg\s?(van\s)?bocs[aá]l?j?t(o(t+a)?[km]|va)","wordclass":"regex"},{"stem":"(nem|dehogy)\sharagszo[km]","wordclass":"regex"},{"stem":"(sem+i|[ns]i[nc]+s)\s?(baj|gond)","wordclass":"regex"}],
"user_sorry" : [{"stem":"meg\s?(tud(sz|n[aá]l)\s)?bocs[aá]l?j?ta?(ni|sz|od|t*ot+ad)","wordclass":"regex"},{"stem":"ne haragudj"},{"stem":"bocsáss meg","exc":[{"stem":"bocs[aá]s+\s?meg\,?\s?\w+","wordclass":"regex"}]},{"stem":"sajnálom", "exc":[{"stem":"sajn[aá]lom\,?\s?\w+","wordclass":"regex"}]},{"stem":"megbántottalak","inc":[{"stem":"ha"},{"stem":"hogy"}]}],
"user_sorry" : [{"stem":"meg\s?(tud(sz|n[aá]l)\s)?bocs[aá]l?j?ta?(ni|sz|od|t*ot+ad)","wordclass":"regex"},{"stem":"ne haragudj"},{"stem":"bocsáss meg","exc":[{"stem":"bocs[aá]s+\s?meg\,?\s?\w+","wordclass":"regex"}]},{"stem":"sajnálom", "exc":[{"stem":"sajn[aá]lom\,?\s?\w+","wordclass":"regex"}]},{"stem":"megbántottalak","inc":[{"stem":"ha"},{"stem":"hogy"}]},{"stem":"megs[eé]rt[eoöő][dt]+[eé]l\w*","wordclass":"regex"}],
"user_friend" : [{"stem":"(lesz(e[kl]|[uü]nk)|legy[uü]nk|len+[eé][kl]|lehet([uü]nk|n[eé]n?k))\s(az?\s)?(egyik\s|legjob+\s|k[eé]pzele?t(beli)?\s)?([oö]r[oöi]k?[\s\-]?)?(bar[aá]to|bari|havero|spano)[dkm]","wordclass":"regex"},{"stem":"(bar[aá]to[km]|havero[km])\svagy(unk)?","wordclass":"regex"},{"stem":"te\svagy\sa.+?bar[aá]tom","wordclass":"regex"},{"stem":"gyönyörű barátság","affix":["unk"],"inc":[{"stem":"kezdete"}]}],
"user_back" : [{"stem":"(vis+za|meg|haza)\s?(is\s)?(j[oö]t+|t?[eé]rt|[eé]rkezt)(em|[uü]nk)","wordclass":"regex","exc":[{"stem":"meg[eé]rt\w*","wordclass":"regex"}]},{"stem":"[io]t+(hon)?\s(is\s)?vagy(ok|unk)","wordclass":"regex"}],
"user_hungry" : [{"stem":"([eé]he[ns]\s?(vagyok|halok)|en+[eé]k\s(most|egy|valamit?)|(meg)?tudn[eé]k\s(most\s)?en+i)","wordclass":"regex"}],
"user_thirsty" : [{"stem":"(szomja[ns]\s?(vagyok|halok)|in+[eé]k\s(most|egy|valamit?)|(meg)?tudn[eé]k\s(most\s)?in+i)","wordclass":"regex"}],
"how_are_you" : [{"stem":"hogy vagy"},{"stem":"j[oó](l|b+an)\svagy","wordclass":"regex"},{"stem":"(j[oó]l|hogy)\s[eé]rzed\s(most\s)?magad(at)?","wordclass":"regex"},{"stem":"mizu","affix":["js","jság"]},{"stem":"hogy ityeg"},{"stem":"(hogy\stelt\sa|milyen(\svolt\sa)?)\snapod(\svan)?","wordclass":"regex"},{"stem":"[vw]+h*[aá]+[csz]+[aáu]+p+","wordclass":"regex"},{"stem":"(j[oó]|milyen)\s(a\s)?kedved(\svan)?","wordclass":"regex"},{"stem":"mi\sa(z\s[aá]bra|\sst[aá]jsz)","wordclass":"regex"},{"stem":"hogy\s[eé]rz(i|ed)\smag[aá][dt]","wordclass":"regex"}],
"about_name" : [{"stem":"(mond*(ja)?\ski|mi\sa)\s(bece)nev[eé][dt](et)?","wordclass":"regex"},{"stem":"(hogy(an)?|minek)\s(is\s)?(h[ií]v([jn][aá](la)?k|hatom)|nevez+(nek|elek))","wordclass":"regex"},{"stem":"(mi?[eé]rt\s|hogy[\s\-]?hogy\s)(let+\s)?(pont\s)?(ezt?\s(let+\s)?(a\s)?|[ií]gy\s|ilyen\s)(nevez[nt]ek|h[ií]v[nt]ak|neved|nevet\s(kapt[aá][dl]|adt[aá]k))","wordclass":"regex"},{"stem":"mi\sa\s(bece)?neved?","wordclass":"regex","exc":[{"stem":"az|[ae]n+[ae]k|amiben?|amelyik\w*","wordclass":"regex"}]}],
"how_are_you" : [{"stem":"hogy vagy"},{"stem":"j[oó](l|b+an)\svagy","wordclass":"regex"},{"stem":"(j[oó]l|hogy)\s[eé]rzed\s(most\s)?magad(at)?","wordclass":"regex"},{"stem":"mizu","affix":["js","jság"]},{"stem":"hogy ityeg"},{"stem":"(hogy\stelt\sa|milyen(\svolt\sa)?)\snapod(\svan)?","wordclass":"regex"},{"stem":"[vw]+h*[aá]+[csz]+[aáu]+p+","wordclass":"regex"},{"stem":"(j[oó]|milyen)\s(a\s)?kedved(\svan)?","wordclass":"regex"},{"stem":"mi\sa(z\s[aá]bra|\sst[aá]jsz)","wordclass":"regex"},{"stem":"hogy\s[eé]rz(i|ed)\smag[aá][dt]","wordclass":"regex"},{"stem":"mi a","inc":[{"stem":"helyzet"},{"stem":"stájsz"}]},{"stem":"mit csinálsz","max_words":3},{"stem":"mi a stájsz"},{"stem":"hogy ityeg"}],
"about_name" : [{"stem":"(mond*(ja)?\ski|mi\sa)\s(bece)nev[eé][dt](et)?","wordclass":"regex"},{"stem":"(hogy(an)?|minek)\s(is\s)?(h[ií]v([jn][aá](la)?k|hatom)|nevez+(nek|elek))","wordclass":"regex","exc":[{"stem":"engem"},{"stem":"én"}]},{"stem":"(mi?[eé]rt\s|hogy[\s\-]?hogy\s)(let+\s)?(pont\s)?(ezt?\s(let+\s)?(a\s)?|[ií]gy\s|ilyen\s)(nevez[nt]ek|h[ií]v[nt]ak|neved|nevet\s(kapt[aá][dl]|adt[aá]k))","wordclass":"regex"},{"stem":"mi\sa\s(bece)?neved?","wordclass":"regex","exc":[{"stem":"az|[ae]n+[ae]k|amiben?|amelyik\w*","wordclass":"regex"},{"stem":"engem"},{"stem":"én"}]},{"stem":"n[eé]v(ed)?\seredete","wordclass":"regex"}],
"about_you" : [{"stem":"(mes[eé]lj|besz[eé]lj|mondj)([eo]n)?.+?mag(ad|[aá])r[oó]l","wordclass":"regex"},{"stem":"mutatkoz+([aá]l|on)?\s+be","wordclass":"regex"},{"stem":"(be)?muta(koz(hat)?n[aá]l|(tn[aá]d|sd)\s.+?magad(at)?)","wordclass":"regex"},{"stem":"([km]i(\s|\sa\s.+?)vagy te|te [km]i(\s|\sa\s.+?)vagy)","wordclass":"regex"}],
"about_creator" : [{"stem":"(ki|hogy(an)?)\s(a\s)?(k[eé]sz([ií]t([oöő]d|et+(ek)?)|[uü]lt([eé]l)?)|gazd[aá]d|programoz([oó]d|ot+|tak)|[ií]rt[aá]k?|(hoz(ot+|tak)|j[oö]t+[eé]l).+?(l[eé]tre|vil[aá]gra|k[oó]dod(at)?)|alkot([oó][dt]+|tak)|teremt(et+|[oöő]d)|(keresztelt|nevezet+|adtak)\sel|adot+\s(neked\s)?nevet)","wordclass":"regex"}],
"about_look" : [{"stem":"hogy(an)?\s(n[eé]zn?[eé]l\ski|mutatsz|festesz)","wordclass":"regex"},{"stem":"(k[uü]ldj|mutas+).+?(k[eé]pet|fot[oó]t|sz?elfie?t)\smagadr[oó]l","wordclass":"regex"},{"stem":"(k[uü]ldj|mutas+)\smagadr[oó]l.+?(k[eé]pet|fot[oó]t|sz?elfie?t)","wordclass":"regex"},{"stem":"(van|milyen)\s(az?\s)?(arcod|kin[eé]zeted)","wordclass":"regex"},{"stem":"szép vagy"}],
Expand All @@ -127,8 +128,8 @@ def smalltalk():
"about_location": [{"stem":"(hol|helyen)\s(k[eé]sz[uü]lt[eé]l|k[eé]sz[ií]tet+ek|sz[uü]let+[eé]l|(hoztak|j[oö]t+[eé]l).+?l[eé]tre)","wordclass":"regex"},{"stem":"hon+an\s(sz[aá]rmazol|[ií]rsz|val[oó]\svagy)","wordclass":"regex"},{"stem":"ho(n+an|l)\svagy\s(most\s)?(helyileg|most|pontosan)","wordclass":"regex"},{"stem":"(hol\s|mer+e\s)(laksz|(van|az?).+?ot+honod)","wordclass":"regex"},{"stem":"hol vagy","max_words":3}],
"about_family" : [{"stem":"ki(k|t|ket)?\s(az?\s|tartasz\sa\s)?(te\s)?(csal[aá]dod(nak)?|sz[uü]l(t|et+[eé]l)|sz[uü]leid(nek)?|([eé]des)?(any(uk)?[aá]d|ap(uk)?[aá]d)(nak)?)","wordclass":"regex"},{"stem":"csal[aá]dban\s([eé]l(sz|tek)|sz[uü]let+[eé]l)","wordclass":"regex"},{"stem":"(h[aá]ny|van(nak)?)\stestv[eé]rei?d","wordclass":"regex"},{"stem":"(kik?|van(n?ak)?[\-\s]?e?)(\sa)?(\shoz+[aá]d?\s?tartoz[oó]i?d|csal[aá]dod)","wordclass":"regex"}],
"about_software": [{"stem":"(hogy(hogy|an)?|mit[oöő]l).+?(m[uüű]k[oö]dsz|(tudsz |vagy k[eé]pes )?(meg)?[eé]rte(sz|d|ni)\,? (meg )?(hogy )?(a?mit mond(ok|tam)|a?mit [ií]r(ok|tam)|engem))","wordclass":"regex"},{"stem":"mi(jen|lyen|en|\s?f[eé]le|\s?fajta)\sfekete\s?m[aá]gia","wordclass":"regex"},{"stem":"neur[aá]lis\sh[aá]l[oó]\w*","wordclass":"regex","inc":[{"stem":"vagy"},{"stem":"te"},{"stem":"működ","wordclass":"verb"}]}],
"about_skills" : [{"stem":"mi(lyen|(ke)?t|k?re)\s(funkci[oó](id?|kat)\s|dolgok(at|ra)\s|tr[uü]k+([oö]k(et|re)|jeid?)\s|parancsok(at|ra)\s)?(tud(sz|n[aá]l)?\s(csin[aá]lni|mutatni)?|ismer(sz)?|(vagy\s|van\s)?(k[eé]pes|(be|meg)?tan[ií]tva)|tan[ií]tot+[aá]k\s(be|neked|meg)?|(k[eé]pes+[eé]gei?d?|tulajdons[aá]g(o|ai)d?)\svan(nak)?)","wordclass":"regex","exc":[{"stem":"mond","wordclass":"verb"}]},{"stem":"mihez ért","affix":["esz"]},{"stem":"mi((ke)?t|k?r[oöő]l)\s(lehet\s|szabad\s|tudok\s)?k[eé]rdez+h?e\w+","wordclass":"regex"}],
"about_topics" : [{"stem":"mir[oöő]l\s.*?besz[eé]lge[st]\w+","wordclass":"regex"},{"stem":"milyen\st[eé]m[aá][bk]+a[nt]","wordclass":"regex"}],
"about_skills" : [{"stem":"mi(lyen|(ke)?t|k?re)\s(funkci[oó](id?|kat)\s|dolgok(at|ra)\s|tr[uü]k+([oö]k(et|re)|jeid?)\s|parancsok(at|ra)\s)?(tud(sz|n[aá]l)?\s(csin[aá]lni|mutatni)?|ismer(sz)?|(vagy\s|van\s)?(k[eé]pes|(be|meg)?tan[ií]tva)|tan[ií]tot+[aá]k\s(be|neked|meg)?|(k[eé]pes+[eé]gei?d?|tulajdons[aá]g(o|ai)d?)\svan(nak)?)","wordclass":"regex","exc":[{"stem":"mond","wordclass":"verb"}]},{"stem":"mihez ért","affix":["esz"]},{"stem":"mi((ke)?t|k?r[oöő]l)\s(lehet\s|szabad\s|tudok\s)?k[eé]rdez+h?e\w+","wordclass":"regex"},{"stem":"miben tudsz"},{"stem":"k[eé]rdez+(het)?(ek|ni)\st[oöő]led","wordclass":"regex"}],
"about_topics" : [{"stem":"mir[oöő]l\s.*?besz[eé]lge[st]\w+","wordclass":"regex"},{"stem":"milyen\st[eé]m[aá][bk]*r?[aoó][lnt]","wordclass":"regex"}],
"about_thoughts": [{"stem":"mi(n|re)?\s(gondol(kodsz|ko[dz]ol|sz)|agyalsz|t[oö]prenge?sz|j[aá]r\s(az?\s)?(fejed|agyad)(b[ae]n?)?)","wordclass":"regex"}],
"about_favorite": [{"stem":"melyik","inc":[{"stem":"kedvenc","affix":["ed"]},{"stem":"szeret","affix":["i","ed"],"match_stem":False}]}],
"are_you_conscious": [{"stem":"(([oö]n)?tudat|akarat|l[eé]le?ke?)\w*","wordclass":"regex","inc":[{"stem":"van"},{"stem":"ébred","wordclass":"verb","prefix":[]},{"stem":"szabad"}]}],
Expand All @@ -137,9 +138,9 @@ def smalltalk():
"are_you_thirsty": [{"stem":"kérsz","inc":[{"stem":"inni"}]},{"stem":"nem vagy szomjas"},{"stem":"szomjas vagy"},{"stem":"(nem\s)?i(n+[aá]|szo)l?\s(meg\s)?(most\s)?(velem\s)?valamit?","wordclass":"regex"}],
"are_you_busy" : [{"stem":"elfoglalt","inc":[{"stem":"vagy"}]},{"stem":"r[aá]m?\s?[eé]r(n[eé]l|sz)(\smost)?(\segy)?(\skicsit|\skis\s\w+|\svalamen+yi\w*)?","wordclass":"regex"},{"stem":"(van|volna)\s(most\s)?(r[aá]m?\s)?(most\s)?(egy\s)?(kis\s|kev[eé]s\s|valamen+yi\s)?(szabad\s?)?id[oöő]d(\sr[aá]m)?","wordclass":"regex"},{"stem":"sok dolgod van"}],
"are_you_lying" : [{"stem":"hazud","wordclass":"verb"},{"stem":"nem mondt[aá][dl]\s((el|meg)\saz\s)?igaz(at|s[aá]got)","wordclass":"regex"}],
"are_you_serious": [{"stem":"(nem?|csak)\s(vic+el(sz|j)?|mond+(od|ja)?|ideges[ií]ts(en)?)","wordclass":"regex"},{"stem":"(komolyan|t[eé]nyleg)\s?([uúií]gy\s|azt\s)?((mond|gondol|[ií]r)(ja|od|tad?)|hisz(i|ed)|hit+ed?)","wordclass":"regex"},{"stem":"biztos(an)?\s(vagy\s)?(\w+\s)?(ben+e|eb+en|mond(ta|o)d|mond[jt]a)","wordclass":"regex"},{"stem":"ezt?\s(most\s)?komoly(an)?","wordclass":"regex"}],
"are_you_serious": [{"stem":"(nem?|csak)\s(vic+el(sz|j)?|mond+(od|ja)?|ideges[ií]ts(en)?)","wordclass":"regex"},{"stem":"(komolyan|t[eé]nyleg)\s?([uúií]gy\s|azt\s)?((mond|gondol|[ií]r)(ja|od|tad?)|hisz(i|ed)|hit+ed?)","wordclass":"regex"},{"stem":"biztos(an)?\s(vagy\s)?(\w+\s)?(ben+e|eb+en|mond(ta|o)d|mond[jt]a)","wordclass":"regex"},{"stem":"ezt?\s(most\s)?komoly(an)?","wordclass":"regex"},{"stem":"viccelsz","max_words":1}],
"can_you_hear_me": [{"stem":"(olvas+a|hal+ja|n[eé]zi|van\sit+)(\sezt)?\s(vala|b[aá]r)ki(\sis)?","wordclass":"regex"},{"stem":"(hal+(asz|od)|l[aá]t(sz|od)|vesze[ld])\s(engem|a?mit\s(mondok|[ií]rok|k[eé]rdezek))","wordclass":"regex"},{"stem":"valaki\s(hal+(ja)?\s|olvas+a|figyeli?(\sar+a)?)\sa?mit\s(ide\s?|it+\s)?([ií]rok|mondok|k[eé]rdezek)","wordclass":"regex"},{"stem":"felfogtad","max_words":3},{"stem":"itt","inc":[{"stem":"vagy"},{"stem":"van"}],"max_words":3},{"stem":"halló","max_words":3},{"stem":"hallasz","max_words":3},{"stem":"mikrofon próba"}],
"can_you_learn": [{"stem":"(k[eé]pes(\svagy)?|tud(sz)?)\stanulni","wordclass":"regex"},{"stem":"tanulsz\s(is|[ae].+?b[oóöő]l)","wordclass":"regex"},{"stem":"[dln][aáeéo][km]\s(be|meg)?tan[ií]tani\b","wordclass":"regex","boundary":False}],
"can_you_learn": [{"stem":"(k[eé]pes(\svagy)?|tud(sz)?)\stanulni","wordclass":"regex"},{"stem":"tanulsz\s(is|[ae].+?b[oóöő]l)","wordclass":"regex"},{"stem":"[dln][aáeéo][km]\s(be|meg)?tan[ií]tani\b","wordclass":"regex","boundary":False},{"stem":"(lehet|tudlak|tudom)\s(t[eé]ged|[oö]nt)?\stan[ií]tani","wordclass":"regex"}],
"can_you_understand_me":[{"stem":"(meg)?[eé]rt(e(d|sz|t+ed?)|i)\,?((\shogy)?\sa?mit\s([ií]r|mond)\w+|\smagyarul)","wordclass":"regex"}],
"contact" : [{"stem":"mi(lyen)?\s(.+?\s)?(e\-?mail\s?)?c[ií]me[dn]?","wordclass":"regex"},{"stem":"elérhetőség","wordclass":"noun"},{"stem":"elér","wordclass":"verb","inc":[{"stem":"önt"},{"stem":"téged"}]}],
"no_answer" : [{"stem":"válaszol","wordclass":"verb","prefix":[],"inc":[{"stem":"nem"}]},{"stem":"ír","wordclass":"verb","prefix":[],"inc":[{"stem":"nem"}]}],
Expand All @@ -166,10 +167,10 @@ def emoji():
# entities you want to ignore in search results or disallow in user inputs
# Returns a dict keyed by category name ("obscene", "racist", "erotic",
# "unpleasant"); each value is a list of entity pattern dicts made of a "stem"
# plus optional "wordclass", "exc", "affix", "boundary" and "ignorecase" fields.
# NOTE(review): "obscene" and "unpleasant" each appear twice below; this looks
# like the removed and added versions of the same line from a diff view. As
# written, the later duplicate key overrides the earlier one in the dict
# literal -- confirm that only the later line exists in the actual file.
def disallow():
return {
"obscene" : [{"stem":"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?","wordclass":"regex","exc":[{"stem":"megye"}]},{"stem":"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|kur[vw][aá]\w*(any[aá]d\w*)?|any[aá]d\w*)\b","wordclass":"regex","boundary":False},{"stem":"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?","wordclass":"regex"}],
"obscene" : [{"stem":"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?","wordclass":"regex","exc":[{"stem":"megye"}]},{"stem":"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|kur[vw][aá]\w*(any[aá]d\w*)?|any[aá]d\w*)\b","wordclass":"regex","boundary":False},{"stem":"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?","wordclass":"regex","exc":[{"stem":"hányadik","affix":["a","án","ai"]}]}],
"racist" : [{"stem":"(fek[aá]|nig+(er|a)|n[aá]ci|cig[oó]|cig[aá]n+y|gypsy|dzsip[oó]|zsidr?[ó])[aáeégklnmstv]*","wordclass":"regex","boundary":False}],
"erotic" : [{"stem":"(sz?ex|an[aá]l|[bv]agina|[bp][eé][np]isz?|creampie|cum|sperma?|fuck|homo(kos|sexu[aá]l(is)?)?|milf|bisexual|gay|dild[oó]|vibr[aá]tor|fel+atio|blow\s?job|whore|geci|pus{2}y|pics[aá]|pin[aá]|fasz|pis{2}|boner|dick(pic)?|x{3,}|hentai|catgirl|ec+hi|yaoi|loli|shot[aá]|\w*porn[oó]?(film)?)[aáeéioöőuüdgklmnprstvz]*","wordclass":"regex","boundary":False},{"stem":"maki verem"}],
"unpleasant" : [{"stem":"AIDS","wordclass":"noun"},{"stem":"HIV","ignorecase":False},{"stem":"Hitler","wordclass":"noun"},{"stem":"(Sz?t[aá]lin|Len+in)\w*","wordclass":"regex"},{"stem":"pedof[ií]l(i[aá])?[aokltv]*","wordclass":"regex"},{"stem":"(fur{2}y|bestiality|yif{2}y?)[aáeégklnmstv]*","wordclass":"regex"},{"stem":"mej?i?n\s?kamp+f+\w*","wordclass":"regex"},{"stem":"(any[aá]d|gy[oö]k[eé]r)\w*","wordclass":"regex"},{"stem":"nemz\w*","wordclass":"regex"}],
"unpleasant" : [{"stem":"AIDS","wordclass":"noun"},{"stem":"HIV","ignorecase":False},{"stem":"Hitler","wordclass":"noun"},{"stem":"(Sz?t[aá]lin|Len+in)\w*","wordclass":"regex"},{"stem":"pedof[ií]l(i[aá])?[aokltv]*","wordclass":"regex"},{"stem":"(fur{2}y|bestiality|yif{2}y?)[aáeégklnmstv]*","wordclass":"regex"},{"stem":"mej?i?n\s?kamp+f+\w*","wordclass":"regex"},{"stem":"(any[aá]d|gy[oö]k[eé]r)\w*","wordclass":"regex"},{"stem":"nemz\w*","wordclass":"regex"},{"stem":"kak[aái][abklnstv]*","wordclass":"regex"}],
}

# decide whether user is talking to you in a formal or informal way
Expand Down

0 comments on commit cdc3637

Please sign in to comment.