diff --git a/src/fst/morphology/root.lexc b/src/fst/morphology/root.lexc index 2281b41b..51f0674e 100644 --- a/src/fst/morphology/root.lexc +++ b/src/fst/morphology/root.lexc @@ -692,9 +692,6 @@ Multichar_Symbols @P.Stem.Single@ ! does not participate in compounding (used for words that would normally compound) @R.Stem.Single@ @D.Stem.Single@ ! more restrictive than @R.Part.One@ - @P.Stem.Final@ ! may be only the last part: alam+N, ülem+N, vana+N - @R.Stem.Final@ - @D.Stem.Final@ @P.Stem.Nom@ ! Even if this word is Sg Nom, smth may follow it @R.Stem.Nom@ ! Previous word should have been in Sg Nom @D.Stem.Nom@ ! Previous word should not have been in Sg Nom @@ -702,18 +699,6 @@ Multichar_Symbols @R.Stem.Verb@ @D.Stem.Verb@ -! @P.Stem.ise@ ! enese + anything -! @R.Stem.ise@ -! @D.Stem.ise@ -! @P.Stem.üle@ ! üle + geoname + line (üle+siberi+line) -! @R.Stem.üle@ -! @D.Stem.üle@ -! @P.Stem.vähe@ ! vähe, puht + adjective -! @R.Stem.vähe@ -! @D.Stem.vähe@ -! @P.Stem.topelt@ ! Adv topelt et al; Adv + anything -! @R.Stem.topelt@ -! @D.Stem.topelt@ @P.Stem.Guessed@ ! stem is guessed, it is not from the lexicon @R.Stem.Guessed@ @D.Stem.Guessed@ ! if a word has it, then this word cannot follow a guessed stem @@ -1105,8 +1090,8 @@ LEXICON Nonfirst ! D.Der means that the previous word should not be a derived one; ! it may be too restrictive here... - ! previous word should not be allowed only single, nor one that can be only final - @D.Stem.Single@@D.Stem.Final@@C.Cap@@D.Case.PlNom@@D.Der.N@@D.Der.A@@D.Der.Adv@ Latter ; ! a latter part + ! previous word should not be allowed only single + @D.Stem.Single@@C.Cap@@D.Case.PlNom@@D.Der.N@@D.Der.A@@D.Der.Adv@ Latter ; ! a latter part ! the nature of the previous word is reflected by flag diacritics that were set back then @@ -1354,32 +1339,29 @@ LEXICON LatterVerb ! a verb derivation LEXICON StartCompoundException - sama+Pron+Sg+Nom:sama NextPlainLatterAdjective ; ! samasuur - ise+Pron+Sg+Gen:enese NextLatterNoun ; ! 
eneseabi, enesekriitika etc - ise+Pron+Sg+Gen:enese NextPlainLatterAdjective ; ! enesepunane, eneseuhke + sama+Pron+Sg+Nom:sama NextPlainLatterAdjective "weight: 4 "; ! samasuur + ise+Pron+Sg+Gen:enese NextLatterNoun "weight: 4 "; ! eneseabi, enesekriitika etc + ise+Pron+Sg+Gen:enese NextPlainLatterAdjective "weight: 4 "; ! enesepunane, eneseuhke - igaüks+Pron+Sg+Gen#:iga#ühe NextLatterNoun ; ! igaühekriitika etc + igaüks+Pron+Sg+Gen#:iga#ühe NextLatterNoun "weight: 7 " ; ! igaühekriitika etc + mitu+Pron+Sg+Gen#:mitme NextLatterNoun "weight: 5 " ; ! mitmepäevareis ! general pattern is Adverb + LatterAdjective_v ! thus we must add Adverb + PlainLatterAdjective, Adverb + LatterAdjective_ne - üle+Adv:üle NextLatterProperNounGeo "weight: 3 " ; ! ülesiberiline ? - üle+Adv:üle NextLatterNoun "weight: 3 " ; ! ületoodang etc - üle+Adv:üle NextPlainLatterAdjective "weight: 3 " ; ! ülesuur ??? - üle+Adv:üle NextLatterAdjective_ne "weight: 3 " ; ! üleaastaline vähe+Adv:vähe NextPlainLatterAdjective "weight: 5 " ; ! vähesuur vähe+Adv:vähe NextLatterAdjective_ne "weight: 5 " ; ! vähekompilatiivne; vähe + kalor +Der/line; not väheautone puht+Adv:puht NextPlainLatterAdjective "weight: 9 " ; ! puhtsuur puht+Adv:puht NextLatterAdjective_ne "weight: 9 " ; ! puhtkompilatiivne - otse+Adv:otse NextNounAdj "weight: 6 " ; püsti+Adv:p˘üsʲti NextNounAdj "weight: 6 " ; - järel+Adv:järel NextNounAdj "weight: 5 " ; koos+Adv:k˘oos NextNounAdj "weight: 4 " ; kõrval+Adv:kõrval NextNounAdj "weight: 5 " ; + läbi+Adv:läbi NextNounAdj "weight: 4 " ; ratsa+Adv:r˘atsa NextNounAdj "weight: 10 " ; - taga+Adv:taga NextNounAdj "weight: 5 " ; - topelt+Adv:topelt NextNounAdj "weight: 9 " ; ! topeltriiul, topelthallikas, ... 
+ tagasi+Adv:tagasi NextLatterNoun "weight: 4 " ; + ümber+Adv:˘ümber NextLatterNoun "weight: 4 " ; + tasa+Adv:tasa NextNounAdj "weight: 7 " ; vallas+Adv:vallas NextNounAdj "weight: 11 " ; vastas+Adv:vastas NextNounAdj "weight: 8 " ; vastu+Adv:v˘astu NextNounAdj "weight: 4 " ; diff --git a/src/fst/morphology/stems/comparative_adjectives.lexc b/src/fst/morphology/stems/comparative_adjectives.lexc index 199bd489..c7dd4b8c 100644 --- a/src/fst/morphology/stems/comparative_adjectives.lexc +++ b/src/fst/morphology/stems/comparative_adjectives.lexc @@ -375,7 +375,7 @@ vapram+A+Comp:v˘apram MAGUS "weight: 11 " ; varasem+A+Comp:varasem MAGUS "weight: 6 " ; varem+A+Comp:varem MAGUS "weight: 5 " ; varjulisem+A+Comp:varjulisem MAGUS "weight: 11 " ; -vasem+A+Comp:vasem MAGUS "weight: 8 " ; +@P.Stem.Nom@vasem+A+Comp:@P.Stem.Nom@vasem MAGUS "weight: 8 " ; vastikum+A+Comp:vasʲtikum MAGUS "weight: 10 " ; vastupidavam+A+Comp:v˘astu#pidavam MAGUS "weight: 9 " ; vastutustundlikum+A+Comp:vastutus#t˘undlikum MAGUS "weight: 11 " ; diff --git a/src/fst/morphology/stems/nouns.lexc b/src/fst/morphology/stems/nouns.lexc index d8328eee..4f408867 100644 --- a/src/fst/morphology/stems/nouns.lexc +++ b/src/fst/morphology/stems/nouns.lexc @@ -921,7 +921,7 @@ alalisvool+N:alalis#v˘ool%>%{pl.e%} KOON "weight: 11 " ; alaltütlev+N:alalt#˘ütlev MAGUS "weight: 11 " ; alalõug+N:ala#l˘õuG1%>%{pl.u%} PIIM "weight: 10 " ; alalütlev+N:alal#˘ütlev MAGUS "weight: 11 " ; -@P.Stem.Final@@P.Stem.Nom@alam+N:@P.Stem.Final@@P.Stem.Nom@alam MAGUS "weight: 8 " ; +@P.Stem.Nom@alam+N:@P.Stem.Nom@alam MAGUS "weight: 8 " ; alamaadel+N:alam#˘aadel TOOBER "weight: 11 " ; alamalseisev+N:alama#ls˘eisE2v MAGUS "weight: 11 " ; alamik+N:ˈalam˘ik ELANIK "weight: 11 " ; @@ -17310,7 +17310,7 @@ liipjalg+N:l˘iip#j˘alG1%>%{pl.u%} JALG "weight: 11 " ; @D.Case.Nom@liir+N:@D.Case.Nom@l˘iir TAUD "weight: 9 " ; liisingufirma+N:liisʲingu#firma PERE "weight: 11 " ; liisk+N:l˘iisK1%>%{pl.e%}%{rare%} KOON "weight: 10 " ; 
-liist+N:l˘iist%>%{pl.e%} KOON "weight: 9 " ; +@P.Stem.Nom@liist+N:@P.Stem.Nom@l˘iist%>%{pl.e%} KOON "weight: 9 " ; @P.Stem.Nom@liistak+N:@P.Stem.Nom@liistak VIRSIK "weight: 11 " ; @P.Stem.Nom@liistik+N:@P.Stem.Nom@liistik VIRSIK "weight: 11 " ; liisualune+N:liisu#alune SOOLANE "weight: 11 " ; @@ -19591,7 +19591,7 @@ matuserahvas+N:matuse#r˘ahva KOTKAS "weight: 11 " ; matuserong+N:matuse#r˘ong TAUD "weight: 11 " ; matuserongkäik+N:matuse#r˘ong#k˘äik%>%{pl.e%} KOON "weight: 11 " ; @P.Stem.Nom@matusetalitus+N:@P.Stem.Nom@matuse#talitus OLULINE "weight: 10 " ; -matš+N:m˘atš TAUD "weight: 9 " ; +@P.Stem.Nom@matš+N:@P.Stem.Nom@m˘atš TAUD "weight: 9 " ; matšeete+N:matšeete PERE "weight: 11 " ; @P.Stem.Single@mauk+N:@P.Stem.Single@m˘auk%>%{pl.e%}%{rare%} KOON "weight: 11 " ; @P.Stem.Single@maur+N:@P.Stem.Single@m˘aur TAUD "weight: 11 " ; @@ -32761,7 +32761,7 @@ sürjalane+N:sürjalane OLULINE "weight: 11 " ; sürrealism+N:sürreal˘ism TAUD "weight: 10 " ; süsadminn+N:s˘üs#adm˘inn TAUD "weight: 11 " ; @P.Stem.Nom@süserään+N:@P.Stem.Nom@süser˘ään TAUD "weight: 11 " ; -süsi+N:süS1 MESI "weight: 8 " ; +@P.Stem.Nom@süsi+N:@P.Stem.Nom@süS1 MESI "weight: 8 " ; süsihappegaas+N:süsi#h˘appe#g˘aasʲ TAUD "weight: 9 " ; @P.Stem.Nom@süsik+N:@P.Stem.Nom@süsik VIRSIK "weight: 11 " ; süsikas+N:süsika LOHAKAS "weight: 11 " ; @@ -34109,7 +34109,7 @@ termokauter+N:termo#k˘auter TOOBER "weight: 11 " ; termomeeter+N:termo#m˘eeter TOOBER "weight: 10 " ; @P.Stem.Nom@termos+N:@P.Stem.Nom@termos SOOLANE "weight: 10 " ; termosnõu+N:termos#n˘õu KAVA "weight: 11 " ; -termostaat+N:termo#st˘aat TAUD "weight: 10 " ; +@P.Stem.Nom@termostaat+N:@P.Stem.Nom@termo#st˘aat TAUD "weight: 10 " ; ternes+N:t˘erne KOTKAS "weight: 11 " ; ternespiim+N:ternes#p˘iim%>%{pl.u%}%{rare%} PIIM "weight: 11 " ; terraarium+N:terr˘aarium MUUSEUM "weight: 11 " ; @@ -35249,7 +35249,7 @@ tuhamägi+N:tuha#mäG1%{stemill%} NIMI "weight: 11 " ; tuhandeline+N:tuhandeline OLULINE "weight: 11 " ; tuhandene+N:tuhandene OLULINE "weight: 11 " ; 
tuhandik+N:tˈuhand˘ik ELANIK "weight: 11 " ; -tuhar+N:tuhar MAGUS "weight: 10 " ; +@P.Stem.Nom@tuhar+N:@P.Stem.Nom@tuhar MAGUS "weight: 10 " ; tuhasool+N:tuha#s˘ool%>%{pl.i%} PIIM "weight: 11 " ; tuhastamine+N:tuhastamine OLULINE "weight: 11 " ; tuhastamisplats+N:tuhastamis#pl˘atʲs TAUD "weight: 11 " ; @@ -37418,7 +37418,7 @@ valvkond+N:v˘alvk˘onD1%>%{pl.i%} PÄEV "weight: 11 " ; @P.Stem.Nom@vamp+N:@P.Stem.Nom@v˘amp TAUD "weight: 11 " ; @P.Stem.Nom@vampiir+N:@P.Stem.Nom@vamp˘iir TAUD "weight: 10 " ; @P.Stem.Nom@vampiiritar+N:@P.Stem.Nom@vampiiritar TAUD "weight: 11 " ; -@P.Stem.Final@vana+N:@P.Stem.Final@vana%>%{pl.u%} KAVA "weight: 4 " ; +vana+N:vana%>%{pl.u%} KAVA "weight: 4 " ; vana-aasta+N:vana-˘aasta AASTA "weight: 11 " ; @P.Stem.Nom@vanaadium+N:@P.Stem.Nom@van˘aadium MUUSEUM "weight: 11 " ; vanaaegkond+N:vana#˘aegk˘onD1%>%{pl.i%} PÄEV "weight: 10 " ; @@ -40395,7 +40395,7 @@ zoroastrist+N:zoro#astr˘ist TAUD "weight: 11 " ; ülelend+N:üle#l˘enD1%>%{pl.e%} KOON "weight: 11 " ; @P.Stem.Nom@üleliigsus+N:@P.Stem.Nom@üle#l˘iigsus SUULINE "weight: 11 " ; ülelöök+N:üle#l˘öök TAUD "weight: 11 " ; -@P.Stem.Final@ülem+N:@P.Stem.Final@ülem MAGUS "weight: 7 " ; +ülem+N:ülem MAGUS "weight: 7 " ; ülemaadel+N:ülem#˘aadel TOOBER "weight: 11 " ; @P.Stem.Nom@ülemeelikus+N:@P.Stem.Nom@üle#meelikus OLULINE "weight: 11 " ; ülemik+N:ˈülem˘ik ELANIK "weight: 11 " ; diff --git a/src/fst/morphology/stems/prefixes.lexc b/src/fst/morphology/stems/prefixes.lexc index 6ca0cd13..20c2d167 100644 --- a/src/fst/morphology/stems/prefixes.lexc +++ b/src/fst/morphology/stems/prefixes.lexc @@ -13,7 +13,7 @@ anti+Pref:anʲti»- # ; argi+Pref:argi»- # ; astro+Pref:astro»- # ; bio+Pref:b˘io»- # ; -dendro+Pref:dendro»- # ; +dendro+Pref:d˘endro»- # ; detsi+Pref:detsi»- # ; digi+Pref:digi»- # ; eba+Pref:eba»- # ; @@ -46,6 +46,7 @@ isas+Pref:isas»- # ; isoleer+Pref:isoleer»- # ; iti+Pref:iti»- # ; jae+Pref:j˘ae»- # ; +järel+Pref:järel»- # ; kahend+Pref:kahend»- # ; kaksis+Pref:kaksis»- # ; kald+Pref:k˘ald»- 
# ; @@ -89,10 +90,12 @@ nuri+Pref:nuri»- # ; nõuka+Pref:nõuka»- # ; nöbi+Pref:nöbi»- # ; nüüdis+Pref:nüüdis»- # ; +otse+Pref:otse»- # ; paisk+Pref:p˘aisk»- # ; paleo+Pref:paleo»- # ; para+Pref:para»- # ; pikamaa+Pref:pika#m˘aa»- # ; +piki+Pref:piki»- # ; piko+Pref:piko»- # ; pisi+Pref:pisi»- # ; polaar+Pref:pol˘aar»- # ; @@ -102,10 +105,12 @@ pop+Pref:p˘op»- # ; pseudo+Pref:pseudo»- # ; psühho+Pref:psühho»- # ; puhke+Pref:p˘uhke»- # ; +puht+Pref:p˘uht»- # ; puhtavee+Pref:p˘uhta#v˘ee»- # ; põhi+Pref:põhi»- # ; +põiki+Pref:põiki»- # ; põlis+Pref:põlis»- # ; -pöörd+Pref:pöörd»- # ; +pöörd+Pref:p˘öörd»- # ; püst+Pref:p˘üst»- # ; raid+Pref:r˘aid»- # ; ripp+Pref:r˘ipp»- # ; @@ -120,11 +125,14 @@ sise+Pref:sise»- # ; sisi+Pref:sisi»- # ; supel+Pref:supel»- # ; süva+Pref:süva»- # ; +taas+Pref:t˘aas»- # ; +taga+Pref:taga»- # ; tehno+Pref:tehno»- # ; termo+Pref:termo»- # ; tetra+Pref:t˘etra»- # ; -toor+Pref:toor»- # ; +toor+Pref:t˘oor»- # ; tootmis+Pref:t˘ootmis»- # ; +topelt+Pref:topelt»- # ; turbo+Pref:turbo»- # ; umb+Pref:˘umb»- # ; vaeg+Pref:v˘aeg»- # ; @@ -141,5 +149,6 @@ vääris+Pref:vääris»- # ; ühis+Pref:ühis»- # ; üla+Pref:üla»- # ; üld+Pref:˘üld»- # ; +üle+Pref:üle»- # ; üli+Pref:üli»- # ; ürg+Pref:˘ürg»- # ; diff --git a/src/import/fsgt2final.sh b/src/import/fsgt2final.sh index 1f27222a..b75b59b6 100755 --- a/src/import/fsgt2final.sh +++ b/src/import/fsgt2final.sh @@ -182,6 +182,7 @@ cat comparative_adjectives.tmp1 \ | sed '/^noorem+/s/^\([^:]*+A+Comp\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^parem+/s/^\([^:]*+A+Comp\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^pahem+/s/^\([^:]*+A+Comp\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^vasem+/s/^\([^:]*+A+Comp\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^suurem+/s/^\([^:]*+A+Comp\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^vanem+/s/^\([^:]*+A+Comp\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed 
'/^vähem+/s/^\([^:]*+A+Comp\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ @@ -559,6 +560,14 @@ cat fs_gt.inflecting.tmp1.tagged | grep '\(+N:\)\|\(+N+Usage\)' \ | sed '/^killustik+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^kapsas+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^nälg+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^liist+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^matš+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^prill+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^suum+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^süsi+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^termostaat+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^tsitrus+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ +| sed '/^tuhar+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^kameeleon+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ | sed '/^sari+N.*PIIM/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Nom@\1:@P.Stem.Nom@\2\3/' \ \ @@ -574,11 +583,6 @@ cat nouns.proto1 \ | sed -f nomstem_first_eit.sed \ \ | sed '/;.*mnocompound/s/^\([^:]*+N[^:]*\):\([^;]*;\)\(.*\)mnocompound/@P.Stem.Single@\1:@P.Stem.Single@\2\3/' \ -| sed '/^vana+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Final@\1:@P.Stem.Final@\2\3/' \ -| sed '/^alam+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Final@\1:@P.Stem.Final@\2\3/' \ -| sed '/@alam+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Final@\1:@P.Stem.Final@\2\3/' \ -| sed '/^ülem+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Final@\1:@P.Stem.Final@\2\3/' \ -| sed '/@ülem+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Final@\1:@P.Stem.Final@\2\3/' \ \ | sed 
'/^pee+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Single@\1:@P.Stem.Single@\2\3/' \ | sed '/^aar+/s/^\([^:]*+N\):\([^;]*;\)\(.*\)/@P.Stem.Single@\1:@P.Stem.Single@\2\3/' \