-
Notifications
You must be signed in to change notification settings - Fork 1
/
6-1-analysis-morphind-etc-editing.R
619 lines (562 loc) · 45 KB
/
6-1-analysis-morphind-etc-editing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
# Script to edit the MorphInd tags, word classes, etc. in the data produced by 6-analysis.R
verbs <- verbs %>%
filter(!word_form %in% c("memperikan", "dipertim", "dipin",
"beradaan", # split from "keberadaan"
"berbedaan", # typo of "perbedaan"
"berbuatan", # typo of perbuatan
"berdayaan", # split from pemberdayaan (peN-/-an) and ketidakberdayaan (ke-/-an)
"berdin", # typo for berdiri
"bergan", # split typo for "bergantung"
"beredan", # typo for beredar
"bergen", # place name Bergen in Norway
"berdoa-menataptersenyum-berjalan", "berkai-kali", "berusahan",
"teras-stupa", "terdidik-tercerahkan", "termi-nologi",
"tergan-tung", "terlintas-linlas", "terburu-burumerealisasi",
"ber-gantung", "ber-henti", "berkai-kali", "ber-bicara",
"ber-lanjut", "ber-ubah-ubah", "ber-angsur-angsur",
"ber-ulang", "ber-tanya", "bermaian", "bergman", "berjiran",
"memperbaki", "terpen",
"berianjak-tanjak", "ber-temu", "ber-juang",
"beri-msiatif", "ber-kumpulnya", "berba-gai",
"berkedu-dukan", "berkelan-jutan", "ber-saing",
"ber-tumpu", "ber-tindak", "berkontri-busi",
"berlan", "berandan", "bersaman",
"bersenjata-kelihatan", "berpan", "berbelan-", "bermaian",
"diempat", "berain", "berari",
"berbagal", # typo for 'berbagai'? based on the concordance
"berbasi", # typo for 'berbasi'? based on the concordance
"berbah", # name of place
"berbas", # name of place
"berban" # split word for 'berbanding'
)) %>%
mutate(morphind = str_replace_all(morphind, "(?<=\\+)jerit\\<v\\>", "jerit<n>"),
morphind = replace(morphind, word_form=='menjerit-jerit', "meN+jerit<n>_VPA"),
root_pos_morphind = ifelse(root_morphind == "jerit", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "jerit",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)dagang\\<v\\>", "dagang<n>"),
root_pos_morphind = ifelse(root_morphind == "dagang", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "dagang",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)semi\\<f\\>", "semi<n>"),
root_pos_morphind = ifelse(root_morphind == "semi", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "semi",
str_replace(affix_morphind_wclass, "_f_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)laku\\<a\\>", "laku<n>"),
root_pos_morphind = ifelse(root_morphind == "laku" & word_form == "diperlaku-kan", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "laku" & word_form == "diperlaku-kan",
str_replace(affix_morphind_wclass, "_a_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)lebur\\<v\\>", "lebur<a>"),
root_pos_morphind = ifelse(root_morphind == "lebur", "a", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "lebur",
str_replace(affix_morphind_wclass, "_v_", "_a_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)heran\\<v\\>", "heran<a>"),
root_pos_morphind = ifelse(root_morphind == "heran", "a", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "heran",
str_replace(affix_morphind_wclass, "_v_", "_a_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)sehat\\<n\\>", "sehat<a>"),
root_pos_morphind = ifelse(root_morphind == "sehat", "a", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "sehat",
str_replace(affix_morphind_wclass, "_n_", "_a_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)libur\\<n\\>", "libur<v>"),
root_pos_morphind = ifelse(root_morphind == "libur", "v", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "libur",
str_replace(affix_morphind_wclass, "_n_", "_v_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)bagi\\<v\\>", "bagi<n>"),
morphind = str_replace_all(morphind, "(?<=\\+)silah?kan\\<v\\>", "sila<v>+kan"),
suff_morphind = replace(suff_morphind, root_morphind %in% c("silakan", "silahkan"), "kan"),
affix_morphind = ifelse(root_morphind %in% c("silakan", "silahkan"),
str_replace_all(affix_morphind, "_0$", "_kan"),
affix_morphind),
affix_morphind_wclass = ifelse(root_morphind %in% c("silakan", "silahkan"),
str_replace(affix_morphind_wclass, "_0$", "_kan"),
affix_morphind_wclass),
root_morphind = replace(root_morphind, root_morphind %in% c("silakan", "silahkan"), "sila"),
morphind = replace(morphind, word_form=='membagi-bagi', "meN+bagi<n>_VPA"),
morphind = replace(morphind, word_form=='terperangkap', "ter+perangkap<n>_VSP"),
morphind = replace(morphind, word_form=='terperangkapnya', "ter+perangkap<n>_VSP+dia<p>_PS3"),
morphind = replace(morphind, word_form=='diperangkap', "di+perangkap<n>_VSP"),
morphind = replace(morphind, word_form=='diperban', "di+perban<n>_VSP"),
morphind = str_replace_all(morphind, "(?<=\\+)acuh\\<a\\>", "acuh<v>"),
affix_morphind = replace(affix_morphind, word_form=='diperband', "di-_0"),
affix_morphind = replace(affix_morphind, word_form=='diperangkap', "di-_0"),
pref_morphind = replace(pref_morphind, word_form=='diperangkap', "di-"),
pref_morphind = replace(pref_morphind, word_form=='diperban', "di-"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form=='diperangkap', "di-_n_0"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form=='diperban', "di-_n_0"),
root_morphind = replace(root_morphind,
word_form %in% c("diperban"), "perban"),
affix_morphind = replace(affix_morphind,
word_form %in% c("terperangkap", "terperangkapnya"),
"ter-_0"),
pref_morphind = replace(pref_morphind,
word_form %in% c("terperangkap", "terperangkapnya"),
"ter-"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form %in% c("terperangkap", "terperangkapnya"),
"ter-_n_0"),
root_morphind = replace(root_morphind,
word_form %in% c("terperangkap", "diperangkap",
"terperangkapnya"),
"perangkap"),
root_pos_morphind = replace(root_pos_morphind,
word_form %in% c("terperangkap", "diperangkap", "terperangkapnya"), "n"),
morphind = replace(morphind, word_form=='mendamprat', "meN+damprat<n>_VSA"),
root_morphind = replace(root_morphind, word_form=='mendamprat', "damprat"),
morphind = replace(morphind, word_form=='mencerocos', "meN+cerocos<pre-cat>_VSA"),
morphind = replace(morphind, word_form=='mendandani', "meN+dandan<v>+i_VSA"),
root_morphind = replace(root_morphind, word_form=='mendandani', "dandan"),
morphind = replace(morphind, word_form=='menepuk-nepuk', "meN+tepuk<v>_VPA"),
morphind = replace(morphind, word_form=='membagi-bagikan', "meN+bagi<n>+kan_VPA"),
root_pos_morphind = ifelse(root_morphind == "bagi", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "bagi",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = replace(morphind,
word_form %in% c("diperhatikan", "diper-hatikan"),
"di+per+hati<n>+kan_VSP"),
root_morphind = replace(root_morphind,
word_form %in% c("diperhatikan", "diper-hatikan"),
"hati"),
root_pos_morphind = replace(root_pos_morphind,
word_form %in% c("diperhatikan", "diper-hatikan"),
"n"),
pref_morphind = replace(pref_morphind,
word_form %in% c("diperhatikan", "diper-hatikan"), "di-+per-"),
suff_morphind = replace(suff_morphind,
word_form %in% c("diperhatikan", "diper-hatikan"), "kan"),
affix_morphind = replace(affix_morphind,
word_form %in% c("diperhatikan", "diper-hatikan"), "di-+per-_kan"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form %in% c("diperhatikan", "diper-hatikan"),
"di-+per-_n_kan"),
morphind = replace(morphind, word_form == "memerhatikannya", "meN+per+hati<n>+kan_VSA"),
pref_morphind = replace(pref_morphind, word_form == "memerhatikannya", "meN-+per-"),
suff_morphind = replace(suff_morphind, word_form == "memerhatikannya", "kan"),
affix_morphind = replace(affix_morphind, word_form == "memerhatikannya", "meN-+per-_kan"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form == "memerhatikannya", "meN-+per-_n_kan"),
morphind = replace(morphind, word_form == "diempati", "di+empati<n>_VSP"),
root_morphind = replace(root_morphind, word_form == "diempati", "empati"),
root_pos_morphind = replace(root_pos_morphind, word_form %in% c("diempati"), "n"),
suff_morphind = replace(suff_morphind, word_form %in% c("diempati"), "0"),
affix_morphind = replace(affix_morphind, word_form %in% c("diempati"), "di-_0"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form %in% c("diempati"),
"di-_n_0"),
morphind = replace(morphind, word_form %in% c("diperbaiki", "diperbaikii"), "di+per+baik<a>+i_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("diperbaiki", "diperbaikii"), "baik"),
root_pos_morphind = replace(root_pos_morphind, word_form %in% c("diperbaiki", "diperbaikii"), "a"),
pref_morphind = replace(pref_morphind, word_form %in% c("diperbaiki", "diperbaikii"), "di-+per-"),
suff_morphind = replace(suff_morphind, word_form %in% c("diperbaiki", "diperbaikii"), "i"),
affix_morphind = replace(affix_morphind, word_form %in% c("diperbaiki", "diperbaikii"), "di-+per-_i"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form %in% c("diperbaiki", "diperbaikii"),
"di-+per-_a_i"),
root_morphind = replace(root_morphind, root_morphind == "disain", "desain"),
morphind = if_else(str_detect(morphind, "(?<=\\+)disain(?=\\<n\\>)"),
str_replace(morphind, "(?<=\\+)disain(?=\\<n\\>)", "desain"),
morphind),
root_morphind = replace(root_morphind, root_morphind == "ijin", "izin"),
morphind = if_else(str_detect(morphind, "(?<=\\+)ijin(?=\\<n\\>)"),
str_replace(morphind, "(?<=\\+)ijin(?=\\<n\\>)", "izin"),
morphind),
morphind = if_else(root_morphind == "bahaya",
str_replace_all(morphind, "bahaya<a>", "bahaya<n>"),
morphind),
morphind = replace(morphind, word_form == "memasak", "meN+masak<v>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("memasak"), "masak"),
root_pos_morphind = replace(root_pos_morphind, word_form %in% c("memasak"), "v"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form == "memasak", "meN-_v_0"),
verb_tagged = replace(verb_tagged, word_form %in% c("terpenuhi", "terpenihi"), TRUE),
morphind = replace(morphind, word_form %in% c("terpenuhi", "terpenihi"), "ter+penuh<a>+i_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("terpenuhi", "terpenihi"), "penuh"),
pref_morphind = replace(pref_morphind, word_form %in% c("terpenuhi", "terpenihi"), "ter-"),
suff_morphind = replace(suff_morphind, word_form %in% c("terpenuhi", "terpenihi"), "i"),
affix_morphind = replace(affix_morphind, word_form %in% c("terpenuhi", "terpenihi"), "ter-_i"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("terpenuhi", "terpenihi"), "ter-_a_i"),
word_form = replace(word_form, word_form == "terpenihi", "terpenuhi"),
morphind = replace(morphind, word_form %in% c("terperam"), "ter+peram<pre-cat>_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("terperam"), "peram"),
root_pos_morphind = replace(root_pos_morphind, word_form == "terperam", "pre-cat"),
pref_morphind = replace(pref_morphind, word_form %in% c("terperam"), "ter-"),
suff_morphind = replace(suff_morphind, word_form %in% c("terperam"), "0"),
affix_morphind = replace(affix_morphind, word_form %in% c("terperam"), "ter-_0"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("terperam"), "ter-_pre-cat_0"),
morphind = replace(morphind, word_form %in% c("terpencarkan"), "ter+pencar<v>+kan_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("terpencarkan"), "pencar"),
root_pos_morphind = replace(root_pos_morphind, word_form == "terpencarkan", "v"),
pref_morphind = replace(pref_morphind, word_form %in% c("terpencarkan"), "ter-"),
suff_morphind = replace(suff_morphind, word_form %in% c("terpencarkan"), "kan"),
affix_morphind = replace(affix_morphind, word_form %in% c("terpencarkan"), "ter-_kan"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("terpencarkan"), "ter-_v_kan"),
morphind = if_else(root_morphind == "esa",
str_replace_all(morphind, "\\<a\\>", "<c>"),
morphind),
root_pos_morphind = replace(root_pos_morphind,
root_morphind == "esa", "c"),
affix_morphind_wclass = if_else(root_morphind == "esa",
str_replace_all(affix_morphind_wclass,
"_num_", "_c_"),
affix_morphind_wclass),
morphind = replace(morphind, word_form %in% c("bergeliat-geliut"), "ber+geliat-geliut<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("bergeliat-geliut"), "geliat-geliut"),
morphind = replace(morphind, word_form %in% c("berkaca-mata"), "ber+kacamata<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berkaca-mata"), "kacamata"),
morphind = replace(morphind, word_form %in% c("bercocok-tanam"), "ber+cocok tanam<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("bercocok-tanam"), "cocok tanam"),
root_pos_morphind = replace(root_pos_morphind, word_form == "bercocok-tanam", "n"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("bercocok-tanam"), "ber-_n_0"),
morphind = replace(morphind, word_form %in% c("berdin-ding"), "ber+dinding<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berdin-ding"), "dinding"),
morphind = replace(morphind, word_form %in% c("berduka-kecewa"), "ber+duka kecewa<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berduka-kecewa"), "duka kecewa"),
morphind = replace(morphind, word_form %in% c("berdaya-guna"), "ber+daya guna<n>_VSA"),
morphind = str_replace_all(morphind, "dayaguna\\<n\\>", "daya guna<n>"),
root_morphind = replace(root_morphind, word_form %in% c("berdaya-guna"), "daya guna"),
root_morphind = replace(root_morphind, root_morphind %in% c("dayaguna"), "daya guna"),
morphind = replace(morphind, word_form %in% c("berdaya-cipta"), "ber+daya cipta<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berdaya-cipta"), "daya cipta"),
morphind = replace(morphind, word_form %in% c("berdaya-kuasa"), "ber+daya kuasa<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berdaya-kuasa"), "daya kuasa"),
morphind = replace(morphind, word_form %in% c("terperi"), "ter+peri<n>_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("terperi"), "peri"),
root_pos_morphind = replace(root_pos_morphind, word_form == "terperi", "n"),
pref_morphind = replace(pref_morphind, word_form %in% c("terperi"), "ter-"),
suff_morphind = replace(suff_morphind, word_form %in% c("terperi"), "0"),
affix_morphind = replace(affix_morphind, word_form %in% c("terperi"), "ter-_0"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("terperi"), "ter-_n_0"),
morphind = replace(morphind, word_form %in% c("terperikan"), "ter+peri<n>+kan_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("terperikan"), "peri"),
root_pos_morphind = replace(root_pos_morphind, word_form == "terperikan", "n"),
pref_morphind = replace(pref_morphind, word_form %in% c("terperikan"), "ter-"),
suff_morphind = replace(suff_morphind, word_form %in% c("terperikan"), "kan"),
affix_morphind = replace(affix_morphind, word_form %in% c("terperikan"), "ter-_kan"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("terperikan"), "ter-_n_kan"),
root_morphind = replace(root_morphind, word_form == "beralih-rupa", "alih rupa"),
root_morphind = replace(root_morphind, word_form == "beralih-wujud", "alih wujud"),
root_morphind = replace(root_morphind, word_form == "beranak-cucu", "anak cucu"),
root_morphind = replace(root_morphind, word_form == "beranak-cucu", "anak pinak"),
root_morphind = replace(root_morphind, word_form == "berbaik-hati", "baik hati"),
root_morphind = replace(root_morphind, word_form == "bercita-rasa", "cita rasa"),
root_morphind = replace(root_morphind, root_morphind == "campuraduk", "campur aduk"),
root_morphind = replace(root_morphind, word_form == "bercampur-baur", "campur baur"),
morphind = replace(morphind, word_form == "bercampur-baur",
"ber+campur baur<v>_VSA"),
morphind = replace(morphind, word_form == "bercampur-aduk",
"ber+campur aduk<v>_VSA"),
morphind = replace(morphind, word_form == "tercampur-aduk",
"ter+campur aduk<v>_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("tercampur-aduk",
"bercampur-aduk"),
"campur aduk"),
morphind = if_else(root_morphind == "campur aduk",
str_replace_all(morphind, "campuraduk<[av]>", "campur aduk<v>"),
morphind),
root_pos_morphind = replace(root_pos_morphind,
root_morphind == "campur aduk" & root_pos_morphind == "a",
"v"),
affix_morphind_wclass = replace(affix_morphind_wclass,
root_morphind == "campur aduk"
& affix_morphind_wclass == "meN-_a_kan",
"meN-_v_kan"),
root_morphind = replace(root_morphind, morphind == "ber+aneka<a>_VSADASHragam<n>_NSD", "aneka ragam"),
root_pos_morphind = replace(root_pos_morphind, morphind == "ber+aneka<a>_VSADASHragam<n>_NSD", "n"),
affix_morphind = replace(affix_morphind, morphind == "ber+aneka<a>_VSADASHragam<n>_NSD", "ber-_n_0"),
root_morphind = replace(root_morphind, root_morphind == "anekaragam", "aneka ragam"),
root_morphind = replace(root_morphind, morphind == "ber+aneka<a>_VSADASHmacam<n>_NSD", "aneka macam"),
root_morphind = replace(root_morphind, morphind == "ber+aneka<a>_VSADASHrupa<n>_NSD", "aneka rupa"),
root_pos_morphind = replace(root_pos_morphind,
morphind %in% c("ber+aneka<a>_VSADASHmacam<n>_NSD",
"ber+aneka<a>_VSADASHrupa<n>_NSD"), "n"),
affix_morphind = replace(affix_morphind,
morphind %in% c("ber+aneka<a>_VSADASHmacam<n>_NSD",
"ber+aneka<a>_VSADASHrupa<n>_NSD"), "ber-_n_0"),
root_morphind = replace(root_morphind, root_morphind == "anekaragam", "aneka ragam"),
morphind = replace(morphind, word_form %in% c("berbasis-bank"), "ber+basis-bank<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berbasis-bank"), "basis-bank"),
# word_form = if_else(word_form %in% c("berbeda-bagi", "berbeda-dalam",
# "bercahaya-mungkin", "berburu-meramu",
# "bercerita-dengan", "bercerita-entah"),
# str_replace_all(word_form, "\\-[a-z]+$", ""),
# word_form),
morphind = replace(morphind, word_form %in% c("berazan"), "ber+azan<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berazan"), "azan"),
root_pos_morphind = replace(root_pos_morphind, word_form == "berazan", "n"),
suff_morphind = replace(suff_morphind, word_form %in% c("berazan"), "0"),
affix_morphind = replace(affix_morphind, word_form %in% c("berazan"), "ber-_0"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("berazan"), "ber-_n_0"),
morphind = replace(morphind, word_form %in% c("berbasa-basi"), "ber+basa basi<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("basabasi"), "basa basi"),
morphind = replace(morphind, word_form %in% c("berbulan-madu"), "ber+bulan madu<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berbulan-madu"), "bulan madu"),
root_morphind = replace(root_morphind, root_morphind == "ceraiberai", "cerai berai"),
morphind = replace(morphind, word_form %in% c("terperinci"), "ter+per+rinci<a>_VSP"),
root_morphind = replace(root_morphind, word_form %in% c("terperinci"), "rinci"),
root_pos_morphind = replace(root_pos_morphind, word_form == "terperinci", "a"),
pref_morphind = replace(pref_morphind, word_form %in% c("terperinci"), "ter-+per-"),
suff_morphind = replace(suff_morphind, word_form %in% c("terperinci"), "0"),
affix_morphind = replace(affix_morphind, word_form %in% c("terperinci"), "ter-+per-_0"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form %in% c("terperinci"), "ter-+per-_a_0"),
morphind = replace(morphind, word_form == "bertempik-sorak", "ber+tempik sorak<n>_VSA"),
root_morphind = replace(root_morphind, word_form == "bertempik-sorak", "tempik sorak"),
root_pos_morphind = replace(root_pos_morphind, word_form == "bertempik-sorak", "n"),
pref_morphind = replace(pref_morphind, word_form == "bertempik-sorak", "ber-"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form == "bertempik-sorak",
"ber-_n_0"),
affix_morphind = replace(affix_morphind, word_form == "bertempik-sorak", "ber-_0"),
morphind = replace(morphind, word_form == "diperbankan", "di+per+bank<n>+kan_VSP"),
root_morphind = replace(root_morphind, word_form == "diperbankan", "bank"),
morphind = replace(morphind, word_form == "berbesan", "ber+besan<n>_VSA"),
root_morphind = replace(root_morphind, word_form == "berbesan", "besan"),
root_pos_morphind = replace(root_pos_morphind, word_form == "berbesan", "n"),
pref_morphind = replace(pref_morphind, word_form == "berbesan", "ber-"),
suff_morphind = replace(suff_morphind, word_form == "berbesan", "0"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form == "berbesan", "ber-_n_0"),
affix_morphind = replace(affix_morphind, word_form == "berbesan", "ber-_0"),
morphind = replace(morphind, word_form == "berkejap-kerjap", "ber+kejap kerjap<n>_VPA"),
root_morphind = replace(root_morphind, word_form == "berkejap-kerjap", "kejap kerjap"),
root_pos_morphind = replace(root_pos_morphind, word_form == "berkejap-kerjap", "n"),
pref_morphind = replace(pref_morphind, word_form == "berkejap-kerjap", "ber-"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form == "berkejap-kerjap", "ber-_n_0"),
affix_morphind = replace(affix_morphind, word_form == "berkejap-kerjap", "ber-_0"),
morphind = replace(morphind, word_form=='diperolah', "di+per+oleh<r>_VSP"),
morphind = replace(morphind, word_form=='memperolah', "meN+per+oleh<r>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("diperolah", "memperolah"), "oleh"),
root_pos_morphind = replace(root_pos_morphind, word_form %in% c("diperolah", "memperolah"), "r"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form=='diperolah', "di-+per-_r_0"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form=='memperolah', "meN-+per-_r_0"),
affix_morphind = replace(affix_morphind, word_form=='diperolah', "di-+per-_0"),
affix_morphind = replace(affix_morphind, word_form=='memperolah', "meN-+per-_r_0"),
morphind = if_else(word_form %in% c("meminta", "memintakan"),
str_replace_all(morphind, "pinta<n>", "minta<v>"),
morphind),
root_morphind = if_else(word_form %in% c("meminta", "memintakan"),
str_replace(root_morphind, "^pinta$", "minta"),
root_morphind),
root_pos_morphind = if_else(word_form %in% c("meminta", "memintakan"),
str_replace(root_pos_morphind, "^n$", "v"),
root_pos_morphind),
affix_morphind_wclass = if_else(word_form %in% c("meminta", "memintakan"),
str_replace(affix_morphind_wclass, "_n_", "_v_"),
affix_morphind_wclass),
morphind = if_else(word_form %in% c("mengecek"),
str_replace_all(morphind, "kecek<n>", "cek<v>"),
morphind),
root_morphind = if_else(word_form %in% c("mengecek"),
str_replace(root_morphind, "^kecek$", "cek"),
root_morphind),
root_pos_morphind = if_else(word_form %in% c("mengecek"),
str_replace(root_pos_morphind, "^n$", "v"),
root_pos_morphind),
affix_morphind_wclass = if_else(word_form %in% c("mengecek"),
str_replace(affix_morphind_wclass, "_n_", "_v_"),
affix_morphind_wclass),
morphind = if_else(word_form %in% c("mencek"),
str_replace_all(morphind, "cek<n>", "cek<v>"),
morphind),
root_pos_morphind = if_else(word_form %in% c("mencek"),
str_replace(root_pos_morphind, "^n$", "v"),
root_pos_morphind),
affix_morphind_wclass = if_else(word_form %in% c("mencek"),
str_replace(affix_morphind_wclass, "_n_", "_v_"),
affix_morphind_wclass),
morphind = if_else(word_form %in% c("menanyai", "menanyakan", "menanya"),
str_replace_all(morphind, "(?<=\\+)(nanya)(?=<v>)", "tanya"),
morphind),
root_morphind = if_else(word_form %in% c("menanyai", "menanyakan", "menanya"),
str_replace(root_morphind, "^nanya$", "tanya"),
root_morphind),
morphind = if_else(word_form %in% c("menerabas"),
str_replace_all(morphind, "(?<=\\+)(nerabas)(?=<v>)", "terabas"),
morphind),
root_morphind = if_else(word_form %in% c("menerabas"),
str_replace(root_morphind, "^nerabas$", "terabas"),
root_morphind),
morphind = if_else(root_morphind == "rasa", str_replace_all(morphind, "rasa<v>", "rasa<n>"), morphind),
root_pos_morphind = if_else(root_morphind == "rasa", "n", root_pos_morphind),
affix_morphind_wclass = if_else(root_morphind == "rasa",
str_replace_all(affix_morphind_wclass,
"_v_", "_n_"),
affix_morphind_wclass),
word_form = replace(word_form, word_form=="terangin", "diterangin"),
morphind = replace(morphind, word_form=="diterangin", "di+terang<a>+in_VSP"),
root_morphind = replace(root_morphind, word_form=="diterangin", "terang"),
root_pos_morphind = replace(root_pos_morphind, word_form=="diterangin", "a"),
pref_morphind = replace(pref_morphind, word_form=="diterangin", "di-"),
suff_morphind = replace(suff_morphind, word_form=="diterangin", "in"),
affix_morphind_wclass = replace(affix_morphind_wclass, word_form=="diterangin", "di-_a_in"),
affix_morphind = replace(affix_morphind, word_form=="diterangin", "di-_in"),
dbase = replace(dbase, word_form=="diterangin", "di"),
verb_tagged = replace(verb_tagged, word_form=="diterangin", TRUE),
morphind = replace(morphind, word_form=='berjual-beli', "ber+jualbeli<n>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("berjual-beli"), "jualbeli"),
root_pos_morphind = replace(root_pos_morphind, word_form %in% c("berjual-beli"), "n"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form=='berjual-beli', "ber-_n_0"),
morphind = replace(morphind, word_form=='memperinci', "meN+per+rinci<a>_VSA"),
root_morphind = replace(root_morphind, word_form %in% c("memperinci"), "rinci"),
root_pos_morphind = replace(root_pos_morphind, word_form %in% c("memperinci"), "a"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form=='memperinci', "meN-+per-_a_0"),
morphind = str_replace_all(morphind, "(?<=\\+)giat\\<n\\>", "giat<a>"),
root_pos_morphind = replace(root_pos_morphind, root_morphind == "giat", "a"),
affix_morphind_wclass = ifelse(root_morphind == "giat",
str_replace(affix_morphind_wclass, "_n_", "_a_"),
affix_morphind_wclass),
morphind = replace(morphind, word_form=='dipertanggung', "di+per+tanggungjawab<n>+kan_VSP"),
morphind = replace(morphind, word_form=='mempertanggung', "meN+per+tanggungjawab<n>+kan_VSA"),
root_morphind = replace(root_morphind,
word_form %in% c("dipertanggung", "mempertanggung"),
"tanggungjawab"),
suff_morphind = replace(suff_morphind,
word_form %in% c("dipertanggung", "mempertanggung"),
"kan"),
root_pos_morphind = replace(root_pos_morphind,
word_form %in% c("dipertanggung", "mempertanggung"), "n"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form=='dipertanggung', "di-+per-_n_kan"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form=='mempertanggung', "meN-+per-_n_kan"),
affix_morphind = replace(affix_morphind,
word_form=='dipertanggung', "di-+per-_kan"),
affix_morphind = replace(affix_morphind,
word_form=='mempertanggung', "meN-+per-_kan"),
morphind = replace(morphind, word_form=='diperjual', "di+per+jualbeli<n>+kan_VSP"),
morphind = replace(morphind, word_form=='memperjual', "meN+per+jualbeli<n>+kan_VSA"),
root_morphind = replace(root_morphind,
word_form %in% c("diperjual", "memperjual"),
"jualbeli"),
suff_morphind = replace(suff_morphind,
word_form %in% c("diperjual", "memperjual"),
"kan"),
root_pos_morphind = replace(root_pos_morphind,
word_form %in% c("diperjual", "memperjual"),
"n"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form=='diperjual',
"di-+per-_n_kan"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form=='memperjual',
"meN-+per-_n_kan"),
affix_morphind = replace(affix_morphind,
word_form=='diperjual', "di-+per-_kan"),
affix_morphind = replace(affix_morphind,
word_form=='memperjual', "meN-+per-_kan"),
morphind = replace(morphind, word_form=='berbasi', "ber+basi<a>_VSA"),
morphind = replace(morphind, word_form=='bergelar', "ber+gelar<n>_VSA"),
morphind = replace(morphind, word_form=='melinting', "meN+linting<n>_VSA"),
root_pos_morphind = ifelse(word_form=="berbasi", "a", root_pos_morphind),
root_morphind = ifelse(word_form=="berbasi", "basi", root_morphind),
root_pos_morphind = ifelse(word_form=="bergelar", "n", root_pos_morphind),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form == "berbasi",
"ber-_a_0"),
affix_morphind = replace(affix_morphind,
word_form == "berbasi",
"ber-_0"),
affix_morphind_wclass = replace(affix_morphind_wclass,
word_form == "bergelar",
"ber-_n_0"),
morphind = str_replace_all(morphind, "(?<=\\+)robek\\<v\\>", "robek<a>"),
morphind = replace(morphind, word_form=='merobek-robek', "meN+robek<a>_VPA"),
root_pos_morphind = ifelse(root_morphind == "robek", "a", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "robek",
str_replace(affix_morphind_wclass, "_v_", "_a_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)seberang\\<v\\>", "seberang<n>"),
root_pos_morphind = ifelse(root_morphind == "seberang", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "seberang",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)lisan\\<a\\>", "lisan<n>"),
root_pos_morphind = ifelse(root_morphind == "lisan", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "lisan",
str_replace(affix_morphind_wclass, "_a_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)lenting\\<a\\>", "lenting<v>"),
morphind = str_replace_all(morphind, "(?<=\\+)lengkung\\<n\\>", "lengkung<a>"),
root_pos_morphind = ifelse(root_morphind == "lengkung", "a", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "lengkung",
str_replace(affix_morphind_wclass, "_n_", "_a_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)jalan\\<v\\>", "jalan<n>"),
root_pos_morphind = ifelse(root_morphind == "jalan", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "jalan",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)teriak\\<v\\>", "teriak<n>"),
root_pos_morphind = ifelse(root_morphind == "teriak", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "teriak",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)cakap\\<v\\>", "cakap<n>"),
root_pos_morphind = ifelse(root_morphind == "cakap", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "cakap",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)gumam\\<v\\>", "gumam<n>"),
root_pos_morphind = ifelse(root_morphind == "gumam", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "gumam",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)sorak\\<v\\>", "sorak<n>"),
root_pos_morphind = ifelse(root_morphind == "sorak", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "sorak",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)tahan\\<v\\>", "tahan<a>"),
root_pos_morphind = ifelse(root_morphind == "tahan", "a", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "tahan",
str_replace(affix_morphind_wclass, "_v_", "_a_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)kisar\\<v\\>", "kisar<n>"),
root_pos_morphind = ifelse(root_morphind == "kisar", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "kisar",
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)padu\\<v\\>", "padu<a>"),
root_pos_morphind = ifelse(root_morphind == "padu", "a", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "padu",
str_replace(affix_morphind_wclass, "_v_", "_a_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "((?<=^ber\\+)tanggal\\<v\\>(?=_V)|(?<=^ter\\+)tanggal\\<v\\>(?=_V))", "tanggal<n>"),
root_pos_morphind = ifelse(word_form %in% c("tertanggal", "bertanggal"), "n", root_pos_morphind),
affix_morphind_wclass = ifelse(word_form %in% c("tertanggal", "bertanggal"),
str_replace(affix_morphind_wclass, "_v_", "_n_"),
affix_morphind_wclass),
morphind = str_replace_all(morphind, "(?<=\\+)bujang\\<f\\>", "bujang<n>"),
root_pos_morphind = ifelse(root_morphind == "bujang", "n", root_pos_morphind),
affix_morphind_wclass = ifelse(root_morphind == "bujang",
str_replace(affix_morphind_wclass, "_f_", "_n_"),
affix_morphind_wclass),
morphind = ifelse(word_form %in% c("berbikini", "bertopi", "berhelai", "berhelai-helai", "berselai", "berponi", "beruntai", "berderai", "berderailah", "berderai-derailah", "berderai-", "berderai-derai", "beresponsi", "bermigasi", "beranalogi"), str_replace_all(morphind, "(?<=^ber\\+)([^<]+)\\<.\\>\\+(i)", "\\1\\2<n>"), morphind),
root_pos_morphind = ifelse(word_form %in% c("berbikini", "bertopi", "berhelai", "berhelai-helai", "berselai", "berponi", "beruntai", "berderai", "berderailah", "berderai-derailah", "berderai-", "berderai-derai", "beresponsi", "bermigasi", "beranalogi"), "n", root_pos_morphind),
affix_morphind_wclass = ifelse(word_form %in% c("berbikini", "bertopi", "berhelai", "berhelai-helai", "berselai", "berponi", "beruntai", "berderai", "berderailah", "berderai-derailah", "berderai-", "berderai-derai", "beresponsi", "bermigasi", "beranalogi"),
str_replace(affix_morphind_wclass, "_[a-z]_i$", "_n_0"),
affix_morphind_wclass),
affix_morphind = ifelse(word_form %in% c("berbikini", "bertopi", "berhelai", "berhelai-helai", "berselai", "berponi", "beruntai", "berderai", "berderailah", "berderai-derailah", "berderai-", "berderai-derai", "beresponsi", "bermigasi", "beranalogi"),
str_replace(affix_morphind, "_i$", "_0"),
affix_morphind),
root_morphind = ifelse(word_form %in% c("berbikini", "bertopi", "berhelai", "berhelai-helai", "berselai", "berponi", "beruntai", "berderai", "berderailah", "berderai-derailah", "berderai-", "berderai-derai", "beresponsi", "bermigasi", "beranalogi"), str_extract(morphind, "([^<+]+?)(?=<)"), root_morphind))
# Read the tab-separated table of manual corrections and keep only the rows
# whose `dbase` column equals 'me'.
more_editing <- filter(read_tsv('more_editing_database.txt'), dbase=='me')
# Merge the manual corrections held in `more_editing` into `verbs`, then
# rebuild the `morphind` parse and the root of the affected word forms.
# No `by` is passed to left_join(), so the join keys are whatever column names
# the two tables have in common.
# NOTE(review): ROOT2 is presumably contributed by `more_editing` -- confirm.
verbs <- left_join(verbs, more_editing) %>%
  # Step 1: word forms listed in `more_editing` that carry no suffix
  # (suff_morphind is "0") and whose current parse contains "X--" get a parse
  # assembled from "meN+", ROOT2 and the root POS tag.
  mutate(
    morphind = ifelse(
      str_detect(morphind, "X--") &
        suff_morphind == "0" &
        word_form %in% more_editing$word_form,
      paste0("meN+", ROOT2, "<", root_pos_morphind, ">_VSA"),
      morphind
    )
  ) %>%
  # Step 2: same rewrite for suffixed forms; the suffix is appended after the
  # root. This runs on the `morphind` produced by step 1, so order matters.
  mutate(
    morphind = ifelse(
      str_detect(morphind, "X--") &
        suff_morphind != "0" &
        word_form %in% more_editing$word_form,
      paste0("meN+", ROOT2, "<", root_pos_morphind, ">+", suff_morphind, "_VSA"),
      morphind
    )
  ) %>%
  # Step 3: every word form listed in `more_editing` takes ROOT2 as its root,
  # whether or not its parse contained "X--".
  mutate(
    root_morphind = ifelse(
      word_form %in% more_editing$word_form,
      ROOT2,
      root_morphind
    )
  ) %>%
  # ROOT2 was only needed for the rewrites above.
  select(-ROOT2)