forked from koskenni/pytwolc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwolcomp.py
187 lines (174 loc) · 6.36 KB
/
twolcomp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import sys, re, hfst
import twbt, twex, twrl
def apply_rule(psymlist, dicrule):
    """Run a sequence of pair symbols through one rule automaton.

    ``dicrule`` is indexed by state number.  Each entry is a
    ``(final, transitions)`` pair, where ``transitions`` maps a pair symbol
    to the number of the next state.  Processing starts in state 0.

    Returns ``(accepted, state_seq)``.  ``state_seq`` holds one trace string
    per consumed symbol: ``"sym -> N:"`` when the state N that was reached
    is final, ``"sym -> N."`` when it is not, and ``"sym -> ??"`` for the
    first symbol that has no transition.  Processing stops at such a symbol
    and the input is rejected.  Otherwise the input is accepted exactly when
    the last state reached is final.
    """
    state = 0
    state_seq = []
    # The body previously read an undefined name (``drule``) instead of the
    # ``dicrule`` parameter, so every call failed with NameError.
    final, dtrans = dicrule[state]
    for psym in psymlist:
        if psym not in dtrans:
            # No transition for this symbol: reject immediately.
            state_seq.append("{} -> ??".format(psym))
            return (False, state_seq)
        state = dtrans[psym]
        final, dtrans = dicrule[state]
        state_seq.append(
            "{} -> {}{}".format(psym, state, ':' if final else '.'))
    return (bool(final), state_seq)
def clean_comb_sym(sym):
    """Convert a combined ``input^output`` symbol to its display form.

    The symbol is split at ``'^'`` into exactly two parts.  When both parts
    are equal the single part is returned, otherwise ``"input:output"``.
    """
    upper, lower = sym.split('^')
    return upper if upper == lower else upper + ':' + lower
def print_raw_paths(paths):
    """Print every path on its own line as space-separated pair symbols.

    Each item of ``paths`` is a ``(weight, symbol_pairs)`` pair; the weight
    is ignored.  A symbol pair whose two sides are equal is shown as the
    bare symbol, any other pair as ``input:output``.
    """
    for _weight, pairs in paths:
        rendered = []
        for upper, lower in pairs:
            rendered.append(upper if upper == lower else upper + ":" + lower)
        print(' '.join(rendered))
import argparse
# Command-line interface.  Every option is described once in the table
# below and registered in table order, so the generated help text lists
# the options in that same order.
arpar = argparse.ArgumentParser("python3 twolcomp.py")
_OPTION_TABLE = [
    ("-e", "--examples",
     {"help": "name of the examples fst", "default": "examples.fst"}),
    ("-r", "--rules",
     {"help": "name of the rule file", "default": "test.rules"}),
    ("-l", "--lost",
     {"help": "file to which write the examples not accepted by all rules",
      "default": ""}),
    ("-w", "--wrong",
     {"help": "file to which write the wrong strings accepted by all rules as a fst",
      "default": ""}),
    ("-t", "--thorough",
     {"help": "test each rule separately, values: 0, 1 or 2",
      "type": int, "default": 0}),
    ("-v", "--verbosity",
     {"help": "level of diagnostic output", "type": int, "default": 0}),
    ("-d", "--debug",
     {"help": "level of PLY debugging output", "type": int, "default": 0}),
    ("-p", "--parser",
     {"help": "which parser to use: ply or tatsu", "default": "ply"}),
]
for _short, _long, _settings in _OPTION_TABLE:
    arpar.add_argument(_short, _long, **_settings)
args = arpar.parse_args()
# Load the examples and prepare the automata derived from them:
#   examples_fsa    - the examples turned into an automaton by
#                     hfst.fst_to_fsa with "^" as the separator
#   examples_up_fsa - the input projection of the examples
print('Reading examples from:', args.examples)
twex.read_fst(args.examples)
examples_fsa = twex.EXAMPLES.copy()
examples_fsa = hfst.fst_to_fsa(examples_fsa, separator="^")
examples_up_fsa = twex.EXAMPLES.copy()
examples_up_fsa.input_project()
twrl.init(args.verbosity)

# Import and initialise only the rule parser that was asked for.
if args.parser == "ply":
    import plytw
    plytw.init(args.verbosity)
elif args.parser == "tatsu":
    import twolcsyntax
    twolcsyntax.init()
else:
    print("--parser must be either 'tatsu' or 'ply', not", args.parser)
    # Stop here: without a parser the rule loop cannot produce a parse
    # result and would otherwise fail later with a NameError.
    sys.exit(1)
# Read the rule file line by line, compile each rule and, depending on
# --thorough, test it against the positive and negative examples.
#
# NOTE(review): the block nesting below was reconstructed from a copy of
# this file whose indentation had been lost -- confirm it against the
# upstream file (in particular that the rule title is printed for every
# rule and not only in thorough mode).
if args.lost or args.wrong:
    # Compiled rules are collected only when a combined result is needed.
    all_rules_fst_lst = []

# Rule operator -> twrl function that compiles a rule of that type.
rule_compilers = {
    "=>": twrl.rightarrow,
    "<=": twrl.leftarrow,
    "<=>": twrl.doublearrow,
    "/<=": twrl.center_exclusion,
}

# The context manager closes the rule file even if a rule fails to compile.
with open(args.rules, 'r') as rule_file:
    for line in rule_file:
        line = line.strip()
        if line == "STOP":
            # Everything after a STOP line is ignored.
            break
        if line == "" or line[0] == '!':
            # Skip empty lines and lines starting with '!'.
            continue
        if args.parser == "ply":
            result = plytw.parse_rule(line)
        elif args.parser == "tatsu":
            result = twolcsyntax.parse_rule(line)
        if not result:
            print("ERROR:", line)
            continue
        op = result[0]
        if op == "=":
            # A definition: register the expression under its name.
            _, def_name, expr, title = result
            print(title)
            twrl.define(def_name, expr)
            continue
        op, x_expr, ctx_expr_list, title = result
        if args.thorough > 0:
            print("\n--------------------\n")
        print(title)
        compile_rule = rule_compilers.get(op)
        if compile_rule is None:
            print("Error: not a valid type of a rule", op)
            continue
        R, SEL, MIXe = compile_rule(title, x_expr, *ctx_expr_list)
        if args.lost or args.wrong:
            all_rules_fst_lst.append(R)
        if args.thorough > 0:
            # Positive test: every example selected by SEL must also be
            # accepted by the rule R.
            SEL.intersect(twex.EXAMPLES)
            SEL.minimize()
            if args.verbosity > 1:
                paths = SEL.extract_paths(output='raw')
                print_raw_paths(paths[0:20])
            passed_pos_examples_fst = SEL.copy()
            passed_pos_examples_fst.intersect(R)
            if passed_pos_examples_fst.compare(SEL):
                print("All positive examples accepted")
            else:
                lost_examples_fst = SEL.copy()
                lost_examples_fst.minus(passed_pos_examples_fst)
                lost_examples_fst.minimize()
                print("** Some positive examples were rejected:")
                lost_paths = lost_examples_fst.extract_paths(output='raw')
                print_raw_paths(lost_paths)
        if args.thorough > 1:
            # Negative test: build candidate strings by composing the
            # examples with MIXe, drop the genuine examples, restrict the
            # rest to the input sides of the examples and check that R
            # accepts none of them.
            neg_examples_fsa = examples_fsa.copy()
            neg_examples_fsa.compose(MIXe)
            neg_examples_fsa.output_project()
            neg_examples_fst = hfst.fsa_to_fst(neg_examples_fsa, separator="^")
            neg_examples_fst.minus(twex.EXAMPLES)
            NG = examples_up_fsa.copy()
            NG.compose(neg_examples_fst)
            npaths = NG.extract_paths(output='raw')
            print_raw_paths(npaths)
            passed_neg_examples_fst = NG.copy()
            passed_neg_examples_fst.intersect(R)
            if args.verbosity > 0:
                if passed_neg_examples_fst.compare(hfst.empty_fst()):
                    print("All negative examples rejected")
                else:
                    print("Some negative examples accepted:")
                    npaths = passed_neg_examples_fst.extract_paths(output='raw')
                    print_raw_paths(npaths)
def _write_fst(fst, filename):
    """Write one transducer to *filename* through an HfstOutputStream."""
    stream = hfst.HfstOutputStream(filename=filename)
    stream.write(fst)
    stream.flush()
    stream.close()


# Combined check of all rules together: RESU is the input side of the
# examples run through compose_intersect with every collected rule.
if args.lost or args.wrong:
    RESU = examples_up_fsa.copy()
    print(RESU.number_of_arcs(), "arcs in RESU")
    RESU.compose_intersect(tuple(all_rules_fst_lst))
    RESU.minimize()
    if args.lost:
        # Examples that are missing from RESU.
        lost_positive_examples_fst = twex.EXAMPLES.copy()
        lost_positive_examples_fst.minus(RESU)
        lost_positive_examples_fst.minimize()
        _write_fst(lost_positive_examples_fst, args.lost)
        print("wrote lost examples to", args.lost)
    if args.wrong:
        # Content of RESU that is not among the examples.
        WRONG = RESU.copy()
        WRONG.subtract(twex.EXAMPLES)
        WRONG.minimize()
        _write_fst(WRONG, args.wrong)
        print("wrote wrongly accepted examples to", args.wrong)