-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrenderSideBySide.py
executable file
·253 lines (227 loc) · 9.5 KB
/
renderSideBySide.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/usr/bin/python
"""
CS276 Programming Assignment 3: Ranking
Given two experiment outputs, render a collection of webpages that compare the
two experiment results. The rendered pages will be in the following hierarchies:
- index.html: an index page that shows NDCG differences.
- queries/qn.html: a collection of query pages that shows detailed comparison on each query.
The experiment outputs should be generated by using the Java code in the skeleton code,
otherwise we may not be able to parse the format correctly.
Usage: renderSideBySide.py [-h] exp1_name exp1_file exp2_name exp2_file
Args:
exp1_name A name for experiment 1
exp1_file Experiment 1 output
exp2_name A name for experiment 2
exp2_file Experiment 2 output
Note: This script is relatively new, and was tested for macOS only.
If you find it to be buggy, please let us know on Piazza.
"""
import os
import sys
import shutil
import argparse
import math
# Output directory (relative to the current working directory) that receives
# every generated page.
ROOT_DIR = './sideBySide/'
# File name of the index page; it is written directly inside ROOT_DIR.
INDEX_PAGE = 'index.html'
# Sub-directory of ROOT_DIR that holds the per-query pages; also used as the
# relative link prefix on the index page.
QUERY_DIR = 'queries/'
class URL(object):
    """One ranked result of a query, as read from an experiment file.

    Attributes set by the constructor:
      url       -- the full URL string, used as the link target.
      rating    -- the relevance rating; rendered with ``%g``.
      title     -- the page title, used as the link text.
      debugstr  -- free-form debug text shown below the rating.
      clean_url -- ``url`` without its leading ``scheme://`` part.
    """

    def __init__(self, url, rating, title, debugstr):
        self.url = url
        self.rating = rating
        self.title = title
        self.debugstr = debugstr
        # Cut only at the FIRST '://': a later '://' (for example inside a
        # redirect query parameter) belongs to the URL and must be kept.
        # A URL without any scheme is shown unchanged instead of crashing.
        _, separator, remainder = url.partition('://')
        self.clean_url = remainder if separator else url

    def render_url_info(self, idx=None):
        """Return the HTML ``<td>`` cell describing this result.

        Args:
          idx: optional integer position label. When given, the link is
            prefixed with ``P<idx>:``; when ``None`` it is prefixed with
            ``Page:``.

        Returns:
          A string holding one complete ``<td>...</td>`` element.
        """
        if idx is None:
            link_line = ' Page: <a href="%s">%s</a> <br>' % (self.url, self.title)
        else:
            link_line = ' P%d: <a href="%s">%s</a> <br>' % (idx, self.url, self.title)
        pieces = [
            '<td>\n',
            link_line,
            ' <small style="color:green">%s</small> <br>' % self.clean_url,
            ' Rating: <b>%g</b> <br>' % self.rating,
            ' Debug: %s <br>' % self.debugstr,
            '</td>',
        ]
        return ''.join(pieces)
def sign(v):
    """Return 1 when ``v`` is greater than or equal to zero, otherwise -1."""
    if v >= 0:
        return 1
    return -1
def read_experiment_file(fname):
    """Parse an experiment output file.

    The file is read as a sequence of records; blank lines are ignored.
    Each record starts with a ``query:`` line followed by an ``ndcg:`` line
    and then zero or more groups of four lines, in this order: ``url:``,
    ``rating:``, ``title:``, ``debug:``. The value of a line is everything
    after the tag and one separator character.

    Args:
      fname: path of the experiment file to read.

    Returns:
      A tuple ``(queries, query2ndcg, query2urls)``: the list of queries in
      file order, a dict mapping each query to its NDCG as a float, and a
      dict mapping each query to its list of ``URL`` objects in file order.

    Raises:
      AssertionError: if the file does not follow the format above. It is
        raised explicitly rather than through ``assert`` statements so the
        validation still runs under ``python -O``, and so truncated records
        and non-numeric values are reported the same way as wrong tags.
    """
    queries = []
    query2ndcg = {}
    query2urls = {}

    def malformed(reason):
        # Build the error that format problems are reported with.
        return AssertionError('%s: %s' % (fname, reason))

    def field(line, tag):
        # Check the tag and return the text after it and one separator char.
        if not line.startswith(tag):
            raise malformed('expected a %r line but found %r' % (tag, line))
        return line[len(tag) + 1:]

    def number(line, tag):
        # Like field(), but the value must parse as a float.
        text = field(line, tag)
        try:
            return float(text)
        except ValueError:
            raise malformed('%r value is not a number: %r' % (tag, text))

    def process_cache(cache):
        # Turn the buffered lines of one record into entries of the results.
        if len(cache) < 2:
            raise malformed('record is truncated: %r' % (cache,))
        q = field(cache[0], 'query:')
        ndcg = number(cache[1], 'ndcg:')
        entries = cache[2:]
        if len(entries) % 4 != 0:
            raise malformed('query %r: result lines do not come in groups '
                            'of four (url, rating, title, debug)' % q)
        urls = []
        for start in range(0, len(entries), 4):
            url_line, rating_line, title_line, debug_line = entries[start:start + 4]
            u = field(url_line, 'url:')
            r = number(rating_line, 'rating:')
            t = field(title_line, 'title:')
            d = field(debug_line, 'debug:')
            urls.append(URL(u, r, t, d))
        queries.append(q)
        query2ndcg[q] = ndcg
        query2urls[q] = urls

    with open(fname) as infile:
        cache = []
        for line in infile:
            line = line.strip()
            if not line:
                continue
            # A new 'query:' line closes the record collected so far.
            if line.startswith('query:') and cache:
                process_cache(cache)
                cache = []
            cache.append(line)
        # Flush the last record, which has no following 'query:' line.
        if cache:
            process_cache(cache)
    return queries, query2ndcg, query2urls
def get_color_str(num):
    """Return the CSS color name used to display ``num``.

    Zero maps to "black", positive values to "green", and everything else
    to "red".
    """
    if num == 0:
        return "black"
    return "green" if num > 0 else "red"
def get_link(url, title):
    """Return an HTML anchor element pointing at ``url`` with ``title`` as its text."""
    anchor_template = '<a href="%s">%s</a>'
    return anchor_template % (url, title)
def write_index_page(outfile, queries, ndcg1, ndcg2, name1, name2):
    """Write the index HTML page to ``outfile``.

    The page shows both experiment names, the difference of the mean NDCGs
    (experiment 2 minus experiment 1) and a table with one row per query.
    Each row links to ``QUERY_DIR`` + ``q<row number>.html`` and shows that
    query's NDCG difference. Rows are written in the order given; this
    function does not sort them.

    Args:
      outfile: a writable text file object.
      queries: sequence of query strings.
      ndcg1: sequence of NDCG values of experiment 1, parallel to ``queries``.
      ndcg2: sequence of NDCG values of experiment 2, parallel to ``queries``.
      name1: display name of experiment 1.
      name2: display name of experiment 2.
    """
    def emit(text):
        # Write one line. Replaces the Python 2-only "print >> outfile"
        # statement so the function behaves the same on Python 2 and 3.
        outfile.write(text + '\n')

    # An empty experiment has no mean; report 0 rather than dividing by zero.
    mean_ndcg1 = 1.0 * sum(ndcg1) / len(ndcg1) if ndcg1 else 0.0
    mean_ndcg2 = 1.0 * sum(ndcg2) / len(ndcg2) if ndcg2 else 0.0
    mean_diff = mean_ndcg2 - mean_ndcg1

    INDEX_HEADER = (
        '<!DOCTYPE html>\n<html>\n<head>\n<style>\n'
        'table, th, td { border: 1px solid black; border-collapse: collapse; }\n'
        'th, td { padding: 5px }\n</style>\n</head>\n'
    )
    emit(INDEX_HEADER)
    emit('<body>')
    emit('<p>Experiment 1: %s <br>' % name1)
    emit('Experiment 2: %s</p>' % name2)
    emit('<p>Average NDCG diff [%s - %s]: <b style="color:%s">%g</b></p>' %
         (name2, name1, get_color_str(mean_diff), mean_diff))
    emit('<p>Queries are sorted by NDCG diff.</p>')
    emit('<table>')
    TABLE_HEADER = '<tr>\n<th></th>\n<th>Query</th>\n<th>NDCG diff</th>\n</tr>'
    emit(TABLE_HEADER)
    # Row numbers start at 1 and double as the query page file names.
    for idx, (q, n1, n2) in enumerate(zip(queries, ndcg1, ndcg2), 1):
        link = QUERY_DIR + 'q%d.html' % idx
        row_diff = n2 - n1
        emit(' <tr>')
        emit(' <td>%d</td>' % idx)
        emit(' <td>%s</td>' % get_link(link, q))
        emit(' <td><b style="color:%s">%g</b></td>' % (get_color_str(row_diff), row_diff))
        emit(' </tr>')
    emit("</table>")
    emit("</body>")
    emit("</html>")
def write_query_page(outfile, q, n1, n2, urls1, urls2, name1, name2):
    """Write the side-by-side HTML page of one query to ``outfile``.

    The page shows the query, its NDCG difference (experiment 2 minus
    experiment 1) and a two-column table pairing the results of both
    experiments row by row. Every result is labelled ``P<k>``, where ``k``
    is the 1-based position of its URL in experiment 1, so the same page
    carries the same label in both columns.

    Args:
      outfile: a writable text file object.
      q: the query string.
      n1: NDCG of experiment 1 for this query.
      n2: NDCG of experiment 2 for this query.
      urls1: results of experiment 1; each item provides ``url`` and
        ``render_url_info(idx=...)``.
      urls2: results of experiment 2, same item interface.
      name1: display name of experiment 1.
      name2: display name of experiment 2.
    """
    def emit(text):
        # Write one line. Replaces the Python 2-only "print >> outfile"
        # statement so the function behaves the same on Python 2 and 3.
        outfile.write(text + '\n')

    ndcg_diff = n2 - n1
    QUERY_HEADER = (
        '<!DOCTYPE html>\n<html>\n<head>\n<style>\n'
        'table, th, td { border: 1px solid black; border-collapse: collapse;}\n'
        'th, td {padding: 10px}\n'
        '.box { float: left; width: 20px; height: 20px; margin: 5px; border: 1px solid rgba(0, 0, 0, .2);}\n'
        '</style>\n</head>'
    )
    emit(QUERY_HEADER)
    emit('<body>')
    emit('<p><b>Query: %s</b></p>' % q)
    emit('<p>NDCG diff [%s - %s]: <b style="color:%s">%g</b></p>' %
         (name2, name1, get_color_str(ndcg_diff), ndcg_diff))
    emit('<table style="width:100%">')
    emit('<tr>\n<th>Experiment 1: %s <br> NDCG = %g </th>\n<th>Experiment 2: %s <br> NDCG = %g </th>\n</tr>' %
         (name1, n1, name2, n2))

    # 1-based position of every URL within experiment 1; if a URL repeats,
    # its last position wins.
    url2id = {}
    for position, result in enumerate(urls1, 1):
        url2id[result.url] = position

    for u1, u2 in zip(urls1, urls2):
        emit('<tr>')
        # .get() yields None for a URL that experiment 1 does not contain;
        # render_url_info then uses its unnumbered label instead of this
        # function failing with KeyError.
        emit(u1.render_url_info(idx=url2id.get(u1.url)))
        emit(u2.render_url_info(idx=url2id.get(u2.url)))
        emit('</tr>')
    emit('</table>')
    emit('</body>')
    emit('</html>')
def main(args):
    """Render the comparison pages for two experiment output files.

    Steps: check that both input files exist, prepare a fresh ``ROOT_DIR``
    (asking before deleting an existing one), parse both files, verify that
    they cover the same queries and the same URLs per query, sort the
    queries by NDCG difference (experiment 2 minus experiment 1, ascending),
    then write the index page and one page per query. On macOS the index
    page is opened afterwards.

    Args:
      args: object with the attributes ``exp1_name``, ``exp1_file``,
        ``exp2_name`` and ``exp2_file`` (the parsed command line).

    Raises:
      Exception: if an input file is missing, the files contain no queries,
        or the two experiments differ in their queries or in the URLs of a
        query.
      SystemExit: if the user declines to delete the existing output
        directory, or an input file cannot be parsed.
    """
    # The prompt function is called raw_input on Python 2 and input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    # Check the inputs first, so a typo in a file name cannot cost the user
    # the previously generated pages.
    if not os.path.exists(args.exp1_file) or not os.path.exists(args.exp2_file):
        raise Exception("At least one of the input files does not exist. Please check!")

    # check output dir, and ask the user before deleting an existing one
    if os.path.exists(ROOT_DIR):
        inp = ask("> Directory %s already exists. Delete and generate new pages (y/n)? " % ROOT_DIR)
        # inp[:1] instead of inp[0]: an empty answer means "no", not a crash.
        if inp[:1].lower() == 'y':
            shutil.rmtree(ROOT_DIR)
        else:
            print("> Do not delete old directory. Program exit.")
            sys.exit()
    os.makedirs(ROOT_DIR)

    # digest the two output files
    print("> Reading input files...")
    try:
        queries1, query2ndcg1, query2urls1 = read_experiment_file(args.exp1_file)
        queries2, query2ndcg2, query2urls2 = read_experiment_file(args.exp2_file)
    except AssertionError:
        print("The program encounter errors while parsing the input files.")
        print("Please check the format of your input files.\nProgram Exits.")
        sys.exit(-1)
    print("Done.")

    # compare queries to check for errors
    print("> Checking if the input files are valid...")
    if set(queries1) != set(queries2):
        raise Exception('Experiment 1 and experiment 2 contain different queries. ' +
                        'Pages cannot be rendered. Please check your files.')
    if not queries1:
        # Without this check the unpacking below fails with an opaque error.
        raise Exception('The input files contain no queries. Pages cannot be rendered.')
    queries = queries1
    ndcg1 = [query2ndcg1[q] for q in queries]
    ndcg2 = [query2ndcg2[q] for q in queries]
    urls1 = [query2urls1[q] for q in queries]
    urls2 = [query2urls2[q] for q in queries]
    ndcg_diffs = [n2 - n1 for n1, n2 in zip(ndcg1, ndcg2)]

    # sort queries based on ndcg diffs, ties broken by query text. An
    # explicit key keeps the comparison away from the URL objects, and
    # sorted() works on the iterator that zip() returns on Python 3.
    rows = sorted(zip(ndcg_diffs, queries, ndcg1, ndcg2, urls1, urls2),
                  key=lambda row: (row[0], row[1]))
    _, queries, ndcg1, ndcg2, urls1, urls2 = zip(*rows)

    for u1, u2, q in zip(urls1, urls2, queries):
        u1_set = set(u.url for u in u1)
        u2_set = set(u.url for u in u2)
        if u1_set != u2_set:
            raise Exception('Pages cannot be rendered. ' +
                            'Experiment 1 and experiment 2 contain different URLs for query: ' + q)
    print("Done.")

    # generate index file
    print("> Generating pages...")
    with open(ROOT_DIR + INDEX_PAGE, 'w') as outfile:
        write_index_page(outfile, queries, ndcg1, ndcg2, args.exp1_name, args.exp2_name)

    # generate all query files
    query_dir = ROOT_DIR + QUERY_DIR
    if os.path.exists(query_dir):
        shutil.rmtree(query_dir)
        print(" > Query directory exists. Deleted.")
    os.makedirs(query_dir)
    # Page numbers start at 1 and follow the sorted order, matching the
    # row numbers and links on the index page.
    for idx, (q, n1, n2, u1, u2) in enumerate(zip(queries, ndcg1, ndcg2, urls1, urls2), 1):
        fname = ROOT_DIR + QUERY_DIR + 'q%d.html' % idx
        with open(fname, 'w') as outfile:
            write_query_page(outfile, q, n1, n2, u1, u2, args.exp1_name, args.exp2_name)
    print("All done.")

    # open browser, only works for macOS
    if sys.platform == 'darwin':
        os.system('open %s' % (ROOT_DIR + INDEX_PAGE))
    print("Please open the index page at: %s" % (ROOT_DIR + INDEX_PAGE))
if __name__ == '__main__':
    # Script entry point: declare the four positional arguments in
    # command-line order, parse them, and hand them to main().
    parser = argparse.ArgumentParser(
        description='Render the side by side output, given two experiment outputs.')
    for positional, help_text in (
            ('exp1_name', 'A name for experiment 1'),
            ('exp1_file', 'Experiment 1 output'),
            ('exp2_name', 'A name for experiment 2'),
            ('exp2_file', 'Experiment 2 output')):
        parser.add_argument(positional, help=help_text)
    args = parser.parse_args()
    main(args)