forked from verisign/tlsa-survey
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dnssec_tlsa_zone_num.py
239 lines (216 loc) · 9.19 KB
/
dnssec_tlsa_zone_num.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
#!/usr/bin/env python
# Copyright (c) 2015, Verisign, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of tlsa-survey nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sqlite3, getopt, datetime, os, sys, calendar, subprocess, bz2
from datetime import date, timedelta
def print_err(s):
    """Write the string *s* to standard error exactly as given.

    Nothing is appended to the text, so callers that want a line break
    include it in *s* themselves.
    """
    err_stream = sys.stderr
    err_stream.write(s)
def run_bash(c):
    """Run the command line *c* and return what it wrote to standard output.

    The command is started directly through subprocess.Popen (no shell is
    involved), so shell features such as pipes or redirection are not
    interpreted.  Standard error is captured and discarded.

    c -- command line as a single string; quoted arguments are kept together.

    Returns the captured standard output of the process.
    """
    # Local import so this block does not depend on the file-level imports.
    import shlex

    # shlex.split honours quoting, whereas a plain str.split() would break
    # an argument such as "two words" into separate pieces.
    proc = subprocess.Popen(shlex.split(c),
                            stderr=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    output, _ = proc.communicate()
    return output
def validate(d):
    """Tell whether the text *d* parses as a date in '%Y-%m-%d' form.

    Returns True when datetime.datetime.strptime accepts the text and
    False when it raises ValueError.
    """
    try:
        datetime.datetime.strptime(d, '%Y-%m-%d')
    except ValueError:
        is_valid = False
    else:
        is_valid = True
    return is_valid
def get_date(dy, dm, dd, num_days):
    """Return the datetime.date lying *num_days* days after dy-dm-dd.

    dy, dm, dd -- year, month and day of the starting date.
    num_days   -- day offset to add; a negative value gives an earlier date.
    """
    return datetime.date(dy, dm, dd) + timedelta(days=num_days)
def rm_last_dot(s):
    """Return *s* without one trailing '.', if it has one.

    A string of length one or zero is returned unchanged, so a lone '.'
    stays as it is.
    """
    has_trailing_dot = len(s) > 1 and s.endswith('.')
    return s[:-1] if has_trailing_dot else s
def get_zone(n):
    """Return the last two dot-separated labels of the name *n*.

    One trailing dot is removed first (see rm_last_dot).  A name with a
    single label is returned as that label; otherwise the result is
    '<second-to-last>.<last>'.
    """
    name = rm_last_dot(n)
    labels = name.split('.')
    if not labels:
        # str.split always returns at least one element, so this guard
        # cannot trigger; it is kept to mirror the original checks.
        sys.exit('[error] cannot get zone for [%s]' % name)
    if len(labels) == 1:
        return name
    return '.'.join(labels[-2:])
def get_tld(n):
    """Return the last dot-separated label of the name *n*.

    One trailing dot is removed first (see rm_last_dot), so 'example.com.'
    and 'example.com' both give 'com'.  A single-label name is returned
    as that label.
    """
    name = rm_last_dot(n)
    labels = name.split('.')
    if not labels:
        # str.split always returns at least one element, so this guard
        # cannot trigger; it is kept to mirror the original checks.
        sys.exit('[error] cannot get tld for [%s]' % name)
    # With a single label the last element is the whole name.
    return labels[-1]
def get_line_num(fn):
    """Count the data lines of the file *fn*.

    A line counts when, after stripping surrounding whitespace, it is not
    empty and does not start with '#'.  Files whose name ends in '.bz2' are
    read through bz2.BZ2File; every other file is opened directly.

    fn -- path of the file to scan.

    Returns the number of counted lines.  The process exits via sys.exit
    with an error message when *fn* is not an existing regular file.
    """
    if not os.path.isfile(fn):
        sys.exit('[error] file[%s] does not exist, abort!' % fn)
    opener = bz2.BZ2File if fn.endswith('.bz2') else open
    line_num = 0
    # Both kinds of file are read as bytes so the comment test behaves the
    # same on Python 2 and 3; indexing bytes on Python 3 yields an int, which
    # made the former "l[0] == '#'" test always false for .bz2 input.
    with opener(fn, 'rb') as input_file:
        for raw_line in input_file:
            content = raw_line.strip()
            if not content or content.startswith(b'#'):
                continue
            line_num += 1
    return line_num
def usage(comm):
    """Print the command-line help of this script to standard output.

    comm -- program name shown on the first line (main passes sys.argv[0]).

    The option summary lists the short options that main actually handles
    (-h, -i, -p, -d, -n, -z).
    """
    help_lines = [
        'usage: %s [-hipdnz]' % comm,
        '\t -h print this message',
        # -i names the sqlite file that insert_zone_db writes zone_num rows to.
        '\t -i INPUT FILE sqlite db file the results are inserted into',
        '\t -p INPUT PATH input path',
        '\t -d YYYY-MM-DD select Date',
        # main rejects only values below zero, so 0 is a valid day count.
        '\t -n NUMBER number of days, must be a non-negative integer',
        '\t -z SELECT ZONE zone name, used as input prefix',
        '\t process all zone in db if empty',
    ]
    # sys.stdout.write behaves the same on Python 2 and 3, unlike the
    # print statement.
    sys.stdout.write('\n'.join(help_lines) + '\n')
def insert_zone_db(in_path, in_fn, date_text, num_days, zone):
    """Collect per-TLD TLSA/DNSSEC counts and store them in a sqlite file.

    For every day offset x in 0..num_days (inclusive) the function reads
    '<in_path>/data/stats.<date + x>.db', looks at the tlsa_rdata rows of
    that date and, for each TLD found among the names, inserts one row into
    the zone_num table of *in_fn*.  Each row is stamped with the PREVIOUS
    day (date + x - 1), which is also the date used in the name of the
    signed-zones file.
    NOTE(review): the one-day shift is taken over from the original code;
    presumably the stats of a day describe the preceding day -- confirm.

    in_path   -- directory containing the 'data' and 'input' sub-directories.
    in_fn     -- sqlite file receiving the rows; the zone_num table is
                 created when missing.
    date_text -- first day as 'YYYY-MM-DD'.
    num_days  -- number of additional days to process after date_text.
    zone      -- when not empty, only names whose 'zone' column equals this
                 value are considered.

    Columns written: zone (the TLD), year, month, day, tlsa_name (distinct
    name/port pairs whose zone column equals the TLD), tlsa_zone (distinct
    two-label zones under the TLD) and dnssec_zone (data lines of
    '<in_path>/input/<tld>-signed-zones-<YYYYMMDD>.bz2', or -1 when that
    file does not exist).

    Exits via sys.exit when a daily stats database file is missing.
    """
    if not os.path.isfile(in_fn):
        print_err('[warn] create %s\n' % in_fn)

    # All values are passed as bound parameters instead of being pasted into
    # the SQL text; *zone* comes from the command line and the TLDs come from
    # names stored in the database.
    base_select = ('select distinct name, port, zone from tlsa_rdata '
                   'where year=? and month=? and day=?')
    if zone == '':
        name_sql = 'select distinct name from (%s)' % base_select
    else:
        name_sql = 'select distinct name from (%s) where zone=?' % base_select
    count_sql = ('select count(*) from '
                 '(select distinct name, port from (%s) where zone=?)' % base_select)

    sqldb = sqlite3.connect(in_fn)
    try:
        sqldb_cur = sqldb.cursor()
        # Idempotent, so it also covers an existing file lacking the table.
        sqldb_cur.execute("CREATE TABLE if not exists zone_num (zone text, year int, month int, day int, tlsa_name int, tlsa_zone int, dnssec_zone int)")
        sqldb.commit()

        dy, dm, dd = [int(part) for part in date_text.split('-')]
        for x in range(0, 1 + num_days):
            stats_day = get_date(dy, dm, dd, x)
            record_day = get_date(dy, dm, dd, x - 1)
            day_params = (stats_day.year, stats_day.month, stats_day.day)
            record_stamp = '%04d%02d%02d' % (record_day.year,
                                             record_day.month,
                                             record_day.day)

            stats_db = in_path + '/data/stats.%s.db' % stats_day
            # sqlite3.connect would silently create an empty database for a
            # missing file and the query would then fail; abort up front.
            if not os.path.isfile(stats_db):
                sys.exit('[error] file[%s] does not exist, abort!' % stats_db)

            conn = sqlite3.connect(stats_db)
            try:
                c = conn.cursor()
                if zone == '':
                    c.execute(name_sql, day_params)
                else:
                    c.execute(name_sql, day_params + (zone,))

                # Count each two-label zone once, grouped by its TLD.
                seen_zones = set()
                tld_zone_count = {}
                for row in c:
                    name = row[0]
                    lev2_zone = get_zone(name)
                    if lev2_zone in seen_zones:
                        continue
                    seen_zones.add(lev2_zone)
                    tld = get_tld(name)
                    tld_zone_count[tld] = tld_zone_count.get(tld, 0) + 1

                for z, zc in tld_zone_count.items():
                    c.execute(count_sql, day_params + (z,))
                    total_name = c.fetchone()[0]

                    ds_name_file = (in_path + '/input/' + z +
                                    '-signed-zones-' + record_stamp + '.bz2')
                    total_dnssec = -1  # -1 means the file does not exist
                    if os.path.isfile(ds_name_file):
                        total_dnssec = get_line_num(ds_name_file)

                    print_err('%s: zone[%s] tlsa_name[%d] tlsa_zone[%d] dnssec_zone[%d]\n' % (record_day,
                                                                                              z,
                                                                                              total_name,
                                                                                              zc,
                                                                                              total_dnssec))
                    sqldb_cur.execute('INSERT INTO zone_num VALUES (?, ?, ?, ?, ?, ?, ?)',
                                      (z,
                                       record_day.year,
                                       record_day.month,
                                       record_day.day,
                                       total_name,
                                       zc,
                                       total_dnssec))
            finally:
                conn.close()
            # One commit per processed day, as before.
            sqldb.commit()
    finally:
        sqldb.close()
def main():
    """Parse the command line, check the options and run insert_zone_db.

    Recognised options: -h/--help, -i/--input, -p/--path, -d/--date,
    -n/--numdays and -z/--zone.  The date, the number of days, the input
    path and the input file are mandatory; the process exits with an error
    message when one of them is missing or invalid.
    """
    try:
        # NOTE: 'H:' is accepted by the parser but no branch below handles
        # it, so a -H value is ignored; kept so existing invocations that
        # pass it keep working.
        opts, _ = getopt.getopt(sys.argv[1:], 'hi:p:d:n:z:H:', ['help', 'input=', 'path=', 'date=', 'numdays=', 'zone='])
    except getopt.GetoptError as err:
        print_err(str(err) + '\n')
        usage(sys.argv[0])
        sys.exit(1)

    in_path = ''
    date_text = ''
    in_fn = ''
    zone = ''
    num_days = -1
    for o, a in opts:
        if o in ('-h', '--help'):
            usage(sys.argv[0])
            sys.exit()
        elif o in ('-d', '--date'):
            date_text = a
        elif o in ('-i', '--input'):
            in_fn = a
        elif o in ('-p', '--path'):
            in_path = a
        elif o in ('-z', '--zone'):
            zone = a
        elif o in ('-n', '--numdays'):
            # Report a non-numeric value like the other option errors
            # instead of failing with a ValueError traceback.
            try:
                num_days = int(a)
            except ValueError:
                usage(sys.argv[0])
                sys.exit('[error] num_days[%s] is not an integer, abort!' % a)

    if date_text == '':
        usage(sys.argv[0])
        sys.exit('[error] date must be provided, abort!')
    elif not validate(date_text):
        sys.exit('[error] date is not valid, abort!')

    # num_days keeps its -1 default when -n was not given.
    if num_days < 0:
        usage(sys.argv[0])
        sys.exit('[error] num_days[%d] < 0, abort!' % num_days)

    if in_path == '':
        usage(sys.argv[0])
        sys.exit('[error] input path is empty, abort!')
    elif not os.path.isdir(in_path):
        sys.exit('[error] input path does not exist, abort!')

    if in_fn == '':
        usage(sys.argv[0])
        sys.exit('[error] input file is empty, abort!')

    insert_zone_db(in_path, in_fn, date_text, num_days, zone)
# Run the command-line entry point only when the file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()