forked from priyom/dyatlov
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkiwisdr_com-parse
executable file
·82 lines (69 loc) · 2.25 KB
/
kiwisdr_com-parse
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#! /usr/bin/perl
# KiwiSDR.com receiver list parser for dyatlov map maker
# Copyright 2017, 2020 Pierre Ynard
# Licensed under GPLv3+
#
# This script reads the http://kiwisdr.com/public/ listing HTML from the
# standard input, and writes the corresponding extracted javascript data
# to the standard output.
#
# The output is not JSON, but a ready-to-execute javascript file; so
# data needs to be all the more carefully validated to prevent malicious
# injection.
use strict;
use warnings;
# Escape a string so that it can be safely placed between double quotes
# in the generated javascript.
#
# Takes the raw field value as its only argument and returns an escaped
# copy; the argument itself is not modified.
#
# Double quotes and backslashes are prefixed with a backslash. Line
# feeds and carriage returns are turned into the \n and \r escape
# sequences: a backslash followed by the raw character would be read by
# javascript as a line continuation, which silently drops the character
# from the value and still leaves a physical line break in the output.
sub json_escape {
    my $field = shift;
    # Quotes and backslashes first, so that the backslashes introduced
    # by the line break escapes below are not escaped a second time.
    # A capture group is used instead of $&, which slows down regex
    # matching in perl versions older than 5.20.
    $field =~ s/(["\\])/\\$1/g;
    $field =~ s/\n/\\n/g;
    $field =~ s/\r/\\r/g;
    return $field;
}
print "// KiwiSDR.com receiver list for dyatlov map maker\n// Automatically generated from http://kiwisdr.com/public/\n";

# Extract source data timestamp
#
# Lines are read from the standard input until the timestamp marker is
# found, or until one of the safety nets below gives up. On success the
# timestamp is written to the output as a javascript comment; on failure
# a message is written to the standard error, and processing continues
# with whatever input is left.
my $limit = -1;          # -1: <body> not seen yet; otherwise lines left to look at
my $timestamp_done = 0;  # set once success or failure has been reported
while (<STDIN>) {
    # This is where the timestamp is currently found
    if (/<div class='id-text-container'>/) {
        if (defined($_ = <STDIN>) && /(.*)/) {
            my $timestamp = $1;
            # The value ends up inside a // comment of an executable
            # javascript file. "." only excludes the line feed, but
            # a carriage return or U+2028/U+2029 (as UTF-8 bytes or as
            # decoded characters) also terminates such a comment, and
            # would let the rest of the line run as code. Neutralize
            # them.
            $timestamp =~ s/\r|\xE2\x80[\xA8\xA9]|[\x{2028}\x{2029}]/ /g;
            print "// KiwiSDR.com data timestamp: $timestamp\n";
        } else {
            print STDERR "Data timestamp extraction failed.\n";
        }
        $timestamp_done = 1;
        last;
    }
    # Safety nets: abort if going too far without finding the timestamp.
    # This tag means that the receiver listing has already been reached.
    elsif (/<div class='cl-entry'>/ || $limit == 0) {
        print STDERR "Data timestamp extraction failed.\n";
        $timestamp_done = 1;
        last;
    }
    # Safety nets: abort if the timestamp can't be found within
    # so many lines into <body>
    if ($limit < 0 && /<body[ >]/) {
        $limit = 20;
    } elsif ($limit > 0) {
        $limit--;
    }
}
# The input ran out before any of the conditions above triggered: this
# is a failure too, and must not go unreported.
print STDERR "Data timestamp extraction failed.\n" unless $timestamp_done;
# Write the generation time, then open the javascript array that will
# hold one object per receiver.
my $generated = gmtime();
print "// File generation timestamp: $generated\n\nvar kiwisdr_com =\n[\n";

while (my $line = <STDIN>) {
    # Only the marker opening a receiver section is of interest here;
    # anything else in between sections is skipped.
    next unless $line =~ /<div class='cl-info'>/;

    print "\t{\n";

    # The data fields of the receiver come as a run of HTML comments
    # of the form "<!-- key=value -->". Keep reading until a line
    # does not look like one, or until the input ends.
    while (1) {
        $line = <STDIN>;
        last unless defined($line);
        last unless $line =~ /<!-- ([\w]+)=(.*) -->/a;
        my ($key, $value) = ($1, $2);
        print "\t\t\"$key\":\"", json_escape($value), "\",\n";
    }

    # The line that ended the run of fields is dropped, and the URL
    # of the receiver is looked for on the line after it, as the text
    # of an HTML link.
    $line = <STDIN>;
    if (defined($line) && $line =~ />([^<]+)<\/a>/) {
        # Whether URLs are properly XML-encoded in the source is not
        # known; if they are, XML entities would have to be decoded
        # at this point.
        my $url = $1;
        print "\t\t\"url\":\"", json_escape($url), "\"\n";
    } else {
        print STDERR "URL extraction failed: ", ($line // "truncated input\n");
    }

    print "\t},\n";
}

# Close the array and terminate the statement
print "]\n;\n";