-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFragility.pm
123 lines (110 loc) · 2.71 KB
/
Fragility.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package BlackCurtain::Fragility;
use 5.10.0;
use Carp;
use base qw(Clone);
use vars qw($AUTOLOAD);
use Scalar::Util qw(blessed);
use Encode;
use LWP::UserAgent;
use Crypt::SSLeay;
use HTTP::Headers;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;
use HTML::TreeBuilder;
use HTML::Form;
use XML::Simple qw();
use YAML::XS qw();
use JSON::XS qw();
AUTOLOAD
{
my($s,@a) = @_;
if(eval(qq(require $AUTOLOAD))){
return($AUTOLOAD->new(@a));
}else{
Carp::croak($@);
}
}
sub new
{
my($s,%a) = @_;
return(($s = bless({},$s))->clean(%a));
}
sub clean
{
my($s,%a) = @_;
$s->{a} = LWP::UserAgent->new(
#agent =>
#from =>
#conn_cache =>
cookie_jar =>HTTP::Cookies->new(),
default_headers =>HTTP::Headers->new(
User_Agent =>"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0",
Accept_Encoding =>"gzip, deflate",
Accept_Language =>"ja_JP",
),
#local_address =>
#ssl_opts =>
#max_size =>
#max_redirect =>
#parse_head =>
#protocols_allowed =>
#protocols_forbidden =>
requests_redirectable =>[qw(GET HEAD POST)],
timeout =>30,
%a,
);
$s->{a}->add_handler(response_done =>sub(){pop();pop()->{def_headers}->header(Referer =>pop()->request()->uri())});
return($s);
}
sub spider
{
my($s,$q,$y,@a) = @_;
if(($s->{s} = $s->{a}->request(blessed($q) ? $q : HTTP::Request->new(ref($q) eq "HASH" ? %{$q} : (GET =>$q))))->is_success()){
$s->{h} = $s->{s}->header()->as_string();
$s->{b} = $s->{s}->decoded_content(default_charset =>"UTF-8");
given($y // $s->{s}->header("Content-Type")){
when(/^(?:text\/)?html(?:;.+?)?$/io){
$s->{d} = HTML::TreeBuilder->new_from_content($s->{b});
}
when(/^(?:application\/)?xml(?:;.+?)?$/io){
$s->{d} = XML::Simple::XMLin($s->{b});
}
when(/^(?:application\/)?json(?:;.+?)?$/io){
$s->{d} = JSON::XS::decode_json($s->{b});
}
when(/^(?:application\/(?:x-))?yaml(?:;.+?)?$/io){
$s->{d} = YAML::XS::Load($s->{b});
}
}
return($s->{s}->code(),$s->{b},$s->{d},$s->seek(@a));
}else{
return($s->{s}->code());
}
}
sub seek
{
my($s,@a) = @_;
my @r;
while($#a != -1){
my($op,$var) = (shift(@a),shift(@a));
given($op){
when("regx"){
push(@r,$s->{b} =~ /$var/i ? defined($1) ? $1 : 1 : 0);
}
when(ref() eq "ARRAY" && $_->[0] eq "regx"){
push(@r,[$s->{b} =~ /$var/gi]);
}
when("form"){
$s->{form} //= [HTML::Form->parse($s->{b},$s->{s}->request()->uri())];
push(@r,(grep{grep(/\Q$var\E/i,@{$_->{attr}}{qw(id class name)})}@{$s->{form}})[0]);
}
when(ref() eq "ARRAY" && $_->[0] eq "form"){
$s->{form} //= [HTML::Form->parse($s->{b},$s->{s}->request()->uri())];
push(@r,[grep{grep(/\Q$var\E/i,@{$_->{attr}}{qw(id class name)})}@{$s->{form}}]);
}
}
}
return(@r);
}
1;