forked from philippta/flyscrape
-
Notifications
You must be signed in to change notification settings - Fork 0
/
js_test.go
102 lines (85 loc) · 2.13 KB
/
js_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package flyscrape_test
import (
"encoding/json"
"testing"
"github.com/philippta/flyscrape"
"github.com/stretchr/testify/require"
)
// Shared fixtures used by the tests in this file.
var (
	// html is the page markup handed to the compiled scraper as
	// ScrapeParams.HTML; it holds a single h1 and a single p element.
	html = `
<html>
<body>
<main>
<h1>headline</h1>
<p>paragraph</p>
</main>
</body>
</html>`

	// script is the JavaScript scraper source passed to flyscrape.Compile.
	// It exports a config object and a default function that returns the
	// h1 text, the p text and the url it was called with.
	script = `
export const config = {
url: "https://localhost/",
}
export default function({ doc, url }) {
return {
headline: doc.find("h1").text(),
body: doc.find("p").text(),
url: url,
}
}
`
)
// TestJSScrape compiles the fixture script, runs the resulting scrape
// function against the fixture HTML, and checks every field of the
// returned object.
func TestJSScrape(t *testing.T) {
	cfg, scrape, err := flyscrape.Compile(script)
	require.NoError(t, err)
	require.NotNil(t, cfg)
	require.NotNil(t, scrape)

	params := flyscrape.ScrapeParams{
		HTML: html,
		URL:  "http://localhost/",
	}
	out, err := scrape(params)
	require.NoError(t, err)

	// The scrape result is untyped; it must be a string-keyed map.
	fields, isMap := out.(map[string]any)
	require.True(t, isMap)

	// Checked in a fixed order so the first failing field is deterministic.
	expectations := []struct{ key, want string }{
		{"headline", "headline"},
		{"body", "paragraph"},
		{"url", "http://localhost/"},
	}
	for _, e := range expectations {
		require.Equal(t, e.want, fields[e.key])
	}
}
// TestJSCompileError verifies that compiling syntactically invalid
// JavaScript fails, yields an empty config and a nil scrape function, and
// that the error can be unwrapped into a flyscrape.TransformError holding
// the expected position and message.
func TestJSCompileError(t *testing.T) {
	cfg, run, err := flyscrape.Compile("import foo;")
	require.Error(t, err)
	require.Empty(t, cfg)
	require.Nil(t, run)

	var terr flyscrape.TransformError
	require.ErrorAs(t, err, &terr)

	// require.Equal takes (expected, actual); passing them in that order
	// keeps the labels in the failure output correct.
	want := flyscrape.TransformError{
		Line:   1,
		Column: 10,
		Text:   `Expected "from" but found ";"`,
	}
	require.Equal(t, want, terr)
}
// TestJSConfig checks that the config object exported by a script is
// returned by Compile in a form that JSON-decodes into the expected url,
// depth and allowedDomains values.
func TestJSConfig(t *testing.T) {
	// settings mirrors the config keys exported by the script below.
	type settings struct {
		URL            string   `json:"url"`
		Depth          int      `json:"depth"`
		AllowedDomains []string `json:"allowedDomains"`
	}

	src := `
export const config = {
url: 'http://localhost/',
depth: 5,
allowedDomains: ['example.com'],
}
export default function() {}
`

	// Only the config is under test; the scrape function is discarded.
	encoded, _, err := flyscrape.Compile(src)
	require.NoError(t, err)

	var got settings
	require.NoError(t, json.Unmarshal(encoded, &got))

	want := settings{
		URL:            "http://localhost/",
		Depth:          5,
		AllowedDomains: []string{"example.com"},
	}
	require.Equal(t, want, got)
}