This repository has been archived by the owner on May 26, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.js
130 lines (118 loc) · 4.92 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
const fs = require('fs')
const config = JSON.parse(fs.readFileSync('config.json', 'utf8'))
const puppeteer = require('puppeteer')
const request = require('request')
// Delay between two scrapes, in hours (only used when useInterval is true)
const hoursBetweenRuns = 1
// A deal is kept only when its temperature is strictly above this value
const temperatureMin = 450
// Maximum number of deals sent per run (a falsy value disables the cap)
const limit = 3
// File that stores the id of every deal already sent, one id per line
const dealsSentFile = 'deals-sent.log'
// When true, postDeal returns without posting anything
const testMode = false
// When true, every deal is logged while it goes through the filters
const verbose = false
// When true, scrape is run again every hoursBetweenRuns hours through setInterval
const useInterval = false // TODO : this is not the right way to schedule runs and it consumes memory
// In-memory list of the ids of the deals already sent
let dealsSent = []
// Print a message on the console, prefixed with the current time
const log = (str) => {
  const stamp = time()
  console.log(stamp + ' : ' + str)
}
// Format the current local time, optionally shifted by `addHours` hours, as "HHhMM" (for example "07h05")
const time = (addHours = 0) => {
  const moment = new Date()
  moment.setHours(moment.getHours() + addHours)
  // left-pad single digit values with a zero
  const twoDigits = (value) => String(value).padStart(2, '0')
  return twoDigits(moment.getHours()) + 'h' + twoDigits(moment.getMinutes())
}
// Load the ids of the deals already sent, creating the history file on first run
try {
  // drop blank lines so that an empty file gives an empty list instead of ['']
  dealsSent = fs.readFileSync(dealsSentFile, 'utf8')
    .split('\n')
    .map(id => id.trim())
    .filter(id => id.length > 0)
  log('Found ' + dealsSent.length + ' deals already sent in ' + dealsSentFile)
} catch (e) {
  // only a missing file means "first run" : any other failure must not wipe the existing history
  if (e.code !== 'ENOENT') {
    log('Cannot read ' + dealsSentFile + ' : ' + e)
    throw e
  }
  // writeFileSync is synchronous and takes no callback, so log right after it returns
  fs.writeFileSync(dealsSentFile, '')
  log('created empty ' + dealsSentFile)
}
/**
 * Post a deal to the IFTTT webhook and, once the webhook accepted it, remember the deal as sent.
 * Does nothing in test mode. Failures are logged and the deal stays unsent.
 * @param {{id: string, url: string, merchant: string, price: ?string}} deal deal to send
 */
const postDeal = (deal) => {
  if (testMode) return
  const message = (deal.price ? deal.price + ' ' : '') + '@ ' + deal.merchant + ' : ' + deal.url
  const options = { uri: config.iftttWebhook, method: 'POST', json: { value1: message } }
  request(options, (error, response) => {
    if (error) {
      log('postDeal error : ' + error)
      return
    }
    // `error` only covers transport failures : a rejected request still comes back with a status code
    if (!response || response.statusCode < 200 || response.statusCode >= 300) {
      log('postDeal error : unexpected HTTP status ' + (response ? response.statusCode : 'unknown'))
      return
    }
    dealsSent.push(deal.id) // persist to in memory list
    try {
      fs.appendFileSync(dealsSentFile, deal.id + '\n') // and on file system
    } catch (writeError) {
      // keep running : the deal is known in memory for this process even if the file is not writable
      log('postDeal error : cannot update ' + dealsSentFile + ' : ' + writeError)
    }
  })
}
// Announce in the log when the next scrape is planned
const logNextSrap = () => {
  const delayText = 'Next execution planned in ' + hoursBetweenRuns + ' hours at '
  log(delayText + time(hoursBetweenRuns))
}
// Scroll the given page to the bottom of its body, then wait 500 ms before resolving.
// The function handed to page.evaluate only relies on the page globals (window, document, setTimeout).
const scroll = (page) => {
  return page.evaluate(() => {
    return new Promise((done) => {
      window.scrollTo(0, document.body.scrollHeight)
      setTimeout(() => done(), 500)
    })
  })
}
/**
 * Scrape the Dealabs home page, keep the deals that are hot enough and not sent yet,
 * then post them one by one, one second apart.
 *
 * The browser is always closed, even when loading or reading the page fails.
 * @returns {Promise<void>} resolved once the posts are scheduled, rejected when the browser or the page fails
 */
const scrape = async () => {
  log('Start deals scrapping...')
  const browser = await puppeteer.launch({ headless: true })
  let deals
  try {
    const page = await browser.newPage()
    await page.goto('https://www.dealabs.com')
    await page.waitFor(800)
    // scroll to the bottom three times before reading the deals
    // NOTE(review): presumably this makes the page load more deals — confirm
    await scroll(page)
    await scroll(page)
    await scroll(page)
    // the callback below is handed to page.evaluate : it only uses page globals, nothing from this file
    deals = await page.evaluate(() => {
      const textOf = (el) => (el ? el.textContent.trim() : null)
      const articles = document.querySelectorAll('section.tGrid article.thread:not(.thread--expired)')
      const found = []
      for (const article of Array.from(articles)) {
        const titleEl = article.querySelector('.thread-title a')
        // keep the first 7 words of the title only
        const title = (textOf(titleEl) || 'No title found').split(' ').slice(0, 7).join(' ') + '...'
        const url = titleEl ? titleEl.href : null
        // the id is whatever follows the last dash of the url
        const id = url ? url.split('-').reverse()[0] : null
        const temperatureText = textOf(article.querySelector('.vote-temp'))
        const temperature = temperatureText ? parseInt(temperatureText, 10) : null
        const merchant = textOf(article.querySelector('.cept-merchant-name'))
        const price = textOf(article.querySelector('.thread-price'))
        // price is optional, everything else is required
        if (id && url && temperature && merchant) {
          found.push({ id, title, url, temperature, merchant, price })
        }
      }
      return found
    })
  } finally {
    // release the browser whatever happened above
    await browser.close()
  }
  log(deals.length + ' deals found')
  // filter by temperature
  deals = deals.filter(deal => {
    if (verbose) {
      log(deal.temperature + '° | ' + deal.title)
    }
    return deal.temperature > temperatureMin
  })
  log(deals.length + ' deals above ' + temperatureMin + '°')
  // filter sent
  deals = deals.filter(deal => {
    const dealAlreadySent = dealsSent.includes(deal.id)
    if (verbose) {
      log((dealAlreadySent ? 'Avoid re-sending deal' : 'Brand new deal') + ' ' + deal.id + ' "' + deal.title + '" ' + deal.price + ' @ ' + deal.merchant)
    }
    return !dealAlreadySent
  })
  log(deals.length + ' deals not sent yet')
  // filter if limit specified
  if (limit && limit < deals.length) {
    deals = deals.slice(0, limit)
    log(deals.length + ' deals after limit')
  }
  log(deals.length + ' deals will be sent to IFTTT')
  // send deals at 1 second interval
  deals.forEach((deal, index) => {
    setTimeout(() => postDeal(deal), index * 1000)
    if (index === (deals.length - 1) && useInterval) {
      // last iteration : announce the next run one second after the last post
      setTimeout(logNextSrap, (index * 1000) + 1000)
    }
  })
  // if no deals found
  if (!deals.length && useInterval) {
    // still display next planned scrap
    logNextSrap()
  }
}
// Run one scrape and log a failure instead of leaving the rejected promise unhandled
const runScrape = () => scrape().catch((error) => {
  log('scrape error : ' + error)
  // a failed one-shot run stays visible through the exit code ; a scheduled one simply waits for the next run
  if (!useInterval) process.exitCode = 1
})
// start now
runScrape()
if (useInterval) {
  // every X hours
  setInterval(runScrape, 1000 * 60 * 60 * hoursBetweenRuns)
}