-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathindex.js
60 lines (51 loc) · 1.61 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
// External dependencies
const axios = require('axios')
const cheerio = require('cheerio')
const fs = require('fs')
const chalk = require('chalk')
// Scraper configuration and shared state.
// First page to fetch; passed to getWebsiteContent at the bottom of the file.
const url = 'http://listverse.com/'
// File that exportResults writes the collected results to (as JSON).
const outputFile = 'data.json'
// Accumulates one metadata object per scraped article across all pages.
const parsedResults = []
// Page count at which getWebsiteContent exports the results and stops.
const pageLimit = 10
// Number of pages processed so far (incremented by getWebsiteContent).
let pageCounter = 0
// Running index handed out as the `count` field of each result.
let resultCount = 0
// Announce the start of the run on the console.
console.log(chalk.yellow.bgBlue(`\n Scraping of ${chalk.underline.bold(url)} initiated...\n`))
/**
 * Fetches one page, collects its article entries into `parsedResults`, and
 * then follows the pagination link to the next page until `pageLimit` pages
 * have been processed or no further page is linked.
 *
 * Each collected entry has the shape `{ count, title, url }`, where `title`
 * is the text of the article's `<h3>` and `url` is the `href` of its link.
 *
 * On any fetch/parse error the results gathered so far are exported and the
 * error is logged; the error is not rethrown.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<boolean|undefined>} Resolves to `false` once the crawl
 *   stops normally (limit reached or no next page), `undefined` after an error.
 */
const getWebsiteContent = async (url) => {
  let nextPageLink

  try {
    const response = await axios.get(url)
    const $ = cheerio.load(response.data)

    // New Lists
    // `.each` rather than `.map`: the callback is run only for its side effect.
    $('.wrapper .main .new article').each((i, el) => {
      const article = $(el)
      parsedResults.push({
        count: resultCount++,
        // The heading carries the title; the anchor carries the link.
        title: article.find('h3').text(),
        url: article.find('a').attr('href')
      })
    })

    // Pagination Elements Link
    nextPageLink = $('.pagination').find('.curr').parent().next().find('a').attr('href')
  } catch (error) {
    // Keep whatever was collected before the failure.
    exportResults(parsedResults)
    console.error(error)
    return undefined
  }

  pageCounter++

  // Stop at the page limit, or when the current page links to no next page
  // (otherwise the next request would be made with an undefined address).
  if (pageCounter >= pageLimit || !nextPageLink) {
    exportResults(parsedResults)
    return false
  }

  // Only announce pages that are actually going to be fetched.
  console.log(chalk.cyan(` Scraping: ${nextPageLink}`))

  // Return the nested call so the caller's promise settles when the whole
  // crawl is finished instead of leaving a floating promise behind.
  return getWebsiteContent(nextPageLink)
}
/**
 * Serializes the given results as 4-space-indented JSON and writes them to
 * `outputFile` asynchronously.
 *
 * A success banner with the number of results is printed only when the write
 * succeeds; a write error is reported on stderr instead.
 *
 * @param {Array<object>} parsedResults - Results to export.
 * @returns {void}
 */
const exportResults = (parsedResults) => {
  const payload = JSON.stringify(parsedResults, null, 4)

  fs.writeFile(outputFile, payload, (err) => {
    if (err) {
      // Nothing was written, so do not fall through to the success message.
      console.error(err)
      return
    }
    console.log(chalk.yellow.bgBlue(`\n ${chalk.underline.bold(parsedResults.length)} Results exported successfully to ${chalk.underline.bold(outputFile)}\n`))
  })
}
// Entry point: start scraping from the configured start page.
getWebsiteContent(url)