const superagent = require('superagent')
const cheerio = require('cheerio')
const models = require('./db')
const mysql = require('mysql')
const $sql = require('./sqlMap')
const EventProxy = require('eventproxy')
// Request headers sent with every page fetch; they imitate a desktop Chrome
// browser session.
// NOTE(review): `Cookie` looks like a placeholder value — confirm a real
// session cookie is supplied before running.
// NOTE(review): `Host` is 'yaohuo.me' while `origin` below is 'http://xxx.me';
// confirm the two are meant to differ.
const headers = {
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  'Accept-Encoding': 'gzip, deflate',
  'Accept-Language': 'zh-CN,zh;q=0.9',
  'Cache-Control': 'max-age=0',
  Connection: 'keep-alive',
  Cookie: 'cookie',
  Host: 'yaohuo.me',
  'If-Modified-Since': 'Wed, 07 Mar 2018 14:00:00 GMT',
  'Upgrade-Insecure-Requests': '1',
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
}
// Post pages are addressed as <origin>/bbs-<id>.html. One URL is generated for
// every id from startId through endId, both inclusive.
// NOTE(review): the original comment named /bbs-10000.html as the first page,
// but startId is 100000 — confirm which value is intended.
const origin = 'http://xxx.me'
const startId = 100000
const endId = 597452
const urls = Array.from(
  { length: endId - startId + 1 },
  (_, offset) => `${origin}/bbs-${startId + offset}.html`
)
// Completion hook: EventProxy invokes the handler once the 'getUrl' event has
// been emitted urls.length times, passing the collected event payloads.
const ep = new EventProxy()
const onAllFetched = fetched => {
  // Reached when all crawling has finished.
  console.log('回调执行了')
}
ep.after('getUrl', urls.length, onAllFetched)
// Crawl every URL in `urls`, store each post's title and body text in MySQL,
// and emit 'getUrl' on `ep` exactly once per URL after that URL has been fully
// handled — whether it succeeded, failed, or had no content — so that the
// `ep.after` completion callback can actually fire.

// A single pool shared by the whole run; pool.query() acquires a connection,
// runs the statement and releases the connection again on its own.
const pool = mysql.createPool(models.mysql)

// Upper bound on simultaneous HTTP requests, so the whole URL list is not
// requested at once.
const maxConcurrentRequests = 10

let nextUrlIndex = 0
let finishedCount = 0

/**
 * Fetch one page, extract the post and insert it into the database.
 *
 * @param {string} pageUrl - Absolute URL of the page to fetch.
 * @param {Function} done - Called exactly once, with no arguments, when the
 *   page has been handled (stored, skipped, or failed).
 */
function crawlPage(pageUrl, done) {
  superagent
    .get(pageUrl)
    .set(headers)
    .end((err, res) => {
      if (err) {
        console.error(pageUrl + ' request failed:', err.message)
        return done()
      }

      const $ = cheerio.load(res.text)
      const content = $('.content')
      if (content.text() === '') {
        console.log(pageUrl + '资源不存在')
        return done()
      }

      // The title is read from the node that immediately precedes the first
      // child element of `.content`; skip the page when that node is missing
      // instead of throwing inside the callback.
      const firstChild = content.children()[0]
      const titleNode = firstChild ? firstChild.prev : null
      if (!titleNode) {
        console.error(pageUrl + ' has no title node before the first child of .content, skipped')
        return done()
      }
      const title = titleNode.data
      const context = $('.bbscontent').text()

      // Run the insert statement.
      pool.query($sql.inst.instdata, [title, context], queryErr => {
        if (queryErr) {
          console.error(pageUrl + ' database insert failed:', queryErr.message)
        } else {
          console.log('插入数据库成功!')
        }
        done()
      })
    })
}

/**
 * Start crawling the next URL that has not been requested yet, if any. When a
 * page finishes, the completion event is emitted and the next page is started,
 * which keeps at most `maxConcurrentRequests` pages in flight.
 */
function crawlNext() {
  if (nextUrlIndex >= urls.length) {
    return
  }
  const pageUrl = urls[nextUrlIndex]
  nextUrlIndex += 1

  crawlPage(pageUrl, () => {
    ep.emit('getUrl', pageUrl)
    finishedCount += 1

    if (finishedCount === urls.length) {
      // Every page is done: close the pool so its connections are released.
      pool.end(endErr => {
        if (endErr) {
          console.error('closing the MySQL pool failed:', endErr.message)
        }
      })
      return
    }
    crawlNext()
  })
}

const initialRequests = Math.min(maxConcurrentRequests, urls.length)
for (let started = 0; started < initialRequests; started++) {
  crawlNext()
}