const async = require('async')
const superagent = require('superagent')
const cheerio = require('cheerio')
const models = require('./db')
const mysql = require('mysql')
const $sql = require('./sqlMap')
// Request headers attached to every page fetch (passed to superagent's `.set()`).
// The values appear to mimic a desktop Chrome session.
// NOTE(review): the Cookie below is a hard-coded session value checked into
// source — confirm whether it should be loaded from configuration instead.
const headers = {
  // Content negotiation
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  'Accept-Encoding': 'gzip, deflate',
  'Accept-Language': 'zh-CN,zh;q=0.9',

  // Caching / connection handling
  'Cache-Control': 'max-age=0',
  Connection: 'keep-alive',

  // Session identity
  Cookie: 'ASP.NET_SessionId=npkbpv55oiqkub55t4phoj55; GUID=0cc6d10722040054; shuo=06681C82EB1A3_786_06563_23590_21001-2',

  // Target host and conditional-request metadata
  Host: 'yaohuo.me',
  'If-Modified-Since': 'Wed, 07 Mar 2018 14:00:00 GMT',
  'Upgrade-Insecure-Requests': '1',

  // Browser fingerprint
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
}
// Site root and the inclusive range of thread ids to crawl.
const origin = 'http://yaohuo.me'
const startId = 100000
const endId = 597452

// One page URL per id, in ascending id order: <origin>/bbs-<id>.html
const urls = Array.from(
  { length: endId - startId + 1 },
  (_unused, offset) => `${origin}/bbs-${startId + offset}.html`
)
// Shared MySQL connection pool, configured from the `mysql` settings exported by ./db.
const pool = mysql.createPool(models.mysql)
/**
 * Fetches one thread page, extracts its title, body and author, and inserts
 * them into MySQL using the `$sql.inst.instdata` statement.
 *
 * The callback is invoked exactly once for every URL. Failures (request error,
 * unexpected page layout, database error) are logged and reported as a status
 * string with a `null` error, so one bad page neither stalls nor aborts the
 * surrounding crawl.
 *
 * @param {string} url - Absolute URL of the page to fetch.
 * @param {function(?Error, string): void} callback - Node-style completion
 *   callback; always receives `null` and a status string for this URL.
 */
const getData = (url, callback) => {
  // Logs the failure and still completes the task so the caller's queue keeps moving.
  const fail = (stage, err) => {
    console.error(url + ' ' + stage + ' failed: ' + (err && err.message ? err.message : err))
    callback(null, url + ' ' + stage + ' failed')
  }

  superagent
    .get(url)
    .set(headers)
    .end((err, res) => {
      if (err) {
        return fail('request', err)
      }

      // [title, context, author], or null when the page has no content.
      let record = null
      try {
        // `$` is local: a shared global would be overwritten by concurrent tasks.
        const $ = cheerio.load(res.text)
        const $content = $('.content')
        if ($content.text() !== '') {
          record = [
            // The title is the text node directly before the first child element.
            $content.children()[0].prev.data,
            $('.bbscontent').text(),
            $('.subtitle').next().next().next().children().first().text()
          ]
        }
      } catch (parseErr) {
        // Layout differs from what the selectors expect (e.g. no child elements).
        return fail('parse', parseErr)
      }

      if (record === null) {
        console.log(url + '资源不存在')
        return callback(null, url + '资源不存在')
      }

      pool.getConnection((connErr, connection) => {
        if (connErr) {
          return fail('database connection', connErr)
        }
        connection.query($sql.inst.instdata, record, (queryErr) => {
          // Hand the connection back only after the query has finished.
          connection.release()
          if (queryErr) {
            return fail('insert', queryErr)
          }
          console.log('插入数据库成功!')
          callback(null, url + ' html content')
        })
      })
    })
}
// Crawl every URL through getData with a concurrency limit of 100, then print
// the per-URL status strings collected by mapLimit.
// NOTE(review): nothing in the visible code closes the MySQL pool once the
// crawl finishes — confirm whether the process is expected to exit on its own.
async.mapLimit(urls, 100, (url, callback) => {
  getData(url, callback)
}, (err, result) => {
  if (err) {
    // Surface the failure instead of silently dropping it.
    console.error('crawl failed:', err)
  }
  console.log('final:')
  console.log(result)
})