// eventproxy 并发处理

const superagent = require('superagent')
const cheerio = require('cheerio')
const models = require('./db')
const mysql = require('mysql')
const $sql = require('./sqlMap')
const EventProxy = require('eventproxy')

// Request headers sent with every page fetch; they imitate a desktop Chrome
// session. 'Cookie' holds a placeholder value.
// NOTE(review): 'Host' does not match the `origin` used to build the URLs —
// confirm which value is the intended one.
const headers = {
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  'Accept-Encoding': 'gzip, deflate',
  'Accept-Language': 'zh-CN,zh;q=0.9',
  'Cache-Control': 'max-age=0',
  Connection: 'keep-alive',
  Cookie: 'cookie',
  Host: 'yaohuo.me',
  'If-Modified-Since': 'Wed, 07 Mar 2018 14:00:00 GMT',
  'Upgrade-Insecure-Requests': '1',
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
}

const origin = 'http://xxx.me'
const startId = 100000
const endId = 597452
// Pages are addressed as /bbs-<id>.html; build one URL for every id from
// startId through endId (both inclusive), in ascending order.
const urls = Array.from(
  { length: endId - startId + 1 },
  (_, offset) => `${origin}/bbs-${startId + offset}.html`
)

const ep = new EventProxy()

/**
 * Runs once the 'getUrl' event has been emitted as many times as there are
 * URLs, i.e. when the whole crawl has finished.
 *
 * @param {Array} results - payloads collected from the 'getUrl' emits (unused).
 */
function onAllFetched(results) {
  console.log('回调执行了')
}

ep.after('getUrl', urls.length, onAllFetched)

// One pool shared by every insert. Creating a pool per page (as before) opens
// a fresh set of connections for each URL and never closes them.
// NOTE(review): the pool is never ended, so the process will stay alive after
// the crawl — consider calling pool.end() from the completion callback.
const pool = mysql.createPool(models.mysql)
const insertSql = $sql.inst.instdata

/**
 * Reports that `url` has been fully processed (successfully or not).
 * Every code path must call this exactly once, otherwise the
 * `ep.after('getUrl', urls.length, ...)` barrier never fires.
 *
 * @param {string} url - the page URL that has finished processing.
 */
const finish = url => {
  ep.emit('getUrl', url)
}

// Fetch every page, extract title + body, and store them in MySQL.
// NOTE(review): all requests are still started at once; with this many URLs a
// concurrency limit is advisable — left unchanged to keep the flow intact.
urls.forEach(element => {
  superagent
    .get(element)
    .set(headers)
    .end((err, res) => {
      if (err) {
        console.error(element + ' request failed: ' + err.message)
        finish(element)
        return
      }

      // Fall back to an empty document when the response has no text body.
      const $ = cheerio.load(res.text || '')
      const content = $('.content')
      if (content.text() === '') {
        console.log(element + '资源不存在')
        finish(element)
        return
      }

      // The title is the node immediately preceding the first child element
      // of .content; bail out instead of throwing when that node is missing.
      const firstChild = content.children()[0]
      if (!firstChild || !firstChild.prev) {
        console.error(element + ' has an unexpected layout, skipped')
        finish(element)
        return
      }
      const title = firstChild.prev.data
      const context = $('.bbscontent').text()

      // pool.query acquires a connection, runs the statement and releases the
      // connection afterwards, so acquisition errors reach this callback too.
      pool.query(insertSql, [title, context], queryErr => {
        if (queryErr) {
          console.error(element + ' insert failed: ' + queryErr.message)
        } else {
          console.log('插入数据库成功!')
        }
        // Signal completion only after the insert has settled.
        finish(element)
      })
    })
})

 

// 发表评论
//
// 邮箱地址不会被公开。 必填项已用*标注