// async 异步控制并发

const async =  require('async')
const superagent = require('superagent')
const cheerio = require('cheerio')
const models = require('./db')
const mysql = require('mysql')
const $sql = require('./sqlMap')

// Request headers sent with every page fetch; they imitate a desktop Chrome browser.
// NOTE(review): the Cookie value embeds a hard-coded session id and GUID — consider
// loading it from configuration instead of keeping it in source.
const headers = {
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  'Accept-Encoding': 'gzip, deflate',
  'Accept-Language': 'zh-CN,zh;q=0.9',
  'Cache-Control': 'max-age=0',
  Connection: 'keep-alive',
  Cookie: 'ASP.NET_SessionId=npkbpv55oiqkub55t4phoj55; GUID=0cc6d10722040054; shuo=06681C82EB1A3_786_06563_23590_21001-2',
  Host: 'yaohuo.me',
  'If-Modified-Since': 'Wed, 07 Mar 2018 14:00:00 GMT',
  'Upgrade-Insecure-Requests': '1',
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
}

// Site root and the inclusive range of page ids to crawl.
const origin = 'http://yaohuo.me'
const startId = 100000
const endId = 597452

// One URL of the form <origin>/bbs-<id>.html for every id in [startId, endId].
const urls = Array.from(
  { length: endId - startId + 1 },
  (_, offset) => `${origin}/bbs-${startId + offset}.html`
)
// MySQL connection pool shared by all fetches, configured from `models.mysql` (loaded from ./db).
const pool = mysql.createPool(models.mysql)

/**
 * Fetch one page, extract its title, body text and author, and insert them into MySQL.
 *
 * The callback is invoked once on every path with `(null, message)`. Failures
 * (request error, missing page, database error) are reported through the message
 * instead of an error argument so that a single bad page neither stalls nor aborts
 * a surrounding `async.mapLimit` run.
 *
 * @param {string} url - URL of the page to fetch.
 * @param {function(?Error, string): void} callback - Node-style completion callback.
 */
const getData = (url, callback) => {
  superagent
    .get(url)
    .set(headers)
    .end((err, res) => {
      if (err) {
        // Always call back: a silent return would leave this concurrency slot
        // occupied forever and the crawl would never finish.
        console.log(url + ' 请求失败: ' + err.message)
        callback(null, url + ' 请求失败')
        return
      }

      // Declared locally; an empty body is parsed as an empty document.
      const $ = cheerio.load((res && res.text) || '')
      const content = $('.content')
      if (content.text() === '') {
        console.log(url + '资源不存在')
        callback(null, url + '资源不存在')
        return
      }

      // The title is read from the `data` of the node that precedes the first child
      // element of .content. Guard each step so unexpected markup stores NULL
      // instead of throwing inside the response handler.
      const firstChild = content.children()[0]
      const titleNode = firstChild ? firstChild.prev : null
      const title = titleNode ? titleNode.data : null
      const context = $('.bbscontent').text()
      const author = $('.subtitle').next().next().next().children().first().text()

      // pool.query acquires a pooled connection, runs the statement and releases the
      // connection only after the query has completed.
      pool.query($sql.inst.instdata, [title, context, author], (queryErr) => {
        if (queryErr) {
          console.log(url + ' 插入数据库失败: ' + queryErr.message)
          callback(null, url + ' 插入数据库失败')
          return
        }
        console.log('插入数据库成功!')
        callback(null, url + ' html content')
      })
    })
}

// Run getData over every URL with at most 100 requests in flight at once, then
// print the collected per-URL messages.
async.mapLimit(urls, 100, getData, (err, result) => {
  if (err) {
    // Surface a failure instead of dropping it; results gathered so far are still printed.
    console.error(err)
  }
  console.log('final:')
  console.log(result)
})

// 发表评论
//
// 邮箱地址不会被公开。必填项已用 * 标注