除了學習人數獲取不了,其他都沒啥問題
var http = require('http')
var Promise = require('bluebird')
var cheerio = require('cheerio')
// URL prefix of a page to crawl; an id from videoIds is appended to it.
var baseUrl = 'http://www.xianlaiwan.cn/learn/'
// Ids whose pages are fetched (each is concatenated onto baseUrl).
var videoIds = [935, 796, 694, 327]
/**
 * Extract the course title, learner count and chapter/video outline from
 * the markup of one course page.
 *
 * @param {string} html - Page markup; handed to cheerio.load.
 * @returns {{title: string, number: number, videos: Array}} Course record.
 *   `videos` holds one entry per `.chapter` element, shaped as
 *   `{ chapterTitle: string, videos: [{ title: string, id: string }] }`.
 */
function filterChapters(html){
  var $ = cheerio.load(html)

  // Text of the node's direct text-node children only (nodeType 3), so that
  // text inside nested elements is left out; then trimmed and tidied.
  function ownText(node){
    var text = node.contents().filter(function(){
      return this.nodeType === 3
    }).text().trim()
    text = text.replace(/<\/?[^>]*>/g, '') // strip HTML tags
    // Strip trailing blanks at line ends.
    // NOTE(review): inside a character class `|` is a literal pipe, so this
    // also removes pipes before a newline - confirm that is intended.
    text = text.replace(/[ | ]*\n/g, '\n')
    return text
  }

  var title = $('.course-infos .path span').text().trim()

  // parseInt returns NaN for empty or non-numeric text; report 0 instead so
  // callers always get a real number.
  // NOTE(review): the element may be empty in the fetched markup if the page
  // fills it in by a separate request - confirm.
  var number = parseInt($('.js-learn-num').text().trim(), 10)
  if (isNaN(number)) {
    number = 0
  }

  var courseData = {
    title: title,
    number: number,
    videos: []
  }

  $('.chapter').each(function(){
    var chapter = $(this)
    var chapterData = {
      chapterTitle: ownText(chapter.find('strong')),
      videos: []
    }

    chapter.find('.video').children('li').each(function(){
      var video = $(this).find('.J-media-item')
      var href = video.attr('href')
      // A list item without a media link has no href; skip it rather than
      // throwing on undefined.split.
      if (!href) {
        return
      }
      chapterData.videos.push({
        title: ownText(video),
        id: href.split('video/')[1]
      })
    })

    courseData.videos.push(chapterData)
  })

  return courseData
}
/**
 * Print a report for a list of course records to the console.
 *
 * Two passes are made over the list: first one summary line per course
 * (learner count and title), then for every course its heading followed by
 * each chapter title and that chapter's videos.
 *
 * @param {Array} courseData - Course records with `number`, `title` and
 *   `videos` (chapters, each with `chapterTitle` and `videos` of
 *   `{ id, title }`).
 */
function printCourseInfo(courseData){
  // One "[id] title" line for a single video.
  function printVideo(entry){
    console.log(' [' + entry.id + '] ' + entry.title + '\n')
  }

  // Chapter title, then every video of that chapter.
  function printChapter(section){
    console.log(section.chapterTitle + '\n')
    section.videos.forEach(printVideoLine)
  }

  // forEach passes (value, index, array); forward only the value.
  function printVideoLine(entry){
    printVideo(entry)
  }

  // Summary line: "<number> 人學過 <title>".
  function printSummary(course){
    console.log(course.number + ' 人學過 ' + course.title + '\n')
  }

  // Course heading followed by its chapters.
  function printOutline(course){
    console.log('### ' + course.title + '\n')
    course.videos.forEach(function(section){
      printChapter(section)
    })
  }

  courseData.forEach(function(course){
    printSummary(course)
  })
  courseData.forEach(function(course){
    printOutline(course)
  })
}
/**
 * Download a page over HTTP and resolve with its body as a string.
 *
 * @param {string} url - Address passed to http.get.
 * @returns {Promise<string>} Resolves with the full response body once the
 *   response ends; rejects if the request or the response stream errors.
 */
function getPageAsync(url){
  return new Promise(function(resolve, reject){
    console.log('正在爬取 ' + url)
    http.get(url, function(res){
      // Decode as UTF-8 at the stream level. Without this every chunk is a
      // Buffer that gets stringified on its own, which corrupts a multi-byte
      // character whose bytes are split across two chunks.
      res.setEncoding('utf8')
      var html = ''
      res.on('data', function(data){
        html += data
      })
      res.on('end', function(){
        resolve(html)
      })
      // A failure while reading the body would otherwise leave the promise
      // pending forever.
      res.on('error', function(e){
        reject(e)
      })
    }).on('error', function(e){
      console.log('獲取課程數據出錯!')
      reject(e)
    })
  })
}
// Start every page download up front so they run in parallel; each entry is
// the promise returned by getPageAsync for one id.
var fetchCourseArray = videoIds.map(function(id){
  return getPageAsync(baseUrl + id)
})

Promise
  .all(fetchCourseArray)
  .then(function(pages){
    var courseData = pages.map(function(html){
      return filterChapters(html)
    })
    // A sort comparator must return a negative/zero/positive number. The
    // previous boolean result could never be negative, so the order was not
    // reliable. Sort by learner count, highest first.
    courseData.sort(function(a, b){
      return b.number - a.number
    })
    printCourseInfo(courseData)
  })
  .catch(function(e){
    // Promise.all rejects as soon as any page fails; report it instead of
    // leaving an unhandled rejection.
    console.error('爬取課程數據失敗: ' + (e && e.message ? e.message : e))
  })
2018-09-11
實現了獲取人數,源碼:
https://niuyi1017.github.io/2018/09/10/http-crawler.html
2018-06-06
可以參考一下我這個:接口是用 Chrome 開發者工具的 XHR 面板看到的,學習人數要單獨請求。
2018-03-18
根據提示查了點資料,大致思路是:在瀏覽器按 F12 查看源碼,可以找到獲取學習人數的連結,然後用 http.get 請求該連結就可以得到人數數據,結果如下:
2018-02-04
同問,學習人數獲取不到
2018-01-26
我測試了一下,是可以獲取的啊。實在不行,先在 Chrome 的調試面板裡試試。