
// Dependencies: Node's built-in HTTP client and cheerio for querying the
// downloaded markup with CSS selectors.
var http = require('http');
var cheerio = require('cheerio');
// Course page that gets downloaded and parsed.
var url = 'http://www.xianlaiwan.cn/learn/348';
/**
 * Extract the chapter/video listing from a course page.
 *
 * Result shape:
 *   [{ chapterTitle: '', videos: [{ title: '', id: '' }] }]
 *
 * NOTE(review): a later reply in this file reports that the site renamed
 * the `.learnchapter` / `.studyvideo` classes; the selectors are left as
 * written here -- confirm against the live page.
 *
 * @param {string} html - Page markup, handed to cheerio.load.
 * @returns {Array<Object>} One record per `.learnchapter` element, in
 *   document order.
 */
function filterChapters(html) {
    var $ = cheerio.load(html);
    var courseData = [];

    $('.learnchapter').each(function () {
        var chapter = $(this);
        var chapterData = {
            // Previously `.test()`, which is not a method on the selection
            // and threw a TypeError on the first chapter.
            chapterTitle: chapter.find('strong').text(),
            videos: []
        };

        chapter.find('.video').children('li').each(function () {
            var video = $(this).find('.studyvideo');
            // attr() yields undefined when the list item has no matching
            // link; guard so one odd item does not abort the whole parse.
            var href = video.attr('href');
            var id = typeof href === 'string' ? href.split('video/')[1] : undefined;

            chapterData.videos.push({
                title: video.text(),
                id: id
            });
        });

        courseData.push(chapterData);
    });

    return courseData;
}
/**
 * Print the parsed course outline to stdout.
 *
 * Each chapter title is logged on its own, followed by one indented
 * "【id】title" line per video. Every log call appends an extra '\n', so
 * entries are separated by a blank line.
 *
 * @param {Array<Object>} courseData - Records with `chapterTitle` and a
 *   `videos` array of `{ title, id }` objects.
 */
function printCourseInfo(courseData) {
    courseData.forEach(function (item) {
        var chapterTitle = item.chapterTitle;
        console.log(chapterTitle + '\n');

        item.videos.forEach(function (video) {
            // The two leading spaces indent videos under their chapter; they
            // had been mis-encoded as '?' characters in the pasted source.
            console.log('  【' + video.id + '】' + video.title + '\n');
        });
    });
}
// Download the course page, then parse and print it once the whole body
// has arrived.
http.get(url, function (res) {
    var html = '';

    // Decode at the stream level: without this each chunk is a Buffer that is
    // stringified on its own, so a multi-byte character split across two
    // chunks would be corrupted. Assumes the page is served as UTF-8.
    res.setEncoding('utf8');

    res.on('data', function (data) {
        html += data;
    });

    res.on('end', function () {
        var courseData = filterChapters(html);
        printCourseInfo(courseData);
    });
}).on('error', function (err) {
    // Include the underlying reason instead of discarding the error object.
    console.log('獲取課程數據出錯!', err && err.message);
});
2017-02-24
因為慕課網源碼裡 HTML 的 class 變了，所以爬不到了：.learnchapter 變成了 chapter，.studyvideo 變成了 video。
2017-04-25
非常感謝
2017-04-06
//這個是我的代碼,可以成功爬到數據的,你可以對比一下看DOM結構發生的改變。
//2017-04-06
var http = require('http')
var cheerio = require('cheerio')
var url = 'http://www.xianlaiwan.cn/learn/348'
/**
 * Build the chapter/video listing from a course page.
 *
 * Expected result shape:
 *   [{ chapterTitle: '', videos: [{ title: '', id: '' }] }]
 *
 * @param {string} html - Page markup, handed to cheerio.load.
 * @returns {Array<Object>} One record per `.chapter` element, in document
 *   order.
 */
function filterChapters(html) {
  var $ = cheerio.load(html);
  var result = [];

  $('.chapter').each(function (chapterIndex, chapterNode) {
    var $chapter = $(chapterNode);
    var heading = $chapter.find('strong').text();
    var entries = [];

    // Each <li> directly under a `.video` list is one video; its id is read
    // from the `data-media-id` attribute and its title is the item's text.
    $chapter.find('.video').children('li').each(function (videoIndex, videoNode) {
      var $video = $(videoNode);
      entries.push({
        title: $video.text(),
        id: $video.attr('data-media-id')
      });
    });

    result.push({
      chapterTitle: heading,
      videos: entries
    });
  });

  return result;
}
/**
 * Write the parsed course outline to stdout.
 *
 * Logs each chapter title, then one " ?[id] title" line per video. Every
 * log call appends an extra '\n', so entries are separated by a blank line.
 *
 * @param {Array<Object>} courseData - Records with `chapterTitle` and a
 *   `videos` array of `{ title, id }` objects.
 */
function printCourseInfo(courseData) {
  // NOTE(review): the '?' in the prefix may be a mis-encoded character from
  // the original paste; it is emitted verbatim here -- confirm the intent.
  function logVideo(entry) {
    var line = ' ?[' + entry.id + '] ' + entry.title + '\n';
    console.log(line);
  }

  courseData.forEach(function (chapter) {
    console.log(chapter.chapterTitle + '\n');
    chapter.videos.forEach(function (entry) {
      logVideo(entry);
    });
  });
}
// Download the course page, then parse and print it once the whole body
// has arrived.
http.get(url, function (res) {
  var html = '';

  // Decode at the stream level: without this each chunk is a Buffer that is
  // stringified on its own, so a multi-byte character split across two
  // chunks would be corrupted. Assumes the page is served as UTF-8.
  res.setEncoding('utf8');

  res.on('data', function (data) {
    html += data;
  });

  res.on('end', function () {
    var courseData = filterChapters(html);
    printCourseInfo(courseData);
  });
}).on('error', function (err) {
  // Include the underlying reason instead of discarding the error object.
  console.log('獲取課程數據出錯', err && err.message);
});