// Dependencies. `colors` is never referenced by name; it is presumably
// required for the .green/.red/.yellow string properties used when printing
// (TODO confirm against the colors package documentation).
var http = require('http');
var Promise = require('bluebird');
var cheerio = require('cheerio');
var colors = require('colors');

// A course page URL is baseUrl followed by the course id.
var baseUrl = 'http://www.xianlaiwan.cn/learn/';
// Ids of the courses to crawl.
var videoIds = [348, 259, 197, 134, 75];
// Single-course URL; not referenced by the rest of the visible script.
var url = 'http://www.xianlaiwan.cn/learn/348';

// Startup banner.
console.log('正在啟動程序');
console.log('.');
console.log('..');
console.log('.........');
/**
 * Collapse every run of whitespace (spaces, tabs, line breaks) in a string
 * into a single space.
 *
 * @param {string} words - Text to normalize.
 * @returns {string} The text with each whitespace run replaced by one space.
 */
function removeSpecialCharscter(words) {
  var reg = /\s+/g;
  return words.replace(reg, ' ');
}
/**
 * Extract the course outline (chapters and their sessions) from a fetched
 * course page.
 *
 * @param {{html: string, id: *}} obj - Page markup together with the course
 *   id it was requested for.
 * @returns {{id: *, title: string, chapterData: Array}} Course record shaped as
 *   {
 *     id: <course id>,
 *     title: <course title>,
 *     chapterData: [{
 *       chapterTitle: '',
 *       sessions: [{ id: '', title: '' }]
 *     }]
 *   }
 */
function filterHtml(obj) {
  var $ = cheerio.load(obj.html);
  var courseTitle = $('#main .course-infos h2').text().trim();
  var chapterList = [];

  // Chapters
  $('.chapter').each(function() {
    var chapter = $(this);

    // Chapter title: take a copy of the <strong> node and drop its
    // .chapter-info child from the copy before reading the text.
    var $chapterTitle = chapter.find('strong').clone();
    $chapterTitle.find('.chapter-info').remove();

    var chapterData = {
      chapterTitle: removeSpecialCharscter($chapterTitle.text().trim()),
      sessions: []
    };

    // Sessions
    chapter.find('.video li').each(function() {
      var $a = $(this).find('a');
      // Keep only the part of the link text that precedes the first '('.
      var sessionTitle = removeSpecialCharscter($a.text().trim()).split('(')[0];
      // An entry without a link yields an undefined id instead of throwing.
      var href = $a.attr('href') || '';

      chapterData.sessions.push({
        id: href.split('/video/')[1],
        title: sessionTitle
      });
    });

    chapterList.push(chapterData);
  });

  return {
    id: obj.id,
    title: courseTitle,
    chapterData: chapterList
  };
}
/**
 * Print one course outline to the console: a header line with the course
 * title and id, then every chapter title followed by its session titles
 * indented by two spaces.
 *
 * The .green / .red / .yellow string properties are not defined in this
 * function; they are presumably supplied by the `colors` package required
 * at the top of the file (TODO confirm).
 *
 * @param {{id: *, title: string, chapterData: Array}} courseData - Course
 *   record whose chapterData entries carry `chapterTitle` and a `sessions`
 *   array of `{ title }` objects.
 * @returns {void}
 */
function printCourseInfo(courseData) {
  console.log(('\n《' + courseData.title + '》 編號:' + courseData.id).green);

  courseData.chapterData.forEach(function(chapter) {
    // Chapter line
    console.log(chapter.chapterTitle.red);

    // Session lines
    chapter.sessions.forEach(function(session) {
      console.log('  ' + session.title.yellow);
    });
  });
}
/**
 * Download a page over HTTP and resolve with its body once the response ends.
 *
 * @param {string} url - Address to request.
 * @param {*} id - Caller-supplied identifier, passed through unchanged in the
 *   resolved value.
 * @returns {Promise<{html: string, id: *}>} Resolves with the response body
 *   and the given id; rejects with the error emitted by the request or by the
 *   response stream.
 */
function getPageAsync(url, id) {
  return new Promise(function(resolve, reject) {
    // Shared failure path: report first, then reject.
    function fail(e) {
      console.log('獲取網頁代碼出錯!');
      reject(e);
    }

    http.get(url, function(res) {
      console.log('正在爬取:' + url);
      var html = '';

      // Have the stream deliver decoded strings; appending raw Buffer chunks
      // would convert each chunk separately and can corrupt a multi-byte
      // character that straddles two chunks.
      res.setEncoding('utf8');

      res.on('data', function(data) {
        html += data;
      });
      res.on('error', fail);
      res.on('end', function() {
        resolve({
          html: html,
          id: id
        });
      });
    }).on('error', fail);
  });
}
// Start one request per course id; the requests run concurrently.
var fetchCourseArray = videoIds.map(function(id) {
  return getPageAsync(baseUrl + id, id);
});

// Once every page has arrived: parse each one, order the courses by
// ascending id and print them.
Promise
  .all(fetchCourseArray)
  .then(function(pages) {
    pages
      .map(function(page) {
        return filterHtml(page);
      })
      // A comparator must return a negative/zero/positive number; returning
      // the boolean `a.id > b.id` does not sort reliably.
      .sort(function(a, b) {
        return a.id - b.id;
      })
      .forEach(function(course) {
        printCourseInfo(course);
      });
  })
  // Surface a failed download or parse instead of leaving the rejection
  // unhandled.
  .catch(function(e) {
    console.error(e);
  });
2018-05-25
沒人獲取學習人數嗎?他們說用 ajax 可行,我在代碼中實例化 xhr 對象,結果構造器不存在,想來是 node 環境下沒有這個;然後直接獲取人數又是一個空值,腦闊疼!
2017-11-24
    courseData.push(chapterData);
  });
  courseData = {
    id: courseId,
    title: courseTitle,
    chapterData: courseData
  };
  return courseData;
。。。你就不能規范點命名么~ = =!
2017-10-16
很棒哦!!但是為什么要設個定時器
2017-10-10
-_-!