getURL() 函數從原始 URL 創建一個抓取 URL 的數組。getSubURL() 然后循環遍歷該數組并抓取所有這些頁面的 URL。目前,此代碼可以很好地輸出到控制臺,但我不知道如何等待我的數據解析,以便我可以將所有收集的數據推送到單個數組。目前,當我嘗試返回站點然后推送到數組時,它只推送最后一個值。我相信這是一種promise.all(map) 的情況,但我不知道如何正確編寫而不出現錯誤。理想情況下,我完成的抓取可以在另一個函數中調用。如果可以的話請看一下const cheerio = require('cheerio');const axios = require('axios');let URL = 'https://toscrape.com';const getURLS = async () => { try { const res = await axios.get(URL); const data = res.data; const $ = cheerio.load(data); const urlQueue = []; $("a[href^='http']").each((i, elem) => { const link = $(elem).attr('href'); if (urlQueue.indexOf(link) === -1) { urlQueue.push(link); } }); return urlQueue; } catch (err) { console.log(`Error fetching and parsing data: `, err); }};const getSubURLs = async () => { let urls = await getURLS(); try { //loop through each url in array for (const url of urls) { //fetch all html from the current url const res = await axios.get(url); const data = res.data; const $ = cheerio.load(data); //create object and push that url into that object let sites = {}; sites.url = url; let links = []; //scrape all links and save in links array $("a[href^='/']").each((i, elem) => { const link = $(elem).attr('href'); if (links.indexOf(link) === -1) { links.push(link); } //save scraped data in object sites.links = links; }); // returns list of {url:'url', links:[link1,link2,link3]} console.log(sites); } } catch (err) { console.log(`Error fetching and parsing data: `, err); }};
如何承諾等待所有對象完成然后推送到數組?
撒科打諢
2023-11-02 20:01:11