apply_async里面的函數不執行
#?coding=utf-8
import?json
import?re
import?time
import?multiprocessing
import?requests
class?HandleLaGou(object):
????def?__init__(self):
????????self.lagou_session?=?requests.session()
????????self.header?=?{
????????????'User-Agent':?'Mozilla/5.0?(Macintosh;?Intel?Mac?OS?X?10_15_0)?AppleWebKit/537.36?(KHTML,?like?Gecko)?Chrome/78.0.3904.108?Safari/537.36'
????????}
????????self.city_list?=?""
????#?獲取全國所有城市列表
????def?handle_city(self):
????????city_search?=?re.compile(r'zhaopin/">(.*?)</a>')
????????city_url?=?'https://www.lagou.com/jobs/allCity.html'
????????city_result?=?self.handle_request(method='GET',?url=city_url)
????????#?使用正則表達式獲取城市列表
????????self.city_list?=?city_search.findall(city_result)
????????self.lagou_session.cookies.clear()
????def?handle_city_job(self,?city):
????????first_request_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?city
????????first_response?=?self.handle_request(method='GET',?url=first_request_url)
????????total_page_search?=?re.compile(r'class="span\stotalNum">(\d+)</span>')
????????try:
????????????total_page?=?total_page_search.search(first_response).group(1)
????????except:
????????????return
????????else:
????????????for?i?in?range(1,?int(total_page)?+?1):
????????????????data?=?{
????????????????????'pn':?i,
????????????????????'kd':?'python'
????????????????}
????????????????page_url?=?'https://www.lagou.com/jobs/positionAjax.json?px=default&city=%s&needAddtionalResult=false'?%?city
????????????????referer_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?city
????????????????self.header['Referer']?=?referer_url.encode()
????????????????response?=?self.handle_request(method='POST',?url=page_url,?data=data,?info=city)
????????????????print?response
????????????????lagou_data?=?json.loads(response)
????????????????job_list?=?lagou_data['content']['positionResult']['result']
????????????????for?job?in?job_list:
????????????????????print?job
????def?handle_request(self,?method,?url,?data=None,?info=None):
????????global?response
????????while?True:
????????????#?阿布云代理
????????????#?代理服務器
????????????proxyHost?=?"http-dyn.abuyun.com"
????????????proxyPort?=?"9020"
????????????#?代理隧道驗證信息
????????????proxyUser?=?"H6451437A9W24E7D"
????????????proxyPass?=?"A86CD1F6AF3AD760"
????????????proxyMeta?=?"http://%(user)s:%(pass)s@%(host)s:%(port)s"?%?{
????????????????"host":?proxyHost,
????????????????"port":?proxyPort,
????????????????"user":?proxyUser,
????????????????"pass":?proxyPass,
????????????}
????????????proxies?=?{
????????????????"http":?proxyMeta,
????????????????"https":?proxyMeta,
????????????}
????????????try:
????????????????if?method?==?'GET':
????????????????????response?=?self.lagou_session.get(
????????????????????????url=url,
????????????????????????headers=self.header,
????????????????????????proxies=proxies,
????????????????????????timeout=6
????????????????????)
????????????????elif?method?==?'POST':
????????????????????response?=?self.lagou_session.post(
????????????????????????url=url,
????????????????????????headers=self.header,
????????????????????????data=data,
????????????????????????proxies=proxies,
????????????????????????timeout=6
????????????????????)
????????????except:
????????????????self.lagou_session.cookies.clear()
????????????????first_request_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?info
????????????????self.handle_request(method='GET',?url=first_request_url)
????????????????time.sleep(10)
????????????????continue
????????????response.encoding?=?'utf-8'
????????????if?'頻繁'?in?response.text:
????????????????print?response.text
????????????????self.lagou_session.cookies.clear()
????????????????first_request_url?=?'https://www.lagou.com/jobs/list_python?&px=default&city=%s'?%?info
????????????????self.handle_request(method='GET',?url=first_request_url)
????????????????time.sleep(10)
????????????????continue
????????????return?response.text
if?__name__?==?'__main__':
????lagou?=?HandleLaGou()
????lagou.handle_city()
????#?引入多進程
????pool?=?multiprocessing.Pool(1)
????for?city?in?lagou.city_list:
????????pool.apply_async(lagou.handle_city_job,?args=(city,1))
????pool.close()
????pool.join()執行結果
/usr/local/bin/python2.7?/Users/imooc_lagou/handle_crawl_lagou.pyProcess? finished?with?exit?code?0
2020-03-14
pool.apply_async(lagou.handle_city_job,?args={city})