# Re-importing these names is harmless if an earlier part of the file already
# imports them; they are listed here so the spider definition is self-contained.
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from boss.items import BossItem


class ZhiPinSpider(CrawlSpider):
    """Crawl zhipin.com search results for "python" jobs.

    The spider starts from the first search-result page, follows pagination
    links, and hands every job detail page to :meth:`parse_job`, which yields
    one ``BossItem`` (``title`` and ``company``) per page.
    """

    name = 'Zhipin'
    # Scrapy only honours the attribute when it is spelled ``allowed_domains``.
    allowed_domains = ['zhipin.com']
    start_urls = ['https://www.zhipin.com/c100010000/?query=python&page=1']

    # NOTE(review): the attribute predicates of both XPath expressions were
    # lost in the source (they read ``[@]``, which is not valid XPath). The
    # class names below are assumptions -- verify them against the live page.
    TITLE_XPATH = '//h1[@class="name"]/text()'
    COMPANY_XPATH = '//div[@class="company-info"]//a/text()'

    rules = (
        # Rule for job listing (search result) pages: only follow them so
        # that further pagination and detail links are discovered.
        Rule(LinkExtractor(allow=r'.+\?query=python&page=\d'), follow=True),
        # Rule for job detail pages: parse them, do not follow further.
        # CrawlSpider applies only the first rule that matches a link, so
        # this pattern must differ from the listing pattern above or
        # ``parse_job`` is never called.
        # NOTE(review): the ``job_detail/...html`` URL shape is an assumption
        # about zhipin.com detail pages -- confirm against real links.
        Rule(
            LinkExtractor(allow=r'.+job_detail/.+\.html'),
            callback='parse_job',
            follow=False,
        ),
    )

    def parse_job(self, response):
        """Extract the job title and company name from a job detail page.

        Args:
            response: The Scrapy response for a job detail page.

        Yields:
            BossItem: An item with ``title`` (whitespace-stripped, empty
            string when the title node is missing) and ``company`` (``None``
            when the company node is missing).
        """
        # ``default=''`` keeps ``.strip()`` from failing on a missing node.
        title = response.xpath(self.TITLE_XPATH).get(default='').strip()
        company = response.xpath(self.COMPANY_XPATH).get()
        yield BossItem(title=title, company=company)
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/110808.html
標籤:Python
上一篇:Python錯誤與例外
