import requests
import time
from lxml import etree
def spider(url,d,pages):
headers={'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'}
for k,v in d.items():
for pg in range(1,pages+1):
new_url=url+'/'+k+'/'+str(pg)+'/#contentlist'
html=requests.get(new_url,headers=headers)
print(html.status_code)
if html.status_code!=200:
break
time.sleep(2)
selector=etree.HTML(html.text)
house_list=selector.xpath('//*[@id="content"]/div[1]/div[1]/div[1]/div/p[2]/text()[3]')
for house in house_list:
area=house.xpath('div/p[2]/text()')[4]
print(area)
if __name__=='__main__':
url='https://dg.lianjia.com/zufang/'
dg={'dalangzhen':'大朗鎮','nanchengqu':'南城區'}
spider(url,dg,2)
輸出之后報錯AttributeError: 'lxml.etree._ElementUnicodeResult' object has no attribute 'xpath',請問解決辦法嗎?
轉載請註明出處,本文鏈接:https://www.uj5u.com/shujuku/16688.html
標籤:其他數據庫
上一篇:teradata資料庫的偽列
