#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: wangzhu
@desc: print the link text of each listing block found on
      https://www.danke.com/room/hz
@contact: [email protected]
@date: 2019/8/7
"""
import requests  # HTTP client
import re  # regular expressions
from random import randint

# Target site: https://www.danke.com/room/hz

# Some User Agents; one is picked at random for each request.
hds = [
    {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11'},
    {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)'},
    {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0'},
    {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'},
    {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'},
    # The closing parenthesis was missing from this entry in the original list.
    {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'},
    {'User-Agent': 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11'},
    {'User-Agent': 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11'},
]

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10


def CrawlerHouse():
    """Download the listing page and print the link text of each matched block.

    The page is fetched with a randomly chosen User-Agent header, decoded as
    UTF-8, and split into blocks with a regular expression. For the n-th block
    (1-based) the ``<span >n</span>`` index marker is removed, then everything
    captured between a ``>`` and the following ``</a>`` is concatenated,
    stripped of surrounding whitespace and printed on its own line.

    Returns:
        None. Results are written to standard output.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.danke.com/room/hz"
    res = requests.get(
        url,
        headers=hds[randint(0, len(hds) - 1)],
        timeout=_REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    res.raise_for_status()
    res.encoding = "utf-8"  # decode the response body as UTF-8

    # NOTE(review): the tag patterns below contain no class/attribute
    # selectors ("<div >", "<span >"); they look like they lost them when the
    # script was published. They are kept verbatim -- confirm against the live
    # page markup before relying on the output.
    infolist = re.findall('<div >(.*?)</div>', res.text, re.S)  # all blocks

    for i, one in enumerate(infolist, start=1):
        # Drop the numbered index span so that its '>' does not interfere
        # with the '>' that anchors the link pattern below.
        two = one.replace('<span >{0}</span>'.format(i), '')
        # Collect the text in front of every closing </a> tag.
        job = re.findall('>(.*?)</a>', two, re.S)
        # Join the captured pieces and strip surrounding whitespace.
        print(''.join(job).strip())


# Run the crawler only when executed as a script, not on import.
if __name__ == "__main__":
    CrawlerHouse()

<div >
<div >
<a href="javascript:void(0)" key='0' xiaoqu='萬科北宸之光'>
<span >
<span></span>
<span></span>
</span>
<img
src="https://public.danke.com.cn/public-20190123-isz_ljR3BG1JKKfa2lXEilpNXgN1NTRV?imageView2/1/w/380/h/285" width="260" height="173"
title=""
alt="圖片"/>
</a>
<div >
<div >
<span >1</span>
<a href="https://www.danke.com/duanzu/1913140756.html" key='0' xiaoqu='萬科北宸之光' target="_blank"
title="萬達廣場 萬科北宸之光 3室2廳">
萬達廣場 萬科北宸之光 3室2廳
</a>
<div >
<div ></div>
距5號線大運河站2700米
</div>
</div>
<div >
<div ></div>
建筑面積約12㎡ | 21樓
| 3室1衛 | 朝南
<i>合</i>
</div>
<div >
</div>
</div>
<div >
<div >
<span >1890</span> 元/月
</div>
<a key='0' xiaoqu='萬科北宸之光' href="https://www.danke.com/duanzu/1913140756.html"
target="_blank">
查看詳情
</a>
</div>
</div>
<div >
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/183606.html
標籤:Python
下一篇:python——變數的高級使用
