import requests
import bs4
import re
def open_url(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent header and return the response.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole script.

    Returns:
        The ``requests.Response`` object as returned by ``requests.get``.
        The HTTP status code is not checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    # Presumably sent so the site does not reject the default client
    # string of ``requests`` -- confirm against the target site.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    return requests.get(url, headers=headers, timeout=timeout)
def find_movies(res):
    """Extract title, rating and description text for each movie on a page.

    Args:
        res: Response-like object whose ``text`` attribute holds the
            page HTML.

    Returns:
        A list of strings, one per movie: the title, the rating prefixed
        with '評分: ', and the description text, terminated by a newline.
        If the three extracted lists differ in length, the output stops
        at the shortest one.
    """
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Titles: the first <span> inside the link of every <div class="hd">.
    movies = [each.a.span.text
              for each in soup.find_all('div', class_='hd')]

    # Ratings: text of every <span class="rating_num">.
    ranks = ['評分: %s ' % each.text
             for each in soup.find_all('span', class_='rating_num')]

    # Details: the 2nd and 3rd lines of the first <p> in every
    # <div class="bd">, stripped and concatenated.
    messages = []
    for each in soup.find_all('div', class_='bd'):
        try:
            lines = each.p.text.split('\n')
            messages.append(lines[1].strip() + lines[2].strip())
        except (AttributeError, IndexError):
            # No <p> child (each.p is None) or fewer than three lines:
            # skip this <div> and carry on with the next one.
            continue

    # Terminate each record with a real newline; the previous '/n' wrote
    # the two literal characters and ran all records together.
    return [title + rank + message + '\n'
            for title, rank, message in zip(movies, ranks, messages)]
def main():
    """Scrape ten pages of the Douban list and save them to doubaner.txt.

    Pages are requested with ``start`` offsets 0, 25, ..., 225. Every
    record returned by ``find_movies`` is collected first, then the file
    is written once in UTF-8, replacing any previous content.
    """
    host = 'https://movie.douban.com/top250'
    depth = 10  # number of pages to fetch

    result = []
    for i in range(depth):
        url = host + '?start=' + str(25 * i) + '&filter='
        result.extend(find_movies(open_url(url)))

    # The file is opened once, after all pages are collected; the
    # ``with`` block closes it, so no explicit close() is needed.
    with open('doubaner.txt', 'w', encoding='utf-8') as f:
        f.writelines(result)
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
uj5u.com熱心網友回復:
你不是只要電影,評分,資料么,結果正常啊
uj5u.com熱心網友回復:
為什么我的結果只有個表頭
uj5u.com熱心網友回復:
用你的代碼運行了下,回傳了douban.txt,資訊都有啊。。
uj5u.com熱心網友回復:
運行后什么也沒有反饋
uj5u.com熱心網友回復:
把你的res.text保存下來看看內容對不對
uj5u.com熱心網友回復:
試著用simplified-scrapy庫抽了下,給你參看下,需要安裝:pip install simplified-scrapy
from simplified_scrapy.simplified_doc import SimplifiedDoc
def test(html):
    """Parse list-page HTML into one dict per movie entry.

    Args:
        html: HTML source of a list page, as a string.

    Returns:
        A list of dicts, one per matched <div>. Each dict holds only the
        keys that were found: 'href' and 'title' from the first <a>,
        'star' from the element with class 'rating_num', and 'info',
        the concatenated text of all <p> elements.
    """
    doc = SimplifiedDoc(html)
    # NOTE(review): the literal here had been rewritten into a URL
    # ("https://bbs.csdn.net/topics/info"), which cannot match any class
    # on the page; restored to the bare class name -- confirm against
    # the page markup and the SimplifiedDoc.getElements signature.
    entries = doc.getElements('div', value="info")

    movies = []
    for entry in entries:
        line = entry.innerHtml
        obj = {}

        title = doc.getElementByTag('a', line)
        if title:
            obj['href'] = title.href
            obj["title"] = title.text

        star = doc.getElementByClass('rating_num', line)
        if star:
            obj['star'] = star.text

        info = doc.getElementsByTag('p', line)
        if info:
            obj['info'] = ''.join(p.text for p in info)

        movies.append(obj)
    return movies
uj5u.com熱心網友回復:
具體我沒有運行,但是看你的 w 寫檔案 放在 for 回圈 中, 那么一定會有資料被覆寫的
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/132427.html
