#-*- coding:utf-8 -*-
import requests
from lxml import etree
from bs4 import BeautifulSoup # 匯入所需庫
# Request headers sent with every HTTP call: a desktop Firefox User-Agent.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0) '
        'Gecko/20100101 Firefox/58.0'
    ),
}
def _parse_movie_detail(html):
    """Run the detail-page XPath queries against *html*.

    Returns a dict mapping field name to the raw XPath result (lists of
    strings, except ``content`` which is a whitespace-normalized string),
    or None when lxml does not produce a document tree.
    """
    tree = etree.HTML(html)
    if tree is None:
        return None
    # NOTE(review): the span[N] indexes are purely positional; a detail page
    # whose info box is laid out differently will presumably yield empty
    # lists or a different field -- confirm against real pages.
    return {
        'film_name': tree.xpath('//*[@id="content"]/h1/span[1]/text()'),  # film title
        'director': tree.xpath('//*[@id="info"]/span[1]/span[2]/a/text()'),  # director
        'writers': tree.xpath('//*[@id="info"]/span[2]/span[2]/a/text()'),  # screenwriters
        'actor': tree.xpath('//*[@id="info"]/span[3]/span[2]/a/text()'),  # leading actors
        'type': tree.xpath('//*[@id="info"]/span[5]/text()'),  # genre
        'date': tree.xpath('//*[@id="info"]/span[9]/text()'),  # release date
        'time': tree.xpath('//*[@id="info"]/span[11]/text()'),  # running time
        'content': tree.xpath('normalize-space(//*[@id="link-report"]/span[1]/text())'),  # synopsis
        'img': tree.xpath('//*[@id="mainpic"]/a/img/@src'),  # poster image link
    }


def get_top(city='zhanjiang', timeout=10):
    """Print details of every film on Douban's "now playing" page for a city.

    For each entry in the first ``<ul class="lists">`` of the listing page,
    the film's detail page is fetched and its id, score, region, director,
    writers, actors, title, genre, date, running time, synopsis and poster
    link are printed, followed by a blank separator.

    Args:
        city: City slug inserted into the listing URL. Defaults to the
            originally hard-coded ``'zhanjiang'``.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the script.

    Raises:
        requests.RequestException: If the listing page cannot be fetched.
            Failures on individual detail pages are reported and skipped.
    """
    url = 'https://movie.douban.com/cinema/nowplaying/{}/'.format(city)
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, 'lxml')

    # The list of films; find() returns None when the element is absent.
    movie_list = soup.find("ul", {"class": "lists"})
    if movie_list is None:
        print('No movie list found at {}'.format(url))
        return

    # Only direct child *tags*: .children also yields whitespace text nodes,
    # which cannot be indexed by attribute name.
    for li in movie_list.find_all(True, recursive=False):
        try:
            movie_id = str(li["id"])  # Douban subject id
            score = str(li["data-score"])  # rating
            maker_location = str(li["data-region"])  # production region
        except KeyError:
            # Child tag without the expected attributes: not a film entry.
            continue

        # Link to the film's detail page.
        detail_url = 'https://movie.douban.com/subject/{}/?from=playing_poster'.format(movie_id)
        try:
            detail_response = requests.get(detail_url, headers=headers, timeout=timeout)
            detail_response.raise_for_status()
            detail = _parse_movie_detail(detail_response.text)
        except (requests.RequestException, etree.LxmlError, ValueError) as exc:
            # Best effort: report the failure and move on to the next film.
            print('Skipping movie {}: {}'.format(movie_id, exc))
            continue
        if detail is None:
            print('Skipping movie {}: empty detail page'.format(movie_id))
            continue

        print(movie_id)
        print(score)
        print(maker_location)
        print(detail['director'])
        print(detail['writers'])
        print(detail['actor'])
        print(detail['film_name'])
        print(detail['type'])
        print(detail['date'])
        print(detail['time'])
        print(detail['content'])
        print(detail['img'])
        print('\n')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    get_top()
# Please credit the source when reposting. Original article: https://www.uj5u.com/houduan/261065.html
# Tag: python
# Previous post: 【數算-19】樹
# Next post: Python繪制高斯曲線
