各位大神,小弟最近入坑 Python 爬蟲,模仿網上的例子寫了一個爬取攜程機票資料的代碼,但是輸出結果長這樣,得不到想要的資料內容,能不能幫忙看看問題出在哪呢?

代碼如下:
import datetime
import requests
import hashlib
import json
from bs4 import BeautifulSoup #網頁資訊資料獲取(網頁資料拆分)
import re #正則運算式-進行文字匹配(網頁資料提煉)
import xlwt #進行excel操作(資料存入excel)
# Route list. Each entry is an "XXX-YYY" string; the first three and last
# three characters are later used separately when building the request
# signature (presumably origin and destination airport codes).
one_way = [
    'SZX-CGO', 'SZX-CGQ', 'SZX-CKG', 'SZX-CTU', 'SZX-CZX', 'SZX-DLC',
    'SZX-HAK', 'SZX-HET', 'SZX-HFE', 'SZX-HGH', 'SZX-HRB', 'SZX-INC',
    'SZX-KHN', 'SZX-KMG', 'SZX-LHW', 'SZX-LJG', 'SZX-LYI', 'SZX-MIG',
    'SZX-NGB', 'SZX-NKG', 'SZX-NNG', 'SZX-NTG', 'SZX-PEK', 'SZX-PKX',
    'SZX-PVG', 'SZX-SHA', 'SZX-SHE', 'SZX-SJW', 'SZX-SYX', 'SZX-TAO',
    'SZX-TCZ', 'SZX-TNA', 'SZX-TSN', 'SZX-TYN', 'SZX-URC', 'SZX-WNZ',
    'SZX-WUH', 'SZX-WUX', 'SZX-XIY', 'SZX-XNN', 'SZX-YIH', 'SZX-YNT',
    'SZX-ZHA',
]
# Departure dates to query: the 13 days following today, formatted as
# YYYY-MM-DD strings.
# The clock is read once so every entry is offset from the same instant
# (reading it per iteration could straddle midnight and skew the list).
_start = datetime.datetime.now()
datelist = [
    (_start + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
    for offset in range(1, 14)
]
# Main program.
def main():
    """Run the flight search starting from the Ctrip search base URL."""
    baseurl = 'https://flights.ctrip.com/international/search/'  # base search URL
    # Load the search page, extract its parameters / transaction id, and
    # issue the search request.
    getData(baseurl)
# Fetch the search page and derive the transaction id, signature and payload.
def getData(baseurl):
    """Load the one-way search page and pass its search criteria to ``askurl``.

    For each selected route and departure date, the search page is
    downloaded, the JSON assigned to ``GlobalSearchCriteria`` is extracted
    from the page text, and an MD5 signature is computed over
    ``transactionID + first 3 chars of route + last 3 chars of route + date``.
    The transaction id, raw criteria bytes and hash object are then handed
    to ``askurl``.

    Args:
        baseurl: URL prefix of the search page; ``oneway-<route>`` and the
            query string are appended to it.

    Raises:
        ValueError: if the page contains no ``GlobalSearchCriteria =...;``
            assignment, or the criteria carry no ``transactionID``.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
    # Only the first route and the first date are queried for now; drop the
    # [:1] slices to cover every entry of one_way / datelist.
    for route in one_way[:1]:
        for depdate in datelist[:1]:
            # Build the full search page URL.
            url = baseurl + 'oneway-' + route + '?depdate=' + depdate + '&cabin=Y_S_C_F'
            # Fetch the search page; the timeout keeps a stalled connection
            # from hanging the script forever.
            response = requests.get(url, headers={'User-Agent': user_agent}, timeout=30)
            # Extract the search criteria embedded in the page.
            found = re.search(r'GlobalSearchCriteria =(.+);', response.text)
            if found is None:
                raise ValueError(
                    'GlobalSearchCriteria not found in page (HTTP %s): %s'
                    % (response.status_code, url)
                )
            data = found.group(1).encode('utf-8')
            # Read the transaction id out of the criteria.
            transactionId = json.loads(data).get("transactionID")
            if transactionId is None:
                raise ValueError('transactionID missing from GlobalSearchCriteria: ' + url)
            # Signature input: transaction id + route endpoints + date.
            sign_value = transactionId + route[0:3] + route[-3:] + depdate
            signID = hashlib.md5()
            signID.update(sign_value.encode('utf-8'))
            askurl(transactionId, data, signID)
def askurl(transactionId, data, signID):
    """POST the search criteria to the batchSearch endpoint and print the result.

    Args:
        transactionId: value sent in the ``transactionid`` request header.
        data: request body, sent as-is (the caller passes the raw search
            criteria bytes).
        signID: hash object whose ``hexdigest()`` is sent in the ``sign``
            request header.
    """
    # Request headers imitating a browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
        'Content-Type': 'application/json;charset=UTF-8',
        'transactionid': transactionId,
        'sign': signID.hexdigest()
    }
    response = requests.post(
        url="https://flights.ctrip.com/international/search/api/search/batchSearch",
        headers=headers,
        # The scraped source had a URL spliced into this argument
        # ("data=https://..."), which is a syntax error; the payload is the
        # ``data`` parameter.
        data=data,
        timeout=30,  # avoid hanging forever on a stalled connection
    )
    response.encoding = 'utf-8'
    # Print the "data" field of the JSON response.
    print(response.json().get("data"))
# Script entry point.
if __name__ == '__main__':
    main()
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/275596.html
上一篇:CSDN《問答有獎》活動上線:提問或回答都有機會中獎!
下一篇:穩健距離和馬氏距離的散點圖的包
