# coding:utf-8
# 引入相關模塊
import requests
from bs4 import BeautifulSoup
from requests_html import HTMLSession
url ='https://gy.house.ifeng.com//news'
wbdata = requests.get(url).text
# 對獲取到的文本進行決議
soup = BeautifulSoup(wbdata,'lxml')
# 從決議檔案中通過select選擇器定位指定的元素,回傳一個串列
news_titles = soup.select('body > div.w1180.mb30 > div.content.clearfix > div.newsList.clearfix.fl > div.newsDetail > a')
# 對回傳的串列進行遍歷
for n in news_titles:
# 提取出標題和鏈接資訊
title = n.get_text()
link = n.get("href")
date = {'標題':"".join(title.split()),'鏈接':link}
date1={"".join(title.split())}
session = HTMLSession()
r = session.get(date1)
title1 = {r.html.find('body > div.w1180.mb30 > div.content.clearfix > div.article-content.fl > div.article > div.title', first=True)}
context1 = r.html.find('body > div.w1180.mb30 > div.content.clearfix > div.article-content.fl > div.article > div.content-info>p', first=True)
print(title1.text)
print(context1.text)
用這個代碼取https://gy.house.ifeng.com//news新聞網的文章老是出錯,python小白,求大神指教
錯誤代碼:"C:\Program Files\Python38\python.exe" C:/Users/sikezx-all/PycharmProjects/PythonTest/Test1.py
Traceback (most recent call last):
File "C:\Program Files\Python38\lib\site-packages\requests\models.py", line 379, in prepare_url
scheme, auth, host, port, path, query, fragment = parse_url(url)
File "C:\Program Files\Python38\lib\site-packages\urllib3\util\url.py", line 392, in parse_url
return six.raise_from(LocationParseError(source_url), None)
File "<string>", line 3, in raise_from
urllib3.exceptions.LocationParseError: Failed to parse: {'前三季度全國累計新增減稅降費超1.78萬億元10月30日,國家稅務總局召開新聞發布會,介紹今年前三季度稅務部門落實減稅降費、組織稅收收入、深化“放管服”改革、優化稅收營商環境等情況人民日報2019-11-01企業創新聯合科技作用'}
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/sikezx-all/PycharmProjects/PythonTest/Test1.py", line 71, in <module>
r = session.get(date1)
File "C:\Program Files\Python38\lib\site-packages\requests\sessions.py", line 546, in get
return self.request('GET', url, **kwargs)
File "C:\Program Files\Python38\lib\site-packages\requests\sessions.py", line 519, in request
prep = self.prepare_request(req)
File "C:\Program Files\Python38\lib\site-packages\requests\sessions.py", line 452, in prepare_request
p.prepare(
File "C:\Program Files\Python38\lib\site-packages\requests\models.py", line 313, in prepare
self.prepare_url(url, params)
File "C:\Program Files\Python38\lib\site-packages\requests\models.py", line 381, in prepare_url
raise InvalidURL(*e.args)
requests.exceptions.InvalidURL: Failed to parse: {'前三季度全國累計新增減稅降費超1.78萬億元10月30日,國家稅務總局召開新聞發布會,介紹今年前三季度稅務部門落實減稅降費、組織稅收收入、深化“放管服”改革、優化稅收營商環境等情況人民日報2019-11-01企業創新聯合科技作用'}
Process finished with exit code 1
uj5u.com熱心網友回復:
你這個問題在于parse里面為什么只有鍵沒有值?正常不應該是{"sad":"dsa"}=>xxx.xxx?sad=dsa,你這里是不是少了一半,然后在那個新聞網也沒有用到引數的地方啊?轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/95088.html
上一篇:SAP UI5 在js中如何修改元素css樣式,呼叫css檔案中的樣式
下一篇:python獲取時間戳
