import requests
from bs4 import BeautifulSoup
# Accumulates one entry per response label found on the scraped pages, or
# None when handling a comment raises an exception.
respond_timestamp = []

# Only page 17 is requested; widen the range to crawl more pages.
for i in range(17, 18):
    print(i)
    try:
        url = 'https://www.darooyab.ir/doctor/9/????-?????-???????page=' + str(i)
        #print(url)
        response = requests.get(url).content.decode()
    except:
        # Any failure while fetching or decoding the page skips that page.
        continue
    soup = BeautifulSoup(response, 'html.parser')
    comment = soup.select('div.comment')
    #print( len(comment))
    for rt in [i for i in soup.select('div.comment')]:
        try:
            # If the selector matches nothing, this inner loop never runs:
            # no exception is raised and nothing is appended for this comment.
            for out in [j.text for j in rt.select('div:nth-child(3) > span:nth-child(1) > label:nth-child(1)')]:
                #print(rt[1])
                pattern = r'???? ????? ?????? - ????? ??? ? ????'
                # NOTE: `re` is not imported in this snippet, so this call
                # raises NameError, which the bare `except` below converts
                # into an appended None.
                respond_timestamp.append(re.sub(pattern, '', out))
        except:
            respond_timestamp.append(None)
print(len(respond_timestamp))
我想把 respond_timestamp 的值附加到串列中。respond_timestamp 的回傳值可能為空,所以我使用 try/except,並在發生例外時 append(None),但 None 並沒有被加入串列。串列的元素個數對我來說很重要。
我該怎么辦?
uj5u.com熱心網友回復:
注意:檢查/列印您的串列就會發現,並不是 None 沒有被附加,而是缺少了「時間戳」——以上基於您的範例。
怎么了?
- 主要問題是 re 模組沒有匯入(請看您的範例)。
- 這些不必要的串列推導式讓程式碼變得混亂,也更難除錯。此外,如果沒有 responseComment,內層回圈根本不會執行(因為選取結果是空串列),於是 try 會正常結束、不會引發例外,也就不會執行 append(None)。
怎么修?
匯入 re 模組以避免例外,並修改您的 for 回圈——只選取單一元素(select_one),讓 try 能真正檢查到元素不存在的情況:
# For every comment block, record the cleaned response label text, or None
# when the label is absent or processing fails, so the list keeps exactly
# one entry per comment.
for rt in soup.select('div.comment'):
    try:
        # select_one returns None when nothing matches, so `.text` raises
        # AttributeError and control falls through to the except branch.
        out = rt.select_one('div:nth-child(3) > span:nth-child(1) > label:nth-child(1)').text
        pattern = '???? ????? ?????? - ????? ??? ? ????'
        respond_timestamp.append(re.sub(pattern, '', out))
    except Exception as ex:
        # Show what went wrong instead of hiding it, then keep the slot.
        print(repr(ex))
        respond_timestamp.append(None)
例子
請注意:如果您的目標是產生字典,請在同一個流程中直接建立,而不是事後用 zip 合併多個串列,...
import requests
from bs4 import BeautifulSoup
# One dict per scraped comment; response fields are None when the comment
# has no response element.
data = []

# Only page 17 is requested; widen the range to crawl more pages.
for i in range(17, 18):
    url = 'https://www.darooyab.ir/doctor/9/????-?????-???????page=' + str(i)
    try:
        response = requests.get(url).content.decode()
    except (requests.RequestException, UnicodeDecodeError):
        # Skip pages that cannot be fetched or decoded; any other error is
        # a programming mistake and is allowed to surface.
        continue

    soup = BeautifulSoup(response, 'lxml')
    for c in soup.select('.comment'):
        data.append({
            # Second space-separated token of the first <span>, minus its
            # first and last characters (presumably surrounding brackets;
            # confirm against the page markup).
            'commentDate': c.span.text.split(' ')[1][1:-1],
            'commentText': c.div.text,
            # The walrus binds the lookup result so it is only dereferenced
            # when the element exists.
            'responseDate': response_date.text.split(' ')[-1][1:-1] if (response_date := c.select_one('.responseComment label')) else None,
            'responseText': response_text.text if (response_text := c.select_one('.responseComment .commentText')) else None,
        })

# Bare expression: displays the collected records in a notebook/REPL.
data
輸出
[{'commentDate': '1398/12/29',
'commentText': '???? ?? ???? ?????? ??? ???????? ???? ????? ? ???? ????? ?? ???? ????? ????? ????? ??? ??? ??? ???? ??? ?? ? ??? ?? . ??? ??? ??? ??? ?? ??? ?????? ????? ????? ??????? ?? ??? ???? ?? ???? ??? ????\r\n',
'responseDate': None,
'responseText': None},
{'commentDate': '?????',
'commentText': '?? ???? ???? ???? ???? ????. ?????? ?? ?? ?? ????? ????? ??? ??????????? ???? 1 ???? ?? ??? ? ?? ???? ?? ???? ??? ?? ???? . ????? ???????????????? ???? ??? ?? ???? ???? ????? ???. ????????? ',
'responseDate': '1399/2/25',
'responseText': '???? ????? ?? ??? ??? ???'},
{'commentDate': '1398/12/28',
'commentText': '???? . ?? ????? ?? ???? ??? ???? ? ????? ??? ?? ?? ?? ???? ?????? ???? . ???? ?? ?????????????? ? ??????????? ????? ???? . ???? ?? ????? ???? ???? ???? ?? ????? . ???????? ????? ???? ?? ???? ??? ?? ???? ?',
'responseDate': '1399/2/25',
'responseText': '???? ????? ?? ??? ??? ???'},
...]
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/386241.html
