import bs4 as bs
import urllib.request
import csv
import requests
import re
from urllib.request import urlopen
from urllib.error import HTTPError
import pandas as pd
import time
import urllib.error
# Book identifiers to look up; the driver loop issues one request per entry.
book_ids = [
    "9781408110416",
    "9789604249671",
    "9781405950305",
]
def get_description(book_id):
    """Fetch a Book Depository product page and return its image container.

    Args:
        book_id: Identifier string appended to the product URL.

    Returns:
        The first ``<div class="item-img-content">`` element found on the
        page, or the string ``"[No description"`` when no such element exists.

    Raises:
        urllib.error.HTTPError: Propagated from ``urlopen`` when the server
            answers with an error status such as 404. This version does not
            handle it, so one failing id aborts the caller's loop.
    """
    # The ``+`` was missing here, which made the whole file a SyntaxError.
    # NOTE(review): the title slug in the path is hard-coded for every id;
    # presumably the site resolves the page from the trailing id - confirm.
    my_urls = 'https://www.bookdepository.com/Enid-Blytons-Christmas-Tales-Enid-Blyton/' + book_id
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read.
    with urlopen(my_urls) as response:
        source = response.read()
    soup = bs.BeautifulSoup(source, 'lxml')
    description = soup.find('div', class_='item-img-content')
    if description:
        return description
    return "[No description"
# Look up every id in turn, printing the id followed by the lookup result.
for book_id in book_ids:
    print(book_id)
    print(get_description(book_id))
    # Pause between requests. The original indentation was lost, so keeping
    # the delay inside the loop is an assumption - TODO confirm.
    time.sleep(2)
我得到的錯誤 -
HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
您好,我正在撰寫腳本,以從 bookdepository 抓取圖片 URL。我的主要問題是某些書籍會回傳 404,因為它們在平臺上已不存在。
我怎樣才能讓腳本直接跳到下一個書籍代碼,而不中斷整個迴圈?
提前致謝。
uj5u.com熱心網友回復:
您可以將 get_description() 函式中的代碼放在 try 和 except 區塊中,例如:
def get_description(book_id):
    """Fetch a Book Depository product page, tolerating unavailable books.

    Args:
        book_id: Identifier string appended to the product URL.

    Returns:
        The first ``<div class="item-img-content">`` element found on the
        page, the string ``"[No description"`` when the page has no such
        element, or ``None`` when the page could not be fetched (for example
        an HTTP 404), so a calling loop can move on to the next id.
    """
    # The ``+`` was missing here, which made the snippet a SyntaxError.
    my_urls = 'https://www.bookdepository.com/Enid-Blytons-Christmas-Tales-Enid-Blyton/' + book_id
    try:
        # Only the network call is guarded; the response is closed as soon
        # as the body has been read.
        with urlopen(my_urls) as response:
            source = response.read()
    except urllib.error.URLError:
        # HTTPError (e.g. 404 Not Found) is a subclass of URLError, so this
        # covers both HTTP error statuses and connection failures. A bare
        # ``except`` would also swallow KeyboardInterrupt and real bugs.
        return None
    soup = bs.BeautifulSoup(source, 'lxml')
    description = soup.find('div', class_='item-img-content')
    if description:
        return description
    return "[No description"
現在,如果代碼遇到錯誤(例如 404),它將跳過出錯的 book_id,並繼續處理下一個。
轉載請註明出處,本文鏈接:https://www.uj5u.com/net/462918.html
上一篇:如何使用BS4抓取資料值?
