import urllib.request
import urllib.error
import re
def use_id(url, add_proxy):
"""使用代理服務器爬取網頁"""
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0")
# 模仿瀏覽器訪問網頁
req = urllib.request.Request(url)
# 使用代理服務器
proxy = urllib.request.ProxyHandler({"http": add_proxy})
opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
opener.addheaders = [headers]
urllib.request.install_opener(opener)
response = urllib.request.urlopen(req).read().decode("utf-8", "ignore")
return response
# 爬取洛克王國所有圖片
url = "http://news.4399.com/luoke/luokechongwu/"
add_proxy = "127.0.0.1:8888"
response = use_id(url, add_proxy)
re_picture = '(http://newsimg.5054399.com/uploads/userup/.*?.gif)'
url_picture = re.compile(re_picture).findall(response)
print(url_picture)
# 寫入檔案
for i in range(0, len(url_picture)):
try:
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0",
"Host": "newsimg.5054399.com"}
req = urllib.request.Request(url_picture[i], None, headers)
find_picture = urllib.request.urlopen(req).read().decode("GB2312", "ignore")
except urllib.error.URLError as e:
# 判斷是否有狀態碼
if hasattr(e, "code"):
print(e.code)
# 判斷是否有原因
if hasattr(e, "reason"):
print(e.reason)
except Exception as error_understand:
print("未知錯誤")
else:
fp = open("F:/picture/%d.jpg" % i, "w", encoding = "GB2312")
fp.write(find_picture)
fp.close()
print("第 %d 個圖片添加成功" % (i + 1))
print("爬取結束")
正則運算式可以獲取到內容,就是下載出來圖片以后打不開,顯示無法打開此檔案..怎么解,求大佬告知!!
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/150328.html
上一篇:資料庫被黑預防
