本人新手菜鳥,自學兩個月...昨天嘗試用urllib批量下載網站圖片,顯示HTTPError: Forbidden,于是想要構造頭部來解決,又發現構造頭部后顯示'module' object is not callable,求大神解答!
第一次嘗試代碼:
import re
import os
from urllib import request
#5.請求頁面函式
def get_html(url):
page = request.urlopen(url)
html = page.read.decode("utf-8")
return html
#6.正則提取url串列函式
def get_url_list(html):
pattern = r'"auto" src="https://bbs.csdn.net/topics/(.+?\.jpeg)"'
img_list = re.findall(pattern,html)
return img_list
#7. 下載圖片函式
def downloadings(url_list):
#a.創建保存路徑
if not os.path.exsits("python_pictures"):
os.mkdir("python_pictures")
#b.提取下載圖片
x = 1#下載圖片的序號,也用作下載圖片的名稱
print("開始下載,共{count}張圖片".format(count=len(url_list)))
for img_url in url_list:
print("第{count}張圖片".format(count=x))
request.urlretrieve(img_url,os.path.join("python_pictures","{num}.jpg",format(num = x)))
x+=1
print("下載完成。")
if __name__ == "__main__":
#1.確定URL
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36'}
url = "http://www.360doc.com/content/18/0415/17/1127866_745873284.shtml"
#2.請求網頁
html = get_html(url)
#3.基于正則運算式提取圖片url,獲得下載串列
url_list = get_url_list(html)
#4.基于下載佇列下載圖片
downloadings(url_list)
第二次嘗試代碼:
import re
import os
from urllib import request
#5.請求頁面函式
def get_html(url):
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36'}
page1= request (url,headers=headers)
page= request.urlopen(page1)
html = page.read.decode("utf-8")
return html
#6.正則提取url串列函式
def get_url_list(html):
pattern = r'"auto" src="https://bbs.csdn.net/topics/(.+?\.jpeg)"'
img_list = re.findall(pattern,html)
return img_list
#7. 下載圖片函式
def downloadings(url_list):
#a.創建保存路徑
if not os.path.exsits("python_pictures"):
os.mkdir("python_pictures")
#b.提取下載圖片
x = 1#下載圖片的序號,也用作下載圖片的名稱
print("開始下載,共{count}張圖片".format(count=len(url_list)))
for img_url in url_list:
print("第{count}張圖片".format(count=x))
request.urlretrieve(img_url,os.path.join("python_pictures","{num}.jpg",format(num = x)))
x+=1
print("下載完成。")
if __name__ == "__main__":
#1.確定URL
url = "http://www.360doc.com/content/18/0415/17/1127866_745873284.shtml"
#2.請求網頁
html = get_html(url)
#3.基于正則運算式提取圖片url,獲得下載串列
url_list = get_url_list(html)
#4.基于下載佇列下載圖片
downloadings(url_list)
uj5u.com熱心網友回復:
我發現你們呼叫函式都不加括號的,不加括號就是一個函式物件的變數,能干什么?轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/130436.html
上一篇:人臉識別MATLAB語言
