爬蟲百度百萬高清美圖源代碼
# -*- coding: utf-8 -*-
Date:2020-09-20 16:52
USER:gordon_lu
使用正則運算式篩選指定的 URL 鏈接。
r"""
                       _oo0oo_
                      o8888888o
                      88" . "88
                      (| -_- |)
                      0\  =  /0
                    ___/`---'\___
                  .' \\|     |// '.
                 / \\|||  :  |||// \
                / _||||| -卍- |||||- \
               |   | \\\  -  /// |   |
               | \_|  ''\---/''  |_/ |
               \  .-\__  '-'  ___/-. /
             ___'. .'  /--.--\  `. .'___
          ."" '<  `.___\_<|>_/___.' >' "".
         | | :  `- \`.;`\ _ /`;.`/ - ` : | |
         \  \ `_.   \_ __\ /__ _/   .-` /  /
     =====`-.____`.___ \_____/___.-`___.-'=====
                       `=---='
"""
import os
import random
import re
from urllib.parse import quote

import requests
# Step 1: build the search URL.
# The keyword is percent-encoded before being placed in the query string so
# that spaces, non-ASCII text and reserved characters such as "&" or "#"
# cannot truncate or corrupt the remaining parameters.
word = input("請輸入你要爬蟲的內容【暫時僅支持英文和字母】:")
url = f'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=&st=-1&fm=result&fr=&sf=1&fmq=1600592048477_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word={quote(word, safe="")}'
def random_user_agent():
    """Return a randomly chosen browser User-Agent string.

    Returns:
        str: One of the User-Agent strings listed in ``ulist``.
    """
    # Every entry must end with a comma: adjacent string literals are
    # silently concatenated, which would yield one invalid User-Agent made of
    # two glued together. Product tokens are written as "Name/version" with
    # no surrounding spaces, and the duplicated entry has been dropped.
    ulist = (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4183.102 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
    )
    return random.choice(ulist)
def get_image(url, timeout=10):
    """Download every image referenced by a search-result page.

    Fetches ``url``, extracts each ``"objURL"`` value from the response text
    with a regular expression, and writes each image into ``./baidu_pic/``.
    The file name is the last path segment of the image URL; ``.jpg`` is
    appended when the name does not already end in a known image extension.

    Args:
        url: Address of the result page. It is also sent as the ``referer``
            header of the page request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Images are written to disk and each file name is printed.

    Raises:
        requests.RequestException: If the result page itself cannot be
            fetched. Failures on individual images are reported and skipped.
    """
    headers = {
        "user-agent": random_user_agent(),
        "referer": url,
    }
    result = requests.get(url, headers=headers, timeout=timeout).text

    # The original author notes that the full-size image URL is hard to
    # locate; it is taken from the "objURL" fields of the response text.
    image_urls = re.findall('"objURL":"(.*?)"', result)

    # Create the output directory up front so open() cannot fail because it
    # is missing.
    os.makedirs('./baidu_pic', exist_ok=True)

    known_extensions = ('.jpg', '.png', '.jpeg', '.gif')
    for image_url in image_urls:
        # Use the last path segment of the URL as the file name.
        image_name = image_url.split('/')[-1]
        print(image_name)
        # Literal, case-insensitive suffix check (the previous pattern used
        # unescaped dots, so e.g. "xjpg" was treated as having an extension).
        if not image_name.lower().endswith(known_extensions):
            image_name = image_name + '.jpg'

        try:
            response = requests.get(image_url, timeout=timeout)
            # Do not save HTTP error pages as if they were images.
            response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable image must not abort the remaining downloads.
            print('skipped %s: %s' % (image_url, exc))
            continue

        with open('./baidu_pic/%s' % image_name, 'wb') as f:
            f.write(response.content)
if __name__ == "__main__":
    # Start the crawl only when the file is executed as a script, so that
    # importing it does not trigger the downloads.
    get_image(url)
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/105354.html
標籤:其他
上一篇:計算機網路第一章知識點總結(根據b站方老師的課整理)
下一篇:快速冪計算 (簡單計算器)
