import os
import threading
import time
from queue import Empty

import lxml
import requests
from bs4 import BeautifulSoup
from gevent.queue import Queue
class PictureMM():
    """Multi-threaded downloader for the picture galleries listed on mmonly.cc.

    The work is a four-stage pipeline: list pages -> galleries -> picture
    pages -> image files.  Each stage is executed by a group of threads that
    is joined before the next stage starts, so a stage only has to drain the
    queue that the previous stage filled.

    Every queue entry after the first stage is a ``(url, name)`` tuple.
    Keeping URLs and names in two separate queues allowed concurrent workers
    to interleave their put/get calls and save images under the wrong names.

    Only ``put`` and ``get_nowait`` are called on the queues.
    NOTE(review): the file imports ``Queue`` from gevent, which is designed
    for greenlets; consider the stdlib ``queue.Queue`` for native threads.
    """

    # Path separators and characters that are not allowed in file names on
    # common platforms; each one is replaced with an underscore.
    _UNSAFE_FILENAME_CHARS = str.maketrans(dict.fromkeys('\\/:*?"<>|', '_'))

    def __init__(self, page, timeout=10, save_dir='妹子圖'):
        """Set up the pipeline queues.

        Args:
            page: Number of list pages to crawl (anything ``int()`` accepts).
            timeout: Seconds to wait for each HTTP request.
            save_dir: Directory the images are written to.
        """
        # NOTE(review): this user-agent looks like a placeholder; non-Latin-1
        # header values probably cannot be encoded when the request is sent.
        # Replace it with a real browser user-agent string.
        self.headers = {'user-agent': '嘻嘻嘻'}
        self.page = page
        self.timeout = timeout
        self.save_dir = save_dir
        self.url = 'https://www.mmonly.cc/mmtp/list_9_{}.html'
        self.url_list0_queue = Queue()  # list page URLs
        self.url_list1_queue = Queue()  # (gallery URL, gallery name)
        self.url_list2_queue = Queue()  # (picture page URL, picture name)
        self.url_queue = Queue()        # (image file URL, picture name)
        # No longer used (names now travel with their URL); kept so existing
        # attribute access does not break.
        self.name1_queue = Queue()
        self.name2_queue = Queue()
        self.count = 0
        self.lock1 = threading.Lock()   # guards self.count
        # Not used by this class; kept for backward compatibility.
        self.lock2 = threading.Lock()
        self.lock3 = threading.Lock()

    @staticmethod
    def _safe_filename(name):
        """Return *name* with unsafe file-name characters replaced by '_'."""
        cleaned = name.translate(PictureMM._UNSAFE_FILENAME_CHARS).strip()
        return cleaned or 'unnamed'

    @staticmethod
    def _page_entries(url, name, li_count):
        """Build the ``(page URL, picture name)`` entries of one gallery.

        *li_count* is the number of ``<li>`` items in the gallery's pager;
        pages are numbered ``1 .. li_count - 3``.  Page 1 is the gallery URL
        itself; page ``i`` inserts ``_i`` before the last five characters of
        the URL (the ``.html`` suffix on this site).
        """
        entries = []
        for i in range(1, li_count - 2):
            page_url = url if i == 1 else f'{url[:-5]}_{i}{url[-5:]}'
            entries.append((page_url, f'{name}{i}'))
        return entries

    def _drain(self, source, handler):
        """Call ``handler(item)`` for every item in *source* until it is empty.

        ``get_nowait`` is used instead of "check the length, then get" so a
        worker can never block on a queue another worker has just emptied.
        A failing item is reported and skipped so that one bad page does not
        kill the worker thread.
        """
        while True:
            try:
                item = source.get_nowait()
            except Empty:
                return
            try:
                handler(item)
            # OSError covers file errors and requests' exceptions (which
            # derive from it); ValueError covers decoding failures; the
            # others are raised when an expected tag or attribute is missing.
            except (OSError, ValueError, AttributeError, TypeError, KeyError) as err:
                print(f"{item}-------處理失敗: {err!r}")

    def _fetch_soup(self, url):
        """Download *url* and return it parsed; raises on HTTP error status."""
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        res.raise_for_status()
        return BeautifulSoup(res.content.decode('gbk'), 'lxml')

    def _scan_list_page(self, url):
        """Queue ``(gallery URL, gallery name)`` for each item on a list page."""
        soup = self._fetch_soup(url)
        items = soup.find('div', id='infinite_scroll').find_all(
            'div', class_='item masonry_brick masonry-brick')
        for item in items:
            link = item.find('div').find('div').find('div').find('a')
            self.url_list1_queue.put((link['href'], link.find('img')['alt']))

    def _expand_gallery(self, entry):
        """Queue one ``(page URL, picture name)`` per picture of a gallery."""
        url, name = entry
        soup = self._fetch_soup(url)
        pager_items = soup.find('div', class_="wrapper clearfix imgtitle").find(
            'div', class_='pages').find('ul').find_all('li')
        for page_entry in self._page_entries(url, name, len(pager_items)):
            self.url_list2_queue.put(page_entry)

    def _resolve_image(self, entry):
        """Queue ``(image file URL, picture name)`` for one picture page."""
        url, name = entry
        soup = self._fetch_soup(url)
        img_url = soup.find(id='big-pic').find('p').find('a').find('img')['src']
        self.url_queue.put((img_url, name))

    def _save_image(self, entry):
        """Download one image and write it to ``save_dir`` as ``<name>.jpg``."""
        url, name = entry
        # Fetch before opening the file so a failed request leaves no empty file.
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        res.raise_for_status()
        os.makedirs(self.save_dir, exist_ok=True)
        path = os.path.join(self.save_dir, self._safe_filename(name) + '.jpg')
        with open(path, 'wb') as file:
            file.write(res.content)
        with self.lock1:
            self.count += 1
        print(name + "-------下載成功!")

    def url_list(self):
        """Queue the URL of every list page from 1 to ``page``."""
        for number in range(1, int(self.page) + 1):
            self.url_list0_queue.put(self.url.format(number))

    def get_img_page(self):
        """Stage 1 worker: collect the gallery links found on each list page."""
        self._drain(self.url_list0_queue, self._scan_list_page)

    def get_img_url(self):
        """Stage 2 worker: derive the page URL and name of every picture."""
        self._drain(self.url_list1_queue, self._expand_gallery)

    def download_url(self):
        """Stage 3 worker: find the image file URL on every picture page."""
        self._drain(self.url_list2_queue, self._resolve_image)

    def download_img(self):
        """Stage 4 worker: download and save every image."""
        self._drain(self.url_queue, self._save_image)

    def _run_stage(self, target, workers):
        """Run *target* in *workers* daemon threads and wait for all of them."""
        threads = [threading.Thread(target=target, daemon=True)
                   for _ in range(workers)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

    def run(self, workers=20):
        """Run the whole pipeline, one stage after another.

        Args:
            workers: Number of threads used for each stage.
        """
        self.url_list()
        stages = (self.get_img_page, self.get_img_url,
                  self.download_url, self.download_img)
        for stage in stages:
            self._run_stage(stage, workers)
        print("所有圖片下載完成")
# Script entry point: ask how many list pages to crawl, run the downloader
# and report how many images were saved and how long it took.
if __name__ == '__main__':
    print("歡迎美圖批量下載!")
    page = input("請輸入需要下載頁數(每頁170張):")
    # Reject bad input here instead of failing later inside the downloader.
    try:
        page_count = int(page)
    except ValueError:
        page_count = 0
    if page_count < 1:
        print("頁數必須是正整數!")
        raise SystemExit(1)
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs('妹子圖', exist_ok=True)
    my = PictureMM(page)
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    my.run()
    elapsed = time.perf_counter() - start
    print("總共下載了%d張圖片,用時:%.2f秒" % (my.count, elapsed))
# 轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/212712.html
# 下一篇:mybatis時間查詢小技巧
