import re
import requests
import urllib.request
from bs4 import BeautifulSoup
from urllib.request import urlopen
import time
import os
# Scraper for stock.tuchong.com: read the home page, collect the topic ids
# embedded in it, then download every image of every topic into a per-topic
# folder under SAVE_ROOT.

# Browser-like User-Agent sent with the HTML page requests.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}

INDEX_URL = 'https://stock.tuchong.com/?source=tc_pc_home'
TOPIC_URL_PREFIX = 'https://stock.tuchong.com/topic?topicId='
IMAGE_URL_PREFIX = 'https://icweiliimg6.pstatp.com/weili/l/'
IMAGE_SUFFIX = '.webp'  # the image host serves WebP, so files are saved as .webp
SAVE_ROOT = 'E:/新建檔案夾4/爬蟲圖片'
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the run
DOWNLOAD_DELAY = 2  # seconds to wait between image downloads

# Compiled once; both pages embed the ids as JSON-like text.
TOPIC_ID_RE = re.compile(r'"topicId":(.*?),')
IMAGE_ID_RE = re.compile(r'"imageId":"(.*?)"')
# Topic ids end up in a directory name, so only plain identifier text is accepted.
SAFE_ID_RE = re.compile(r'[\w-]+')


def unique_in_order(items):
    """Return the items as a list with duplicates removed, first occurrence kept."""
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def extract_topic_ids(page_text):
    """Return the distinct topic ids found in the home page text, as strings.

    Surrounding whitespace and double quotes are stripped, and ids that are
    not plain word characters or hyphens are dropped because the id is later
    used as part of a file-system path.
    """
    cleaned = (raw.strip().strip('"') for raw in TOPIC_ID_RE.findall(page_text))
    return unique_in_order(tid for tid in cleaned if SAFE_ID_RE.fullmatch(tid))


def extract_image_ids(page_text):
    """Return the distinct, non-empty image ids found in a topic page text."""
    return unique_in_order(iid for iid in IMAGE_ID_RE.findall(page_text) if iid)


def build_image_url(image_id):
    """Return the download URL for the given image id."""
    return IMAGE_URL_PREFIX + image_id + IMAGE_SUFFIX


def build_save_path(topic_id, number):
    """Return (folder, filename) for the number-th image of a topic."""
    folder = '{}/圖片{}'.format(SAVE_ROOT, topic_id)
    filename = '{}/圖片{}{}'.format(folder, number, IMAGE_SUFFIX)
    return folder, filename


def fetch_text(url):
    """Download a page with the browser-like headers and decode it as UTF-8."""
    req = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
        return resp.read().decode('UTF-8')


def download_image(image_url, filename):
    """Fetch one image with requests and write its bytes to filename.

    requests is used instead of urllib.request.urlretrieve because the latter
    was observed to return "403 forbidden" for these URLs.
    """
    res = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    # Do not write an HTML error page to disk as if it were an image.
    res.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(res.content)


def download_topic(topic_id):
    """Download every image referenced by one topic page."""
    image_ids = extract_image_ids(fetch_text(TOPIC_URL_PREFIX + str(topic_id)))
    print(image_ids)
    for number, image_id in enumerate(image_ids, start=1):
        image_url = build_image_url(image_id)
        folder, filename = build_save_path(topic_id, number)
        try:
            os.makedirs(folder, exist_ok=True)
            download_image(image_url, filename)
            print(filename)
        except OSError as e:
            # Network errors from requests and file errors are both OSError
            # subclasses; report and move on to the next image (best effort).
            print('download failed:', image_url, e)
        time.sleep(DOWNLOAD_DELAY)


def main():
    """Scrape the home page and download the images of every topic on it."""
    topic_ids = extract_topic_ids(fetch_text(INDEX_URL))
    print(topic_ids)
    for topic_id in topic_ids:
        try:
            # Each topic's own page is fetched (the id was previously
            # hard-coded, so every folder received the same images).
            download_topic(topic_id)
        except (OSError, UnicodeDecodeError) as e:
            # One unreachable or undecodable topic page must not abort the rest.
            print('topic failed:', topic_id, e)


if __name__ == '__main__':
    main()
uj5u.com熱心網友回復:
這是給大家的例子嗎?

uj5u.com熱心網友回復:
是啊,寫了一個小例子。

轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/106379.html
上一篇:print輸出見鬼了
