代碼
# Browser User-Agent strings; each one becomes a ready-to-use headers dict below.
_USER_AGENTS = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
)

# One single-entry headers dict per User-Agent, in the order listed above.
headersList = [{'User-Agent': agent} for agent in _USER_AGENTS]
def getHtmlList(url):
    """Fetch *url* with a randomly chosen User-Agent, retrying on failure.

    Up to five attempts are made; after each failed attempt the function
    sleeps 15 seconds and prints the current time and the failing URL.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The ``requests.Response`` of the first attempt that succeeds, or
        ``None`` when all five attempts raise a request error.
    """
    # One headers dict is picked per call and reused for every retry.
    headers = random.choice(headersList)
    for _ in range(5):
        try:
            return requests.get(url, headers=headers, timeout=100)
        except requests.RequestException:
            # Only network/HTTP-level failures are retried, so Ctrl+C and
            # programming errors are no longer swallowed.
            time.sleep(15)
            print(time.ctime())
            print("失敗"+url)
    return None


def get_apple_lastest_version():
    """Return the version string found on the app's App Store page.

    The page is downloaded through ``getHtmlList`` and searched for text of
    the form ``版本 X.Y.Z</p>``. When no such text is present, the page body
    is written to ``C:\\temp20191023\\error.txt`` for later inspection.

    Returns:
        The matched version text, or ``"0.0.0"`` when the page could not be
        downloaded or contains no matching version text.
    """
    res_apple = getHtmlList('https://apps.apple.com/cn/app/zhi-wu-da-zhan-jiang-shi2/id639516529')
    if res_apple is None:
        # Every download attempt failed; there is no body to parse or dump.
        return "0.0.0"
    res_apple.encoding = 'utf-8'
    # Dots are escaped so they match only a literal "." between the numbers;
    # the capture group isolates the bare version from the surrounding text.
    found = re.search(r"版本 ([2-9]\.[4-9]\.[0-9][0-9]?)</p>", res_apple.text)
    if found is None:
        file_temp = "C:\\temp20191023\\error.txt"
        try:
            # Explicit UTF-8 avoids encode errors from the platform default.
            with open(file_temp, 'w', encoding='utf-8') as data_temp:
                print(res_apple.text, file=data_temp)
        except OSError as err:
            # The dump is diagnostic only; failing to write it must not stop
            # the caller from receiving the "not found" result.
            print("could not write " + file_temp + ": " + str(err))
        return "0.0.0"
    return found.group(1)
每 60~120 秒讀取一次。一段時間後 get 到的錯誤 HTML 如下:
<!DOCTYPE html>
<html lang="zh-cn" prefix="og: http://ogp.me/ns#"> <head> <meta charset="utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover"> <meta name="web-experience-app/config/environment" content="%7B%22appVersion%22%3A1%2C%22modulePrefix%22%3A%22web-experience-app%22%2C%22environment%22%3A%22production%22%2C%22rootURL%22%3A%22/%22%2C%22locationType%22%3A%22history-hash-router-scroll%22%2C%22historySupportMiddleware%22%3Atrue%2C%22contentSecurityPolicyMeta%22%3Atrue%2C%22contentSecurityPolicy%22%3A%7B%22default-src%22%3A%5B%22%27none%27%22%5D%2C%22img-src%22%3A%5B%22%27self%27%22%2C%22http%3A//*.mzstatic.com%22%2C%22*.mzstatic.com%22%2C%22*.apple.com%22%2C%22*.googleusercontent.com%22%2C%22data%3A%22%5D%2C%22style-src%22%3A%5B%22%27self%27%22%2C%22%27unsafe-inline%27%22%2C%22*.apple.com%22%5D%2C%22font-src%22%3A%5B%22%27self%27%22%2C%22http%3A//*.apple.com%22%2C%22https%3A//*.apple.com%22%5D%2C%22medi
到底是什麼情況…應該怎麼辦…?求各位大神給個辦法。
uj5u.com熱心網友回復:
def get_apple_lastest_version():
    """Return the version string found on the app's App Store page.

    The page is downloaded through ``getHtmlList`` and searched for text of
    the form ``版本 X.Y.Z</p>``. When no such text is present, the page body
    is written to ``C:\\temp20191023\\error.txt`` for later inspection.

    Returns:
        The matched version text, or ``"0.0.0"`` when ``getHtmlList`` returned
        ``None`` or the page contains no matching version text.
    """
    res_apple = getHtmlList('https://apps.apple.com/cn/app/zhi-wu-da-zhan-jiang-shi2/id639516529')
    if res_apple is None:
        # No response object was obtained, so there is nothing to parse.
        return "0.0.0"
    res_apple.encoding = 'utf-8'
    # Escaped dots match only a literal "."; the capture group replaces the
    # second findall pass that extracted the bare version number.
    version_match = re.search(r"版本 ([2-9]\.[4-9]\.[0-9][0-9]?)</p>", res_apple.text)
    if version_match is None:
        file_temp = "C:\\temp20191023\\error.txt"
        try:
            # Explicit UTF-8 avoids encode errors from the platform default.
            with open(file_temp, 'w', encoding='utf-8') as data_temp:
                print(res_apple.text, file=data_temp)
        except OSError as err:
            # The dump is diagnostic only; a write failure must not hide the
            # "not found" result from the caller.
            print("could not write " + file_temp + ": " + str(err))
        return "0.0.0"
    return version_match.group(1)
uj5u.com熱心網友回復:
# Pool of request headers; each entry carries a different browser User-Agent.
headersList = [
    dict([('User-Agent', ua_string)])
    for ua_string in (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    )
]
def getHtmlList(url):
    """Fetch *url* with a randomly chosen User-Agent, retrying on failure.

    Up to five attempts are made; after each failed attempt the function
    sleeps 15 seconds and prints the current time and the failing URL.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The ``requests.Response`` of the first attempt that succeeds, or
        ``None`` when all five attempts raise a request error.
    """
    # One headers dict is picked per call and reused for every retry.
    headers = random.choice(headersList)
    max_attempts = 5
    for _ in range(max_attempts):
        try:
            return requests.get(url, headers=headers, timeout=100)
        except requests.RequestException:
            # Only network/HTTP-level failures are retried, so Ctrl+C and
            # programming errors are no longer swallowed.
            time.sleep(15)
            print(time.ctime())
            print("失敗"+url)
    # Explicit so callers can see they must handle the all-attempts-failed case.
    return None
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/139512.html
