我用vba爬快遞資訊,但是總是提示“查無結果”,網上查了很多資料,說是IP被限制了。
針對這個爬蟲專案,我有幾個問題,請大神幫我指點指點,或者直接修改代碼,萬分感謝!
1.爬快遞100的時候,有個引數是temp,這個引數有沒有用,怎么去解決temp問題?
2.ip被限制了,怎么使用代理ip,在vba中怎么寫程式?
3.使用代理ip后,cookie和user-agent怎么處理?
爬kuaidi100的代碼如下:
Sub kd100()
Application.EnableEvents = True
Application.ScreenUpdating = False
Dim xmlhttp As Object, str1 As String, str2 As String
Dim i%, j%
Range("d1:e70").ClearContents
Set xmlhttp = CreateObject("WinHttp.WinHttpRequest.5.1") 'MSXML2.XMLHTTP 'WinHttp.WinHttpRequest.5.1
With xmlhttp
If Cells(2, 2) <> "" Then
.Open "post", "http://www.kuaidi100.com/autonumber/autoComNum?text=" & Trim(Cells(2, 2).Value)
.send 'post請求,目的是獲得快遞公司名稱
str1 = Split(Split(.responseText, "comCode"":""")(2), """")(0)
Debug.Print str1
.Open "get", "http://www.kuaidi100.com/query?type=" & str1 & "&postid=" & Trim(Cells(2, 2).Value) & "&temp=" & Rnd() & "phone="
.Option(6) = False
.setRequestHeader "Accept", "application/json, text/javascript, */*; q=0.01"
.setRequestHeader "Accept-Language", "zh-CN,zh;q=0.8"
.setRequestHeader "Connection", "keep -alive"
.setRequestHeader "Cookie", "csrftoken=px575xwqdFtCFzOOrLlIbgcfkqB4646zHZpXPPWGOV4; __guid=112420610.3231980096521463300.1605887476529.404; TOKEN=m5g-GpdoICyx5_4SH5MOg6HlV--i6LjrGFcjoE1eCR0; USER_NAME=%E6%B2%B3%E5%8D%97%E7%9C%81%E4%BA%94%E5%8D%81%E9%93%83%E5%95%86%E8%B4%B8%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8; IS_LOGGED=YES; monitor_count=5; WWWID=WWWC35F4AB686DE10806682D6B6AFBDEF1A; loginId=386005580; loginType=POLL; loginName=hennan_wushiling; nickname=hennan_wushiling; loginEmail=null; loginMobile=null; loginExt=null; auth=32; loginSession=1; Hm_lvt_22ea01af58ba2be0fec7c11b25e88e6c=1605887478,1605969167,1605974529; Hm_lpvt_22ea01af58ba2be0fec7c11b25e88e6c=1605974529"
.setRequestHeader "Host", "www.kuaidi100.com"
.setRequestHeader "Referer", "https://www.kuaidi100.com/"
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.send
str2 = .responseText '取得物流資料
Debug.Print str2
'MsgBox str2
End If
End With
Set Ms = CreateObject("MSScriptControl.ScriptControl") '用這個處理提取出來的資料,可是64位office不支持
Ms.Language = "javascript"
Ms.AddCode "a=" & str2
'MsgBox Ms.eval("a.com")
For j = 0 To Ms.eval("a.data.length") - 1
Cells(j + 5, 4) = Ms.eval("a.data[" & j & "].time")
Cells(j + 5, 5) = Ms.eval("a.data[" & j & "].context")
'Cells(j + 5, 6) = Ms.eval("a.data[" & j & "].ftime") 'ftime和time差不多,基本一樣
Next j
Application.ScreenUpdating = True
Application.EnableEvents = True
End Sub
爬kuaidihelp的代碼如下:
Sub Kuaidihelp()
Dim strText As String, URL As String, KdNum As String
Dim i%, Mh As Object, Arr(1 To 99, 1 To 2)
Range("q2:r100").ClearContents: [p2:p3] = "" '前期處理
KdNum = "9863588744188"
URL = "http://m.kuaidihelp.com/express/queryResult"
With CreateObject("WinHttp.WinHttpRequest.5.1") '
.Open "get", URL, False 'POST,GET 根據需要更改
.Option(6) = False
.setRequestHeader "Accept", " */*"
.setRequestHeader "Accept-Language", "zh-CN,zh;q=0.9"
.setRequestHeader "Connection", "keep -alive"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.setRequestHeader "X-Requested-With", "XMLHttpRequest" '這個一定要加
.setRequestHeader "Referer", "https://m.kuaidihelp.com/express/queryResult?word=" & Trim(KdNum) '這個可以省略
.setRequestHeader "Cookie", "__guid=186342133.2521275814532956700.1605883142010.0737; UM_distinctid=175e61820a0422-0bea918f4178-376b4502-1fa400-175e61820a1455; CNZZDATA30097644=cnzz_eid%3D397802876-1605881974-%26ntime%3D1605881974; CNZZDATA1260433975=1176996879-1605881344-%7C1605881344; CNZZDATA1278216271=1470525690-1605878972-https%253A%252F%252Fm.kuaidihelp.com%252F%7C1605960799; saltcode=xDCvG4VQwEqKYiGyBxKugA%3D%3D; rand=vb7eqjOzXVHZQNG1bz0qLQ%3D%3D; monitor_count=29"
.setRequestHeader "Host", "m.kuaidihelp.com"
.setRequestHeader "Origin", "https://m.kuaidihelp.com"
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
.send "waybill=" & Trim(KdNum)
Debug.Print .responseText
strText = TestRegUni(.responseText) '轉譯成正常的文本
strText = Replace(strText, "<br\/>", " ") '把時間和日期中間的<br\/>替換成空格
End With
'######################對資料進行處理并寫入到單元格###########################
With CreateObject("vbscript.regexp") '用正則提取資料
.Global = True
.Pattern = "date"":""(.+?)"",""info"":""(.+?)""}"
Set Mh = .Execute(strText)
If Mh.Count = 0 Then Debug.Print "該快遞單號無法識別": Exit Sub
For i = 1 To Mh.Count
Arr(i, 1) = Mh(i - 1).submatches(0)
Arr(i, 2) = Mh(i - 1).submatches(1)
Next
End With
[q2].Resize(99, 2) = Arr
[p2] = Split(Split(strText, """,""brand_key")(0), "name"":""")(1)
[p3] = Split(Split(strText, """,""data")(0), "msg"":""")(1)
Range("Q2:Q100").NumberFormat = "yyyy-mm-dd hh:mm"
End Sub
Function TestRegUni(str As String) As String '用正則提取后處理轉譯,把response的內容轉成常規內容
Dim strTemp$, i%, y%, Arr(1 To 10000, 1 To 2), 定義足夠的的陣列
Dim iReg As Object, iMch As Object, Mch As Object
Dim d As Object
strTemp = str
Set d = CreateObject("scripting.dictionary") '字典去重復內容,可能提高運行速度!
Set iReg = CreateObject("vbscript.regexp") '提取轉譯內容
iReg.Global = True
iReg.Pattern = "\\u\w{4}"
Set iMch = iReg.Execute(str)
For Each Mch In iMch
If Not d.exists(Mch) Then
d(Mch) = ""
y = y + 1
Arr(y, 1) = ChrW(CLng(Replace(Mch.Value, "\u", "&h")))
Arr(y, 2) = Mch.Value
End If
Next
For i = 1 To d.Count
strTemp = Replace(strTemp, Arr(i, 2), Arr(i, 1))
Next
TestRegUni = strTemp
Set iReg = Nothing
Set d = Nothing
End Function
轉載請註明出處,本文鏈接:https://www.uj5u.com/gongcheng/226519.html
標籤:VBA
