我一直在嘗試從網頁中獲取表格的第一個提供者名稱。由於該表格的資料分布在多個頁面上,我在腳本中建立了一個迴圈,以便從後續的每一頁中獲取第一個提供者名稱。我就是弄不清楚自己到底哪裡做錯了,因為我得到的始終是第一頁表格中的第一個提供者名稱,而不是後續頁面中的名稱。
如果您想知道如何從該頁面手動填充結果:
- 轉到該網站(http://carf.org/advancedProviderSearch.aspx)
- 從 Country 旁邊的下拉選單中選擇 US
- 按下 Search 按鈕
我在 python 中使用了相同的邏輯并發現成功。但是,唯一與此處不同的是我在 python 腳本中創建了會話并在每個回圈中重用了相同的會話。
這就是我目前的方法的樣子:
/**
 * Logs the first provider name found in the results table of five
 * consecutive responses from the CARF advanced provider search (Country = US).
 *
 * Flow: one GET to read the ASP.NET hidden form fields, then five POSTs.
 * The first POST submits the search form (btnAdvSearch); every later POST
 * re-sends the hidden fields scraped from the previous response with
 * __EVENTTARGET set to a control inside the results grid (presumably the
 * pager's "next page" link -- verify against the page markup).
 *
 * NOTE: no cookie from any response is forwarded to the following request;
 * each request carries only the User-Agent header defined below.
 *
 * Expects the Apps Script `UrlFetchApp` service and a `Cheerio` library to
 * be available as globals.
 *
 * @returns {void} Output goes to console.log, one line per request.
 */
function fetchInformation() {
  const url = 'http://carf.org/advancedProviderSearch.aspx';
  const pageCount = 5;
  const nextPageTarget = 'ctl00$body$gvProviderAdvSearch$ctl13$ctl01';
  const headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
  };

  // Reads the value attribute of the first element with the given id.
  const hiddenValue = ($, id) => $(`[id="${id}"]`).first().attr('value');

  // Builds the form body: hidden state fields scraped from the parsed page,
  // the fixed search filters, and the request-specific fields on top.
  const buildPayload = ($, extraFields) => Object.assign({
    '__VIEWSTATE': hiddenValue($, '__VIEWSTATE'),
    'EktronClientManager': hiddenValue($, 'EktronClientManager'),
    '__VIEWSTATEGENERATOR': hiddenValue($, '__VIEWSTATEGENERATOR'),
    '__EVENTVALIDATION': hiddenValue($, '__EVENTVALIDATION'),
    'ctl00$usrWebsiteSearch$txtWebsiteSearch': 'Search CARF.org',
    'ctl00$body$ddlCity': '0',
    'ctl00$body$ddlZipcode': '0',
    'ctl00$body$ddlCountry': 'US',
    'ctl00$body$ddlProgram': '',
    'ctl00$body$ddlProgramFocus': '',
    'ctl00$body$ddlPopulation': '',
  }, extraFields);

  const landingResponse = UrlFetchApp.fetch(url, {
    'method': 'GET',
    'muteHttpExceptions': true,
    'headers': headers,
  });

  // The first POST is the actual search submission.
  let payload = buildPayload(Cheerio.load(landingResponse.getContentText()), {
    '__EVENTTARGET': '',
    'ctl00$body$btnAdvSearch': 'Search',
  });

  // A bounded for-loop replaces the original `while (i<5)` whose counter was
  // never incremented (`i ;`), which made the loop run forever.
  for (let page = 0; page < pageCount; page++) {
    const response = UrlFetchApp.fetch(url, {
      'method': 'POST',
      'payload': payload,
      'muteHttpExceptions': true,
      'headers': headers,
    });
    const $ = Cheerio.load(response.getContentText());
    console.log($('table#ctl00_body_gvProviderAdvSearch tr td > a').first().text());

    // The next request must carry the state fields of the page just received.
    payload = buildPayload($, { '__EVENTTARGET': nextPageTarget });
  }
}
如何從分布在下一頁的每個頁面的表格中獲取第一個提供商名稱?
uj5u.com熱心網友回復:
改裝要點:
- 當我看到您的腳本時,我認為在您的情況下,可能需要包含cookie。
- 不幸的是,在當前階段,無法透過 UrlFetchApp 更改 User-Agent。
當這些點反映在你的腳本中時,它變成如下。
修改后的腳本:
/**
 * Logs the first provider name found in the results table of five
 * consecutive responses from the CARF advanced provider search
 * (Country = US), forwarding the cookies set by the initial GET so that all
 * POSTs belong to the same server-side session.
 *
 * Flow: one GET to read the ASP.NET hidden form fields and the Set-Cookie
 * headers, then five POSTs. The first POST submits the search form
 * (btnAdvSearch); every later POST re-sends the hidden fields scraped from
 * the previous response with __EVENTTARGET set to a control inside the
 * results grid (presumably the pager's "next page" link -- verify against
 * the page markup).
 *
 * Expects the Apps Script `UrlFetchApp` service and a `Cheerio` library to
 * be available as globals.
 *
 * @returns {void} Output goes to console.log, one line per request.
 */
function fetchInformation() {
  const url = 'http://carf.org/advancedProviderSearch.aspx';
  const pageCount = 5;
  const nextPageTarget = 'ctl00$body$gvProviderAdvSearch$ctl13$ctl01';

  // Reads the value attribute of the first element with the given id.
  const hiddenValue = ($, id) => $(`[id="${id}"]`).first().attr('value');

  // Builds the form body: hidden state fields scraped from the parsed page,
  // the fixed search filters, and the request-specific fields on top.
  const buildPayload = ($, extraFields) => Object.assign({
    '__VIEWSTATE': hiddenValue($, '__VIEWSTATE'),
    'EktronClientManager': hiddenValue($, 'EktronClientManager'),
    '__VIEWSTATEGENERATOR': hiddenValue($, '__VIEWSTATEGENERATOR'),
    '__EVENTVALIDATION': hiddenValue($, '__EVENTVALIDATION'),
    'ctl00$usrWebsiteSearch$txtWebsiteSearch': 'Search CARF.org',
    'ctl00$body$ddlCity': '0',
    'ctl00$body$ddlZipcode': '0',
    'ctl00$body$ddlCountry': 'US',
    'ctl00$body$ddlProgram': '',
    'ctl00$body$ddlProgramFocus': '',
    'ctl00$body$ddlPopulation': '',
  }, extraFields);

  // Turns the response's Set-Cookie header(s) into a Cookie request header.
  // getAllHeaders() yields a string for a single header and an array for
  // repeated ones, so both shapes are normalised; only the leading
  // "name=value" pair of each cookie is kept (attributes such as path or
  // HttpOnly do not belong in a request).
  const buildCookieHeader = (response) => {
    const setCookie = response.getAllHeaders()['Set-Cookie'];
    if (setCookie == null) {
      return '';
    }
    const cookies = Array.isArray(setCookie) ? setCookie : [setCookie];
    return cookies
      .map((cookie) => String(cookie).split(';')[0].trim())
      .filter((pair) => pair !== '')
      .join('; ');
  };

  const landingResponse = UrlFetchApp.fetch(url, {
    'method': 'GET',
    'muteHttpExceptions': true,
  });
  const cookieHeader = buildCookieHeader(landingResponse);
  const headers = cookieHeader === '' ? {} : { 'Cookie': cookieHeader };

  // The first POST is the actual search submission.
  let payload = buildPayload(Cheerio.load(landingResponse.getContentText()), {
    '__EVENTTARGET': '',
    'ctl00$body$btnAdvSearch': 'Search',
  });

  // A bounded for-loop replaces the original `while (i < 5)` whose counter
  // was never incremented (`i ;`), which made the loop run forever.
  for (let page = 0; page < pageCount; page++) {
    const response = UrlFetchApp.fetch(url, {
      'method': 'POST',
      'payload': payload,
      'muteHttpExceptions': true,
      'headers': headers,
    });
    const $ = Cheerio.load(response.getContentText());
    console.log($('table#ctl00_body_gvProviderAdvSearch tr td > a').first().text());

    // The next request must carry the state fields of the page just received.
    payload = buildPayload($, { '__EVENTTARGET': nextPageTarget });
  }
}
結果:
當上面修改的腳本運行時,在日志中可以看到如下結果。
Greenleaf Job Training Services Inc
06Maren
10th District Substance Abuse Program dba New Beginnings, C.A.S.A.
12 Queen Street
12th Street House
參考:
- fetch(url, params)
轉載請註明出處,本文鏈接:https://www.uj5u.com/shujuku/389307.html
