我們正試圖將這個頁面上的表格 - https://www.nba.com/stats/teams/advanced/?sort=W&dir=-1&Season=2020-21&SeasonType=Regular Season - 抓取到 R 中。以下是我們到目前為止嘗試過的方法:
# Call the stats endpoint observed in the browser's network tab directly.
# This attempt does not work: the request hangs.
stats_url <- "https://stats.nba.com/stats/leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2020-21&SeasonSegment=&SeasonType=Regular Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision="
httr::GET(url = stats_url)
# Download the stats page with rvest and select its <table> nodes;
# the node set that comes back is empty.
page_url <- "https://www.nba.com/stats/teams/advanced/?sort=W&dir=-1&Season=2020-21&SeasonType=Regular Season"
page <- read_html(page_url)
html_nodes(page, "table")
是否可以使用 R 從此網頁中抓取主表?
編輯:
# Request headers as posted from what looks like a Chrome session on macOS
# (see the user-agent / sec-ch-ua values below).
headers <- c(
  `authority` = 'www.nba.com',
  `cache-control` = 'max-age=0',
  `sec-ch-ua` = '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
  `sec-ch-ua-mobile` = '?0',
  `sec-ch-ua-platform` = '"macOS"',
  `upgrade-insecure-requests` = '1',
  `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36',
  # The media type is "application/xhtml+xml"; the "+" must be literal.
  `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
  `sec-fetch-site` = 'same-origin',
  `sec-fetch-mode` = 'navigate',
  `sec-fetch-user` = '?1',
  `sec-fetch-dest` = 'document',
  `accept-language` = 'en-US,en;q=0.9',
  # The cookie must stay on a single line: a line break inside the literal
  # would put a raw newline into the HTTP header value.
  # NOTE(review): these are session-specific values dated October 2021 and
  # have presumably expired - confirm whether the cookie is needed at all.
  `cookie` = 'usprivacy=1YNN; AMCVS_248F210755B762187F000101@AdobeOrg=1; s_ecid=MCMID|39761269548384710744541812242089157146; countryCode=US; s_cc=true; ug=61647d1f0252400a3f87470014d69025; nlhidescores=false; _pbjs_userid_consent_data=3524755945110770; qoscid=524912006.1633975588; qossid=1633975588; client_type=html5; client_version=4.4.0; ugs=1; OptanonAlertBoxClosed=2021-10-12T23:20:24.183Z; at_check=true; _parsely_visitor={"id":"pid=0cb0a9a5854f45ea8a6d48f74f03e800","session_count":1,"last_session_ts":1634155541257}; ab.storage.deviceId.cf150dab-3153-49b0-b48c-66a7c18688ea={"g":"28d2f640-2ad0-b8e9-b78c-016ba5a85671","c":1634155541318,"l":1634155541318}; OptanonControl=ccc=US&csc=&cic=0&otvers=6.24.0&pctm=2021-10-12T23:20:24.183Z&reg=ccpa&ustcs=1YNN&vers=3.1.5; aam_uuid=39724801183369993254542124123886279717; s_ips=796; mbox=session#70d31bd3ea124acc80cb089a5594528e#1634158760|PC#70d31bd3ea124acc80cb089a5594528e.34_0#1697401700; ab.storage.sessionId.cf150dab-3153-49b0-b48c-66a7c18688ea={"g":"8dcfd2a2-4419-87f9-7e1c-22cf76830e7e","e":1634158700129,"c":1634155541315,"l":1634156900129}; s_tp=2924; s_ppv=nba%3Ateams%3Amain,27,27,796,1,3; ak_bmsc=2C1E9B2928FD1C90ECFF4A5887776269~000000000000000000000000000000~YAAQrL4cuDCzpVJ8AQAAytvzew1NuriisqR0MtOqexD1CqvqIJKuuhJda9NNGXOBCOjAdMEXnQjL10fYxWYj9HLm2DJdQLQIjLSqvl3faGyPbxWARg6dKwmf4NK/ RENdJTZfsKGTbwUMxTtPRSoR7TmMc3UWE4tAdft14nRiSPZwp/DJjK9NUhLtpTDjCa65HELyeJ7O4M4d98rAu5R7YYZOEVRjz5VRQEGaFBc5u2OlaUpcyFDqUM j jII/6xmqgwVRUhX8t8oNmdeiYpfEALo1yewznqZcfOO18htGp4sF3SLPG8bBFvLeGwW118Mu1rVkyeO4PEvC7UFZUc a7tGNSjGyGe0WSC/0iSjTC /ikP2BPwMosXe7DxWk/a0vuFtUlw7jArB/YQuYHH61uu8E97UTA=; AMCV_248F210755B762187F000101@AdobeOrg=359503849|MCMID|39761269548384710744541812242089157146|MCAAMLH-1634771953|7|MCAAMB-1634771953|RKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y|MCOPTOUT-1634174353s|NONE|MCAID|NONE|vVersion|5.0.1|MCIDTS|18914; s_gpv_pageModal=nba:stats:teams:advanced; s_sq=[[B]]; akavpau_allowednbamain=1634169266~id=4fd4cabce5336e66bef275d5dd409a10; bm_sv=467DB2784E3DE76FAA9F4CD21DD7DE3C~8bPs2wRiWvWAD8K8MYos9duNZqYto/EQc8HFibswczdPYqofRTJZOTE4Xy1RsB9fJag8YMdv3OOHkVFDGoh7aG8x4Y8eZepOfBGMFtPmQF0Vgg0XNix35HHU2sk9RKCEQujy2BRS4m269Y6fIapqEQ==; OptanonConsent=isIABGlobal=false&datestamp=Wed Oct 13 2021 19:44:27 GMT-0400 (Eastern Daylight Time)&version=6.24.0&hosts=&consentId=e8a9be54-a345-44df-90e1-eaaf56d98079&interactionCount=2&landingPath=NotLandingPage&groups=BG30:1,ven:1,pad:1,pap:1,cad:1,map:1,dsa:1,NBAad:1,req:1,sec:1,gld:1,pcp:1,mcp:1,mra:1,tdc:1,cos:1,did:1,sid:1,pdd:1,pcd:1,NBAmt:1&AwaitingReconsent=false&geolocation=US;'
)

# Query-string parameters of the stats page (sort column, direction, season).
params <- list(
  `sort` = 'W',
  `dir` = '-1',
  `Season` = '2020-21',
  `SeasonType` = 'Regular Season'
)

# Request the HTML page itself; `res` holds the httr response object.
res <- httr::GET(
  url = 'https://www.nba.com/stats/teams/advanced/',
  httr::add_headers(.headers = headers),
  query = params
)
以上程式碼會回傳一個 res 變數,但我們現在不知道該如何從 res 中取出表格資料。
uj5u.com熱心網友回復:
正如評論中所討論的,其中許多標頭和參數並非必要,但以下程式碼可以運作:
library(data.table)
library(magrittr)
# Headers sent with the request to the stats.nba.com JSON endpoint.
# No `If-Modified-Since` header is sent: a conditional request may be
# answered with "304 Not Modified" and an empty body, which would break the
# parsing below.
headers <- c(
  `Connection` = "keep-alive",
  `Accept` = "application/json, text/plain, */*",
  `x-nba-stats-token` = "true",
  `DNT` = "1",
  `User-Agent` = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
  `x-nba-stats-origin` = "stats",
  `Sec-GPC` = "1",
  `Origin` = "https://www.nba.com",
  `Sec-Fetch-Site` = "same-site",
  `Sec-Fetch-Mode` = "cors",
  `Sec-Fetch-Dest` = "empty",
  `Referer` = "https://www.nba.com/",
  `Accept-Language` = "en-US,en;q=0.9"
)

# Query parameters for the `leaguedashteamstats` endpoint; empty strings are
# sent as empty values.
params <- list(
  `Conference` = "",
  `DateFrom` = "",
  `DateTo` = "",
  `Division` = "",
  `GameScope` = "",
  `GameSegment` = "",
  `LastNGames` = "0",
  `LeagueID` = "00",
  `Location` = "",
  `MeasureType` = "Advanced",
  `Month` = "0",
  `OpponentTeamID` = "0",
  `Outcome` = "",
  `PORound` = "0",
  `PaceAdjust` = "N",
  `PerMode` = "PerGame",
  `Period` = "0",
  `PlayerExperience` = "",
  `PlayerPosition` = "",
  `PlusMinus` = "N",
  `Rank` = "N",
  `Season` = "2020-21",
  `SeasonSegment` = "",
  `SeasonType` = "Regular Season",
  `ShotClockRange` = "",
  `StarterBench` = "",
  `TeamID` = "0",
  `TwoWay` = "0",
  `VsConference` = "",
  `VsDivision` = ""
)

# Fail after 30 seconds instead of hanging indefinitely.
res <- httr::GET(
  url = "https://stats.nba.com/stats/leaguedashteamstats",
  httr::add_headers(.headers = headers),
  query = params,
  httr::timeout(30)
)
# Turn HTTP error statuses into R errors before trying to parse the body.
httr::stop_for_status(res)

# The parsed body has a `resultSets` list; its first element carries the
# column names in `headers` and one list per row in `rowSet`.
data <- httr::content(res)[["resultSets"]][[1]]
column_names <- as.character(data$headers)
# Stack the rows into a data.table and label the columns.
dt <- setnames(rbindlist(data$rowSet), column_names)
給出:
# Show the first two rows of the result.
head(dt, n = 2L)
TEAM_ID TEAM_NAME GP W L W_PCT MIN E_OFF_RATING OFF_RATING E_DEF_RATING DEF_RATING E_NET_RATING NET_RATING AST_PCT
1: 1610612737 Atlanta Hawks 72 41 31 0.569 3481 113 114.3 110.6 112.1 2.5 2.2 0.591
2: 1610612738 Boston Celtics 72 36 36 0.5 3476 111 113.1 110 111.8 0.9 1.2 0.566
AST_TO AST_RATIO OREB_PCT DREB_PCT REB_PCT TM_TOV_PCT EFG_PCT TS_PCT E_PACE PACE PACE_PER40 POSS PIE GP_RANK W_RANK L_RANK
1: 1.82 17.6 0.284 0.742 0.516 0.133 0.539 0.581 99.9 98.68 82.23 7160 0.511 1 11 11
2: 1.67 17.1 0.289 0.737 0.51 0.141 0.543 0.574 100.7 98.94 82.45 7172 0.501 1 16 16
W_PCT_RANK MIN_RANK OFF_RATING_RANK DEF_RATING_RANK NET_RATING_RANK AST_PCT_RANK AST_TO_RANK AST_RATIO_RANK OREB_PCT_RANK
1: 11 11 9 18 11 18 14 20 6
2: 16 16 10 13 13 27 22 26 3
DREB_PCT_RANK REB_PCT_RANK TM_TOV_PCT_RANK EFG_PCT_RANK TS_PCT_RANK PACE_RANK PIE_RANK CFID CFPARAMS
1: 9 7 10 16 10 22 10 10 Atlanta Hawks
2: 13 10 18 12 16 20 17 10 Boston Celtics
uj5u.com熱心網友回復:
以下是一個使用 RSelenium 的解決方案:
library(RSelenium)
library(dplyr)
library(rvest)

# Start a Selenium server together with a Chrome browser session and keep
# the client handle that drives the browser.
driver <- rsDriver(browser = "chrome")
remDr <- driver[["client"]]

# Let element lookups wait up to 10 seconds, so that findElement() below does
# not fail just because the table is not in the DOM yet.
remDr$setImplicitWaitTimeout(milliseconds = 10000)
remDr$navigate("https://www.nba.com/stats/teams/advanced/?sort=W&dir=-1&Season=2020-21&SeasonType=Regular Season")

# select element
# (the name `table` is kept because later code refers to it)
table <- remDr$findElement(
  using = "xpath",
  value = "/html/body/main/div/div/div[2]/div/div/nba-stat-table/div[2]/div[1]/table"
)

# Parse the page source and convert its HTML tables to data frames;
# html_table() returns a list with one entry per table.
df1 <- table$getPageSource()[[1]] %>%
  read_html() %>%
  html_table()
[[1]]
# A tibble: 30 x 39
`` TEAM GP W L MIN OffRtg DefRtg NetRtg `AST%` `AST/TO` ASTRatio `OREB%` `DREB%` `REB%` `TOV%` `eFG%` `TS%` PACE PIE POSS `GP RANK`
<int> <chr> <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <lgl>
1 1 Utah Jazz 72 52 20 3471 116. 108. 9 57.3 1.66 17.3 28.4 75.7 52.9 14.2 56.3 59.7 99.4 54.2 7,193 NA
2 2 Phoenix ~ 72 51 21 3496 116. 110. 5.9 62.2 2.15 19.6 24.8 74.1 50 12.6 56.4 59.7 98 53.4 7,137 NA
3 3 Philadel~ 72 49 23 3486 112. 107 5.5 57.2 1.64 17.2 27.7 73.7 51.1 14.3 54.1 57.9 100. 53.6 7,272 NA
4 4 Brooklyn~ 72 48 24 3481 117. 113. 4.2 62.1 1.98 19.3 25.2 72.6 50.3 13.4 57.5 61 100. 53.2 7,280 NA
5 5 Denver N~ 72 47 25 3496 116. 112. 4.8 62.1 1.99 19.3 29.2 75.1 52.2 13.6 55.7 58.8 97.7 52.5 7,123 NA
6 5 LA Clipp~ 72 47 25 3456 117. 111. 6.1 58.4 1.85 18.1 27 75.4 51.8 13.5 56.4 59.9 97.6 53 7,036 NA
7 7 Milwauke~ 72 46 26 3466 116. 111. 5.8 56.9 1.84 18 26.9 75.5 51.9 13.4 56.6 59.3 103. 53.3 7,423 NA
8 8 Dallas M~ 72 42 30 3461 115. 112. 2.3 55.7 1.9 17.2 25.3 73.4 49.6 12.3 55 58.2 97.9 51 7,062 NA
9 8 Los Ange~ 72 42 30 3491 110. 107. 2.9 60.7 1.62 18 26.9 74.8 51.1 15.2 53.6 56.9 98.8 51.7 7,184 NA
或者使用 XML 套件的 readHTMLTable:
# Parse the tables of the rendered page with the XML package; the function is
# namespace-qualified because XML is not attached with library().
XML::readHTMLTable(table$getPageSource()[[1]])
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/317035.html
