我是資料抓取的新手。最近,我試圖通過selenium庫和python從wunderground.com抓取資料。但是我發現,selenium web驅動有時候不能成功打開網頁。我想這個問題可能與網站使用的JavaScript有些關係,但不確定是哪部分出了問題。有誰知道如何解決這個問題嗎?預先感謝。
下面是正確顯示的例子。 正確顯示的例子
這裡顯示的是有問題的例子。 有問題的例子
我的代碼在這裡,這是一個非常簡單的Selenium呼叫。
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ChromeOptions
from selenium.webdriver import ActionChains
import time
# URL of the Weather Underground daily-history page to scrape.
url = "https://www.wunderground.com/history/daily/us/ca/san-diego/KSAN/date/2021-2-1"

# Chrome options for the Selenium webdriver.
option = webdriver.ChromeOptions()
# Exclude the "enable-automation" switch and turn off the automation extension.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
# JavaScript content setting: a value of 1 enables it, 2 would disable it.
option.add_experimental_option(
    "prefs",
    {'profile.managed_default_content_settings.javascript': 1},
)
option.add_argument('-disable-gpu')
# NOTE(review): "--disable-link-features" does not look like a documented
# Chromium switch; it may have been meant as "--disable-blink-features".
# Kept unchanged here -- confirm before altering.
option.add_argument("--disable-link-features")
option.add_argument("-disable-blink-features=AutomationControlled")
option.add_argument("-enable-javascript")

# Launch Chrome with the options above and open the page.
driver = webdriver.Chrome(options=option)
driver.get(url)
time.sleep(5)  # fixed pause to wait for the page to load
uj5u.com熱心網友回復:
該頁面會發送HTTP GET請求到:https://api.weather.com/v1/location/KSAN:9:US/observations/historical.json?apiKey=e1f10a1e78da46f5b10a1e78da96f525&units=e&startDate=20210201
這個呼叫的回應是一個巨大的JSON,包含你要找的資料。(下面是一個子集)
{
"metadata": {
"language": "en-US",
"transaction_id": "1631220781880:2112944028",
"version": "1",
"location_id": "KSAN:9:US",
"units": "e",
"expire_time_gmt": 1631224381,
"status_code": 200
},
"observation": [
{
"key": "KSAN",
"class": "observation",
"expire_time_gmt": 1612176660,
"obs_id": "KSAN",
"obs_name": "San Diego",
"valid_time_gmt": 1612169460,
"day_ind": "N",
"temp": 59,
"wx_icon": 27,
"icon_extd": 2700,
"wx_phrase": "Mostly cloudy",
"pressure_tend": 2,
"pressure_desc": "Falling",
"dewPt": 45,
"heat_index": 59,
"rh": 60,
"pressure": 30.04,
"vis": 10,
"wc": 59,
"wdir": null,
"wdir_cardinal": "CALM",
"gust": null,
"wspd": 0,
"max_temp": null,
"min_temp": null,
"precip_total": null,
"precip_hrly": 0,
"snow_hrly": null,
"uv_desc": "Low",
"feels_like": 59,
"uv_index": 0,
"qualifier": null,
"qualifier_svrty": null,
"blunt_phrase": null,
"terse_phrase": null,
"clds": "BKN",
"water_temp": null,
"primary_wave_period": null,
"primary_wave_height": null,
"primary_swell_period": null,
"primary_swell_height": null,
"primary_swell_direction": null,
"secondary_swell_period": null,
"secondary_swell_height": null,
"secondary_swell_direction": null
},
{
"key": "KSAN",
"class": "observation",
"expire_time_gmt": 1612180260,
"obs_id": "KSAN",
"obs_name": "San Diego",
"valid_time_gmt": 1612173060,
"day_ind": "N",
"temp": 59,
"wx_icon": 27,
"icon_extd": 2700,
"wx_phrase": "Mostly cloudy",
"pressure_tend": null,
"pressure_desc": null,
"dewPt": 47,
"heat_index": 59,
"rh": 64,
"pressure": 30.04,
"vis": 10,
"wc": 59,
"wdir": 260,
"wdir_cardinal": "W",
"gust": null,
"wspd": 5,
"max_temp": null,
"min_temp": null,
"precip_total": null,
"precip_hrly": 0,
"snow_hrly": null,
"uv_desc": "Low",
"feels_like": 59,
"uv_index": 0,
"qualifier": null,
"qualifier_svrty": null,
"blunt_phrase": null,
"terse_phrase": null,
"clds": "BKN",
"water_temp": null,
"primary_wave_period": null,
"primary_wave_height": null,
"primary_swell_period": null,
"primary_swell_height": null,
"primary_swell_direction": null,
"secondary_swell_period": null,
"secondary_swell_height": null,
"secondary_swell_direction": null
} ]
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/318358.html
標籤:
