當我運行我的代碼時,他們會向我提供這些錯誤 selenium.common.exceptions.InvalidArgumentException: Message: invalid argument: 'URL' must be a string,當他們去抓取第二個標題時,他們會抓取第一個標題,他們會向我提供錯誤,即您的 URL 是一個字串,這些是頁面鏈接https://www.google。 com/maps/search/uk dentist/@31.5688259,74.2388013,12z/data=!3m1!4b1
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
chrome_driver = webdriver.Chrome(
service=Service(ChromeDriverManager().install()),
options=options
)
def supplyvan_scraper():
with chrome_driver as driver:
driver.implicitly_wait(15)
URL = 'https://www.google.com/maps/search/dentist uk/@31.5688259,74.2388013,12z/data=!3m1!4b1'
driver.get(URL)
time.sleep(3)
page_links = [element.get_attribute('href') for element in
driver.find_elements(By.XPATH, "//div[@class='Nv2PK Q2HXcd THOPZb']//a")]
# visit all the links
for link in page_links:
driver.get(link)
time.sleep(2)
title = driver.find_element(By.XPATH, "//h1[@class='DUwDvf fontHeadlineLarge']//span").text
# parse title for all the links
print(title)
# driver.back()
time.sleep(2)
time.sleep(2)
driver.quit()
supplyvan_scraper()
uj5u.com熱心網友回復:
就我而言,除了一切正常之外,它在標題元素選擇方面有點麻煩。
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
chrome_driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)
data=[]
def supplyvan_scraper():
with chrome_driver as driver:
driver.implicitly_wait(15)
URL = 'https://www.google.com/maps/search/dentist uk/@31.5688259,74.2388013,12z/data=!3m1!4b1'
driver.get(URL)
time.sleep(3)
page_links = [element.get_attribute('href') for element in driver.find_elements(By.XPATH, '//*[@]')]
# visit all the links
for link in page_links:
print(link)
driver.get(link)
time.sleep(2)
title = driver.find_element(By.XPATH, '//h1[@]/span[1]').text
data.append(title)
# parse title for all the links
#print(title)
# driver.back()
time.sleep(2)
# time.sleep(2)
# driver.quit()
supplyvan_scraper()
df = pd.DataFrame(data,columns=['title'])
print(df)
輸出:
title
0 YOR Dental at MediaCityUK
1 Blossom Dental Care & Implant Studio
2 Blackbrook Dental Practice
3 Greenwich Dental Practice
4 NHS Dentist
5 London Dental Centre
6 New Cross Dental Practice
7 Dental Works
8 Huntingdon Dental Care
9 Advance Dental Care - Private & NHS | Invisali...
轉載請註明出處,本文鏈接:https://www.uj5u.com/shujuku/486862.html
