我想抓取標題(title):程式應該進入每個鏈接并抓取其中的標題,但執行時卻報錯
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from time import sleep
# Location of the chromedriver binary.
# NOTE(review): this is not a raw string; "\P" and "\c" are not recognised
# escape sequences, so the backslashes are kept, but recent Python versions
# warn about them. A raw string (r"...") would be the safer spelling.
PATH="C:\Program Files (x86)\chromedriver.exe"
# Category listing page whose product links should be followed.
url='https://www.supplyvan.com/power-tools/cordless-powertools/cordless-drills.html'
# Start Chrome, passing the chromedriver path positionally.
driver =webdriver.Chrome(PATH)
# Explicit wait with a 20 s timeout; created here but not used below.
wait = WebDriverWait(driver, 20)
driver.get(url)
# Wait up to 20 s for an "h4.card-title" element to become clickable, then
# click it. The value stored is the result of click(), not the element, so
# list_button does not hold a usable element reference.
list_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h4.card-title"))).click()
# NOTE(review): this is most likely the line that raises the reported error.
# `.text` is read as a plain attribute elsewhere in this document (no call),
# so `.text()` tries to call a string. The XPath 'h1' also has no leading
# "//", so it presumably does not match the product heading either - confirm.
title=driver.find_element_by_xpath('h1').text()
print(title)
uj5u.com熱心網友回復:
您需要更改選擇器以獲取h1標簽文本。
在這個片段中,爬蟲將訪問第一個鏈接并列印標題
# Click the first product card on the listing page; the browser then
# navigates to that product's detail page.
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h4.card-title"))).click()

# Wait for the product heading itself instead of sleeping a fixed 2 s: the
# lookup succeeds as soon as the new page has rendered, and it no longer
# relies on the `time` module being imported by the surrounding script.
title = wait.until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, 'h1.productView-title'))
).text
print(title)
在這個片段中,driver 會訪問每個鏈接并抓取標題
# Collect every product URL as a plain string first: element references taken
# from the listing page cannot be used once the browser has navigated away.
hrefs = (
    anchor.get_attribute('href')
    for anchor in driver.find_elements(By.CSS_SELECTOR, "h4.card-title > a")
)
# Skip anchors without an href so driver.get() is never handed None.
page_links = [href for href in hrefs if href]

# Visit each product page and print its title.
wait = WebDriverWait(driver, 20)
for link in page_links:
    driver.get(link)
    # Wait for the heading to be rendered rather than sleeping a fixed time.
    title = wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, 'h1.productView-title'))
    ).text
    print(title)
包含兩個片段的完整代碼 -
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Chrome start-up flags for the scraping session.
options = webdriver.ChromeOptions()
# options.add_argument("--headless")  # enable to run without a visible window
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the size as "width,height"; the previous "1920x1080" form
# is not parsed as a size, so the window kept its default dimensions.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Module-level driver shared with supplyvan_scraper(); the chromedriver binary
# path is supplied by webdriver_manager's install() call.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
def supplyvan_scraper():
    """Print the title of every product on the cordless-drills listing page.

    Uses the module-level ``chrome_driver``. The driver is used as a context
    manager, so the browser session is shut down when the function finishes,
    including when a lookup raises.

    Returns:
        None. Each product title is written to standard output.
    """
    listing_url = 'https://www.supplyvan.com/power-tools/cordless-powertools/cordless-drills.html'

    # Leaving the ``with`` block quits the driver, so no explicit
    # driver.quit() call is needed afterwards.
    with chrome_driver as driver:
        # Element lookups poll for up to 15 s before failing; this replaces
        # the fixed time.sleep() pauses that used to follow every page load.
        driver.implicitly_wait(15)
        driver.get(listing_url)

        # Collect the product URLs as plain strings before navigating:
        # element references from the listing page cannot be reused after
        # the browser has moved to another page.
        hrefs = (
            anchor.get_attribute('href')
            for anchor in driver.find_elements(By.CSS_SELECTOR, "h4.card-title > a")
        )
        # Skip anchors without an href so driver.get() is never handed None.
        page_links = [href for href in hrefs if href]

        # Visit each product page and print its heading.
        for link in page_links:
            driver.get(link)
            title = driver.find_element(By.CSS_SELECTOR, 'h1.productView-title').text
            print(title)


supplyvan_scraper()
所有訪問鏈接的輸出 -
Bosch Professional Cordless Drill, GSR-120-Li, 12V, Blue/Black
Makita LXT Cordless Drill Driver, DDF481RTJ, 18V, 13MM
Bosch Cordless Drill, GSR-1000, 10.8V
.....
轉載請註明出處,本文鏈接:https://www.uj5u.com/yidong/482296.html
