使用這個 URL,我想找到帶有 data-asin 屬性的 div 標籤。當我在 Chrome Inspect 模式下使用 //div[@data-asin] 時,它會找到 21 個元素。但是,在嘗試通過 Selenium 以兩種方式(顯式等待,以及直接取得元素串列的長度)獲取這些元素時,結果都是 0。我猜 Selenium 遠程瀏覽器無法在 DOM 樹中取得這些元素中的任何一個。代碼如下:
import pandas as pd
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
#reading from csv file url-s
def readCSV(path_csv):
    """Read a CSV file into a pandas DataFrame.

    Args:
        path_csv: Path (or file-like object) handed straight to
            ``pandas.read_csv``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pandas.read_csv``.
    """
    return pd.read_csv(path_csv)
# Load the table of URLs to visit from a hard-coded local CSV path.
fileCSV=readCSV(r'C:\Users\Admin\Downloads\urls.csv')
# Index label of the last non-missing entry in the 'linkamazon' column;
# goToUrl_Se uses it as the upper bound of its loop over rows.
# NOTE(review): last_valid_index() returns None when the column holds no
# valid values -- confirm the CSV is never empty.
length_of_column_urls=fileCSV['linkamazon'].last_valid_index()
def create_driver(driver_path=r'C:\Users\Admin\Downloads\chromedriver107v\chromedriver.exe'):
    """Create a headless Chrome WebDriver configured to look less automated.

    Args:
        driver_path: Location of the chromedriver executable. Defaults to the
            path that was previously hard-coded in this function.

    Returns:
        A ``selenium.webdriver.Chrome`` instance built from the options below.
    """
    chrome_options = Options()
    # Pass the flag explicitly instead of assigning ``Options.headless``;
    # the command-line argument does not depend on that property setter.
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("start-maximized")
    chrome_options.add_argument("--no-sandbox")
    # Both switches go in ONE call: experimental options are stored by name,
    # so a second "excludeSwitches" call would replace the first list and
    # silently drop "enable-automation".
    chrome_options.add_experimental_option(
        "excludeSwitches", ["enable-automation", "enable-logging"]
    )
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    webdriver_service = Service(driver_path)
    return webdriver.Chrome(service=webdriver_service, options=chrome_options)
#going to urls 1-by-1
def goToUrl_Se(driver):
    """Open each URL from the CSV in turn and parse the loaded page.

    Iterates over row positions ``0 .. length_of_column_urls`` (inclusive),
    reads the URL from the second column of ``fileCSV``, navigates to it and
    calls ``parse_data()``. The module-level ``counter`` is reset to 0 and
    incremented once per processed URL. The driver is always quit at the end,
    even if navigation or parsing raises.

    Args:
        driver: The Selenium WebDriver used for navigation.
    """
    global counter
    counter = 0
    try:
        # last_valid_index() yields None when the column has no valid entry;
        # there is nothing to visit in that case.
        if length_of_column_urls is None:
            return
        # +1 because the bound is the index of the last row, not a row count.
        for i in range(0, length_of_column_urls + 1):
            # NOTE(review): the URL is read from positional column 1, while
            # the loop bound comes from the 'linkamazon' column -- confirm
            # these are the same column.
            xUrl = fileCSV.iloc[i, 1]
            print(xUrl, i)
            # going to url(amazn) via Selenium WebDriver
            driver.get(xUrl)
            parse_data()
            counter += 1
    finally:
        # Release the browser no matter how the loop ended.
        driver.quit()
#fetch-parse the data from url page
def parse_data():
    """Count the ``div`` elements carrying a ``data-asin`` attribute.

    Uses the module-level ``driver``. Waits up to 77 seconds for all matching
    elements to become visible and prints how many were found; if the wait
    times out, prints the number of matching elements currently present
    instead.
    """
    # NOTE(review): these globals are declared but never assigned in the
    # visible code -- presumably filled in by parsing logic not shown here.
    global asin, title, bookform, priceNewProd, author
    # Imported locally so this function brings its own dependency into scope.
    from selenium.common.exceptions import TimeoutException

    # The expression is XPath, so it must be paired with By.XPATH; with
    # By.TAG_NAME it is treated as a literal tag name and matches nothing.
    locator = (By.XPATH, '//div[@data-asin]')
    wait = WebDriverWait(driver, timeout=77)
    try:
        x_index = wait.until(EC.visibility_of_all_elements_located(locator))
        print(len(x_index))
    except TimeoutException:
        # Not every match became visible in time; fall back to a plain lookup.
        y_index = driver.find_elements(*locator)
        print(len(y_index))
# Start the browser. parse_data() takes no arguments and reads this
# module-level `driver` name directly, so it must be bound here.
driver=create_driver()
# Visit every URL from the CSV; goToUrl_Se also calls driver.quit().
goToUrl_Se(driver)
uj5u.com熱心網友回復:
定位方式必須使用 By.XPATH 而不是 By.TAG_NAME,因為 //div[@data-asin] 是 XPath 表達式:
try:
    # '//div[@data-asin]' is an XPath expression, so it is paired with By.XPATH.
    x_index = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[@data-asin]')))
    print(len(x_index))
except Exception:
    # The wait did not succeed; count the matching elements present right now.
    y_index = driver.find_elements(By.XPATH, '//div[@data-asin]')
    print(len(y_index))
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/526074.html
標籤:Python、Selenium
