我正在嘗試從該網站上抓取每個產品頁面:https ://www.aliexpress.com/wholesale?catId=0&initiative_id=SB_20220315022920&SearchText=bluetooth earphones
特別是我想得到我在照片中提到的評論和客戶國家: 在此處輸入圖片描述
主要問題是我的代碼沒有檢查正確的元素,這就是我正在努力解決的問題。首先,我嘗試對這個產品進行抓取:https ://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e 43d0-a429-5f6cddf3d6ad-8&pdp_ext_f={"sku_id":"12000027213624098"}&pdp_pi=-1;40.81;-1;-1@salePrice;MAD;search-mainSearch
這是我的代碼:
from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import html
import cssselect
from time import sleep
from itertools import zip_longest
import csv
driver = webdriver.Edge(executable_path=r"C:/Users/OUISSAL/Desktop/wscraping/XEW/scraping/codes/msedgedriver")
url = "https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f={"sku_id":"12000027213624098"}&pdp_pi=-1;40.81;-1;-1@salePrice;MAD;search-mainSearch"
with open ("data.csv", "w", encoding="utf-8") as csvfile:
wr = csv.writer(csvfile)
wr.writerow(["Comment","Custumer country"])
driver.get(url)
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
review_buttom = driver.find_element_by_xpath('//li[@ae_button_type="tab_feedback"]')
review_buttom.click()
html_source = driver.find_element_by_xpath('//div[@id="transction-feedback"]')
tree = html.fromstring(html_source)
#tree = html.fromstring(driver.page_source)
for rvw in tree.xpath('//div[@]'):
country = rvw.xpath('//div[@]//b/text()')
if country:
country = country[0]
else:
country = ''
print('country:', country)
comment = rvw.xpath('//dt[@id="buyer-feedback"]//span/text()')
if comment:
comment = comment[0]
else:
comment = ''
print('comment:', comment)
driver.close()
謝謝 !!
uj5u.com熱心網友回復:
發生什么了?
有一個主要問題,您要查找的反饋位于 中iframe,因此您不會通過直接呼叫元素來獲取資訊。
怎么修?
滾動到保持iframe導航到其源并與其分頁互動以獲取所有反饋的元素視圖。
例子
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f={"sku_id":"12000027213624098"}&pdp_pi=-1;40.81;-1;-1@salePrice;MAD;search-mainSearch'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
driver.get(url)
wait = WebDriverWait(driver, 10)
driver.execute_script("arguments[0].scrollIntoView();", wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))))
driver.get(wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))).get_attribute('src'))
data=[]
while True:
for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
try:
country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
except:
country = None
try:
comment = e.find_element(By.CSS_SELECTOR, '.buyer-feedback span').text
except:
comment = None
data.append({
'country':country,
'comment':comment
})
try:
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))).click()
except:
break
pd.DataFrame(data).to_csv('filename.csv',index=False)
轉載請註明出處,本文鏈接:https://www.uj5u.com/qiye/444113.html
上一篇:硒回圈通過單選按鈕python
