Python 網路爬取時出現 pandas 錯誤「All arrays must be of the same length」（所有陣列必須具有相同的長度）-有解無憂

我正在嘗試爬取 Udemy 的資料，但取得的課程標題似乎比課程價格多，我不知道為什麼會發生這種情況。我知道一些可以避開 pandas 錯誤的方法，但那不算解決方案；真正的解決方案必須是抓取到數量完全相同的課程名稱和課程價格：

from selenium import webdriver
import pandas as pd
import time 
import selenium


# I put in all these options to keep Udemy from detecting Selenium as a bot

from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# Scrape course titles and prices from a Udemy search page and export them
# to an Excel file, one row per course.

# Chrome options meant to make the automated browser look less like a bot.
options = Options()
options.add_argument("start-maximized")
# add_experimental_option keeps a single value per option name, so both
# switches are passed in one list; two separate "excludeSwitches" calls would
# leave only the last one in effect.
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')

website = "https://www.udemy.com/courses/search/?src=ukw&q=python"

s = Service('C:\\Users\\Albin Rodriguez\\Documents\\Aprendiendo\\web_scraping\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
try:
    driver.get(website)

    # Fixed pause before querying the DOM (presumably to let the search
    # results finish rendering).
    time.sleep(5)

    # find_elements(By.XPATH, ...) is the Selenium 4 locator API that matches
    # the Service-based driver construction above; the legacy
    # find_elements_by_xpath helper is not available in current Selenium 4.
    # NOTE(review): the predicate in '//h3[@]' is empty, which is not a valid
    # XPath -- the attribute test appears to have been lost; restore it
    # before running.
    titles = driver.find_elements(By.XPATH, '//h3[@]')
    prices = driver.find_elements(By.XPATH, '//div[@data-purpose="price-text-container"]//span/span')

    # Read the element text while the browser session is still alive.
    courses = [title.text for title in titles]
    prices_courses = [price.text for price in prices]

    input()  #I used this to check if all the prices are shown.
finally:
    # Always shut down the browser and the chromedriver process.
    driver.quit()

# Building a DataFrame from a dict requires equally long columns. Report the
# actual counts instead of letting pandas raise its generic length error.
if len(courses) != len(prices_courses):
    raise ValueError(
        f"Scraped {len(courses)} titles but {len(prices_courses)} prices; "
        "the two XPath queries do not select one element per course."
    )

#pandas
df = pd.DataFrame({'cursos': courses, 'precios': prices_courses})
df.to_excel("precio_cursos2.xlsx", index=False)

uj5u.com熱心網友回復：

現在請執行以下程式碼：

from selenium import webdriver
import pandas as pd
import time 
import selenium


# I put in all these options to keep Udemy from detecting Selenium as a bot

from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# Scrape course titles and prices from a Udemy search page, pair them up
# row by row, and export the result to an Excel file.

# Chrome options meant to make the automated browser look less like a bot.
options = Options()
options.add_argument("start-maximized")
# add_experimental_option keeps a single value per option name, so both
# switches are passed in one list; two separate "excludeSwitches" calls would
# leave only the last one in effect.
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')

website = "https://www.udemy.com/courses/search/?src=ukw&q=python"

s = Service('C:\\Users\\Albin Rodriguez\\Documents\\Aprendiendo\\web_scraping\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
try:
    driver.get(website)

    # Fixed pause before querying the DOM (presumably to let the search
    # results finish rendering).
    time.sleep(5)

    # find_elements(By.XPATH, ...) is the Selenium 4 locator API that matches
    # the Service-based driver construction above; the legacy
    # find_elements_by_xpath helper is not available in current Selenium 4.
    # NOTE(review): the predicate in '//h3[@]/a' is empty, which is not a
    # valid XPath -- the attribute test appears to have been lost; restore it
    # before running.
    titles = [x.text for x in driver.find_elements(By.XPATH, '//h3[@]/a')]
    # Only the first 31 matched price elements are kept (hard-coded cap).
    prices = [x.text for x in driver.find_elements(By.XPATH, '//div[@data-purpose="price-text-container"]//span/span')[0:31]]
finally:
    # Always shut down the browser and the chromedriver process.
    driver.quit()

#pandas
# zip() stops at the shorter list, so the frame never gets columns of
# unequal length; the row count is min(len(titles), len(prices)).
df = pd.DataFrame(data=list(zip(titles,prices)),columns=['cursos','precios'])
df.to_excel("precio_cursos2.xlsx", index=False)

轉載請註明出處，本文鏈接：https://www.uj5u.com/shujuku/449241.html

標籤：Python pandas Selenium 網頁抓取

上一篇：BeautifulSoup/從腳本標簽中獲取內容？

下一篇：當URL不變時，如何使用過濾器從網站上抓取資料？