我是Python和Selenium的新手,希望能抓取一些資料點。我在三個方面遇到了困難:
以下是我目前的腳本:
# Question script: load each PageSpeed Insights report and copy a handful of
# metrics (title, Core Web Vitals verdict, FCP, FID, CLS, TTI, TBT, total
# score) into a pandas DataFrame.
# Assumes `webdriver`, `WebDriverWait`, `EC`, `By` and `pd` are imported
# earlier in the file (the snippet as posted does not show its imports).
urls = [
    'https://developers.google.com/speed/pagespeed/insights/?url=https://www.crutchfield.com//&tab=mobile',
    'https://developers.google.com/speed/pagespeed/insights/?url=https://www.lastpass.com/&tab=mobile',
]

driver = webdriver.Chrome(executable_path='/Library/Frameworks/Python.framework/Versions/3.9/bin/chromedriver')

for url in urls:
    # NOTE(review): range(0, 1) yields a single value, so this inner loop
    # runs exactly once per URL.
    for page in range(0, 1):
        driver.get(url)
        # Block (up to 120 s) until the field-data section is in the DOM.
        wait = WebDriverWait(driver, 120).until(EC.presence_of_element_located((By.CLASS_NAME, 'origin-field-data')))
        # NOTE(review): the frame is re-created for every URL, so only the
        # rows of the last URL are still present at the final print().
        df = pd.DataFrame(columns=['Title', 'Core Web Vitals', 'FCP', 'FID', 'CLS', 'TTI', 'TBT', 'Total Score'])
        company = driver.find_elements_by_class_name("audited-url__link")

        data = []

        for i in company:
            data.append(i.get_attribute('href'))

        # NOTE(review): `x` is never used; one row is appended per collected
        # href, and every row is read from the page that is currently loaded.
        for x in data:
            #Get URL name
            title = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[1]/div/div[2]/h1/a')
            co_name = title.text

            #Get Core Web Vitals text pass/fail
            cwv = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div/div[1]/div[1]/span[2]')
            core_web = cwv.text

            #Get FCP
            fcp = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div')
            first_content = fcp.text

            #Get FID
            fid = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[3]/div[1]/div')
            first_input = fid.text

            #Get CLS
            cls = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[4]/div[1]/div')
            layout_shift = cls.text

            #Get TTI
            tti = driver.find_element_by_xpath('//*[@id="interactive"]/div/div[1]')
            time_interactive = tti.text

            #Get TBT
            tbt = driver.find_element_by_xpath('//*[@id="total-blocking-time"]/div/div[1]')
            total_block = tbt.text

            #Get Total Score
            total_score = driver.find_element_by_xpath('//*[@id="page-speed-insights"]/div[2]/div[3]/div[2]/div[1]/div[1]/div/div[1]/a/div[2]')
            score = total_score.text

            #Add all columns to the DataFrame
            df.loc[len(df)] = [co_name,core_web,first_content,first_input,layout_shift,time_interactive,total_block,score]

driver.close()

#df.to_csv('Double Page Speed Test 9-10.csv')
print(df)
uj5u.com熱心網友回復:
問題1:我不明白如何正確地回圈瀏覽多個URL?
Ans : for url in urls:
問題2:我不明白為什麼腳本要在每個URL上迭代兩次?
Ans : 你的腳本之所以在每個URL上迭代了兩次,是因為你有 for page in range(0, 1):
Update 1:
我沒有連同 DataFrame 一起運行你的完整代碼。此外,有時某個頁面不會顯示數值和 href;但在一般情況下,當我運行下面的代碼時:
# Answer script: visit every PageSpeed Insights URL and collect the href of
# each audited-URL link into one list, printed once after all URLs are done.
# Assumes `webdriver`, `WebDriverWait`, `EC`, `By` and `driver_path` are
# defined earlier in the file (not shown in this snippet).
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(50)
wait = WebDriverWait(driver, 20)

urls = [
    'https://developers.google.com/speed/pagespeed/insights/?url=https://www.crutchfield.com//&tab=mobile',
    'https://developers.google.com/speed/pagespeed/insights/?url=https://www.lastpass.com/&tab=mobile',
]

# Accumulates hrefs across all URLs (it is not reset per page).
data = []

for url in urls:
    driver.get(url)
    # Block (up to 120 s) until the field-data section is in the DOM.
    wait = WebDriverWait(driver, 120).until(EC.presence_of_element_located((By.CLASS_NAME, 'origin-field-data')))
    company = driver.find_elements_by_css_selector("h1.audited-url a")
    for i in company:
        data.append(i.get_attribute('href'))

print(data)
輸出如下:
['https://www.crutchfield.com//', 'https://www.lastpass.com/', 'https://www.lastpass.com/']
這說明我們使用的元素定位器在第1頁對應1個元素,而在第2頁對應2個元素。
轉載請註明出處,本文鏈接:https://www.uj5u.com/yidong/320269.html
標籤:
