從資料框中讀取網路鏈接會引發“過時的元素參考：元素未附加到頁面檔案”錯誤-有解無憂

我得到了一個資料框，其中包含指向兩家餐廳的谷歌評論的鏈接。我想將兩家餐廳的所有評論（一個接一個）加載到瀏覽器中，然后將它們保存到一個新的資料框中。我撰寫了一個腳本來讀取所有評論并將其加載到瀏覽器中，如下所示：

import glob
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

link_df =   Link
0   https://www.google.com/search?q=restaurant in christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q:1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant in christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]
1   https://www.google.com/search?q=restaurant in christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q:1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant in christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]

# Walk the restaurant links in ``link_df``, expand each review list in the
# browser and append the scraped reviews to a dated CSV file.
REVIEW_COLUMNS = ["Restaurant title","Restaurant rating","Total reviews","Reviewer Name","Rating", "Review"]

driver = webdriver.Chrome()
try:
    for index, row in link_df.iterrows():
        base_url = row['Link']

        driver.get(base_url)
        # Sort the reviews by "Newest" before reading anything off the page.
        WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()
        print('Restaurant number is ',index)

        title = driver.find_element(By.XPATH, "//div[@class='P5Bobd']").text
        # Read but not written to the CSV below.
        address = driver.find_element(By.XPATH, "//div[@class='T6pBCe']").text
        overall_rating = driver.find_element(By.XPATH, "//div[@class='review-score-container']//span[@class='Aq14fc']").text

        total_reviews_text = driver.find_element(By.XPATH, "//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
        # The first whitespace-separated token is the review count; drop
        # thousands separators such as "1,234" so int() accepts it.
        num_reviews = int(total_reviews_text.split()[0].replace(',', ''))
        all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
        time.sleep(2)
        total_reviews = len(all_reviews)

        while total_reviews < num_reviews:
            # NOTE(review): this is the call reported to raise
            # StaleElementReferenceException. all_reviews was located before
            # the sleep above, so presumably the list was re-rendered in the
            # meantime; re-locating the elements right before scrolling
            # avoids holding references to detached nodes.
            driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
            # Wait for the loading spinner to go away before re-reading the list.
            WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
            time.sleep(5)
            all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
            print(total_reviews)
            # Counts loop passes, not the number of reviews actually loaded.
            total_reviews += 1

        reviews_info = driver.find_elements(By.XPATH, "//div[@class='jxjCjc']")
        # Collect plain rows first and build the frame once at the end.
        rows = []
        for review_info in reviews_info:
            name = review_info.find_element(By.XPATH, "./div/div/a").text
            rating = review_info.find_element(By.XPATH, ".//div[@class='PuaHbe']//g-review-stars//span").get_attribute('aria-label')
            text = review_info.find_element(By.XPATH, ".//div[@class='Jtu6Td']//span").text
            rows.append([title, overall_rating, num_reviews, name, rating, text])
        review_information = pd.DataFrame(rows, columns=REVIEW_COLUMNS)

        # One CSV per day: the header is written only when the file is created.
        filename = 'Google_reviews' + ' ' + pd.to_datetime("now").strftime("%Y_%m_%d") + '.csv'
        files_present = glob.glob(filename)
        review_information.to_csv(filename, index=False, mode='a', header=not files_present)

        # Move to a neutral page before opening the next restaurant link.
        driver.get('https://www.google.com')
        time.sleep(3)
finally:
    # Always release the browser, even when a lookup above raises.
    driver.quit()

問題是腳本在到達下一行時會引發錯誤。

driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])

它拋出以下錯誤：

StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
  (Session info: chrome=95.0.4638.69)

當我不把 Google 鏈接存放在資料框中、直接執行相同的程式時（即沒有 for 迴圈，並把 base_url = i['Link'] 改寫成 base_url = Google 評論鏈接），它運作正常。

我不確定自己在哪裡犯了錯誤。如能提供解決問題的建議或幫助，將不勝感激。

uj5u.com熱心網友回復：

編輯

您將驅動程式的創建放在 for 回圈之外
當第一個彈出視窗始終位於最前面時，您無法直接載入帶有 GPS 資料的新 URL；即使載入了，新頁面也會留在背景中。更簡單的做法是先載入一個不含 GPS 資料的新 URL（https://www.google.com），等待 3 秒後再繼續您的迴圈：
您的計數方式有問題：我修改了您的選擇器和總數的計算方式，並把幾行程式碼註解掉了。

from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.firefox.options import Options
import time

# Google local-search URLs, one per restaurant, iterated by the scraping loop
# below (the plain list stands in for the question's DataFrame column).
link_df =  ["https://www.google.com/search?q=restaurant in christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q:1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant in christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
            "https://www.google.com/search?q=restaurant in christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q:1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant in christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
           ]
# Start a Firefox session through geckodriver, pointing it at an explicit
# browser binary.
binary = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe'
# DesiredCapabilities.FIREFOX is a dict shared at class level; work on a copy
# so the template itself is not mutated.
cap = DesiredCapabilities.FIREFOX.copy()
cap["marionette"] = True
options = Options()
options.binary = binary
driver = webdriver.Firefox(options=options, capabilities=cap, executable_path="E:\\Téléchargement\\geckodriver.exe")

# The cookie-consent dialog has to be accepted by hand once: uncomment the
# line below and pause on a breakpoint right after it.
#driver.get(link_df[0])

print ("Headless Firefox Initialized")


print(link_df)
# Open each restaurant link, keep scrolling the review list until every
# review is loaded, then shut the browser down.
MAX_STALLED_ROUNDS = 5  # stop scrolling a page after this many rounds without new reviews
REVIEW_SELECTOR = '#reviewSort .gws-localreviews__google-review'

try:
    for base_url in link_df:
        print(base_url)
        driver.get(base_url)
        # Switch the sort order to "most recent". The label text is the French
        # one; it presumably follows the browser's UI language.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Avis les plus récents']]"))).click()

        title = driver.find_element(By.XPATH, "//div[@class='P5Bobd']").text
        address = driver.find_element(By.XPATH, "//div[@class='T6pBCe']").text
        overall_rating = driver.find_element(By.XPATH, "//div[@class='review-score-container']//span[@class='Aq14fc']").text

        total_reviews_text = driver.find_element(
            By.XPATH,
            "//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
        # The first whitespace-separated token is the review count; drop
        # thousands separators such as "1,234" so int() accepts it.
        num_reviews = int(total_reviews_text.split()[0].replace(',', ''))
        all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, REVIEW_SELECTOR)))
        total_reviews = 0
        stalled_rounds = 0

        # Without the stall counter the loop would spin forever whenever the
        # page stops delivering reviews before the advertised total is reached.
        while total_reviews < num_reviews and stalled_rounds < MAX_STALLED_ROUNDS:
            # Scrolling the last loaded review into view triggers the next batch.
            driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
            WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))

            all_reviews = WebDriverWait(driver, 5).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, REVIEW_SELECTOR)))
            if len(all_reviews) > total_reviews:
                stalled_rounds = 0
            else:
                stalled_rounds += 1
                time.sleep(1)  # give the next batch a moment before retrying
            total_reviews = len(all_reviews)
            print(total_reviews, num_reviews)

        # Move to a neutral page before opening the next restaurant link.
        driver.get('https://www.google.com')
        time.sleep(3)
finally:
    # quit() closes every window and ends the driver session, even on error.
    driver.quit()

似乎 chrome 的解決方案需要一些修復：

org.openqa.selenium.StaleElementReferenceException：過時的元素參考：元素未附加到頁面檔案

字面意思是：所參考的元素已經過期，不再附加到目前的頁面上。這通常是因為頁面被重新整理或發生了跳轉；解決方法是重新呼叫 findElement 或 findElements 方法來定位元素。

看起來 Chrome 存在頁面重繪的問題，所以我建議在每次滾動之前重新載入評論元素，以取得 DOM 節點的最新副本；此外，我還必須在 while 迴圈的結尾加上 1 秒的等待。

from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
#from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
import time

# Google local-search URLs, one per restaurant, iterated by the scraping loop
# below (the plain list stands in for the question's DataFrame column).
link_df =  [
    "https://www.google.com/search?q=restaurant in christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q:1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant in christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
    "https://www.google.com/search?q=restaurant in christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q:1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant in christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
]

# Start a Chrome session through chromedriver, pointing it at an explicit
# browser binary.
binarychrome = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'

options = Options()
options.binary_location = binarychrome
driver = webdriver.Chrome(options=options, executable_path="E:\\Téléchargement\\chromedriver.exe")

# As with Firefox, the cookie-consent dialog has to be accepted by hand once:
# uncomment the line below and pause on a breakpoint right after it.
#driver.get(link_df[0])

print(link_df)
# Open each restaurant link, keep scrolling the review list until every
# review is loaded, then shut the browser down.
MAX_STALLED_ROUNDS = 5  # stop scrolling a page after this many rounds without new reviews
REVIEW_SELECTOR = '#reviewSort .gws-localreviews__google-review'

try:
    for base_url in link_df:
        print(base_url)
        driver.get(base_url)
        # Sort the reviews by "Newest" before reading anything off the page.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Newest']]"))).click()

        title = driver.find_element(By.XPATH, "//div[@class='P5Bobd']").text
        address = driver.find_element(By.XPATH, "//div[@class='T6pBCe']").text
        overall_rating = driver.find_element(By.XPATH, "//div[@class='review-score-container']//span[@class='Aq14fc']").text

        total_reviews_text = driver.find_element(
            By.XPATH,
            "//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
        # The first whitespace-separated token is the review count; drop
        # thousands separators such as "1,234" so int() accepts it.
        num_reviews = int(total_reviews_text.split()[0].replace(',', ''))
        # Block until the first reviews are present before starting to scroll.
        WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, REVIEW_SELECTOR)))
        total_reviews = 0
        stalled_rounds = 0

        # Without the stall counter the loop would spin forever whenever the
        # page stops delivering reviews before the advertised total is reached.
        while total_reviews < num_reviews and stalled_rounds < MAX_STALLED_ROUNDS:
            # Re-locate the reviews on every pass so the element scrolled to
            # is a fresh reference; handles kept from an earlier pass can go
            # stale once the list is re-rendered.
            all_reviews = WebDriverWait(driver, 20).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, REVIEW_SELECTOR)))

            # Scrolling the last loaded review into view triggers the next batch.
            driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])

            if len(all_reviews) > total_reviews:
                stalled_rounds = 0
            else:
                stalled_rounds += 1
            total_reviews = len(all_reviews)
            print(total_reviews, num_reviews)
            time.sleep(1)  # let the next batch render before re-locating

        # Move to a neutral page before opening the next restaurant link.
        driver.get('https://www.google.com')
        time.sleep(3)
finally:
    # quit() closes every window and ends the driver session, even on error.
    driver.quit()

轉載請註明出處，本文鏈接：https://www.uj5u.com/shujuku/352708.html

標籤：Python 蟒蛇-3.x 硒硒网络驱动程序 selenium-chromedriver

上一篇：webelementselenium-如何使用xpath找到元素并使用onclick鏈接

下一篇：Python。燒瓶。點擊后獲取串列