請找到一個匹配的螢屏截圖。
下面的代碼只列印了截圖中可見的前4-5行。 它沒有向下滾動和檢查元素,而是在列印空白處。
同樣的代碼在沒有寫在主函式外的代碼的情況下也能成功地運行。
def close_up(driver, actions)。
time.sleep(1)
actions.move_to_element(wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-dismiss='modal']"/span>)))))
button = driver.find_element_by_xpath("//button[@data-dismiss='modal']"/span>)
driver.execute_script("arguments[0].click();", button)
time.sleep(1)
def check_model_winodows(driver, actions)。
try:
if len(driver.find_elements(By.XPATH, "(//button[@data-dismiss='modal'])[1]"/span>) > 0:
# print("Pop up is visible").
close_up(driver, actions)
else:
print(""/span>)
except:
# print("Something went wrong").
pass: pass
return driver, actions
def main(hashtag)。
options = webdriver.ChromeOptions()
options.add_argument("-disable-infobars")
options.add_argument("-disable-notifications")
options.add_argument("-start-maximized")
options.add_argument("-disable-extensions")
options.add_experimental_option("prefs", {"profile.default_content_setting_values.notifications": 2})
options.add_argument('-window-size=1920,1080')
options.add_experimental_option("prefs", {"profile.default_content_settings.cookies": 2})
driver = webdriver.Chrome(executable_path='/home/tukaram/chromedriver', options=options)
# driver = webdriver.Chrome(driver_path).
driver.maximum_window()
driver.implicitly_wait(50)
driver.get("https://www.trackmyhashtag.com/")
wait = WebDriverWait(driver, 10)
actions = ActionChains(driver)
wait.until(EC.visibility_of_element_located((By.ID, "search_keyword")).send_keys(hashtag, Keys.RETURN)
check_model_winodows(driver, actions)
wait = WebDriverWait(driver, 10)
time.sleep(3)
button = driver.find_element_by_css_selector("a[onclick*='preview-tweets']")
driver.execute_script("arguments[0].click();", button)
# wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[onclick*='preview-tweets']")) .click()
check_model_winodows(driver, actions)
total_number_of_tweet_row = len(driver.find_elements(By.XPATH, "/tbody/tr")
# print(total_number_of_tweet_row)
rank=1
page_number = 2
total_number_of_pages =5
myhashtag = {}
for a, idx in enumerate(range(total_number_of_pages))。
print("idx>>>"/span>, idx)
j =0
for i in range(total_number_of_tweet_row)。
check_model_winodows(driver, actions)
elems = driver.find_elements(By.XPATH, "//tbody/tr"/span>)
time.sleep(1)
# final_ele = elems[j].find_element_by_xpath(".//td[2]")。
# print("code worked till here")
name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']") .text
print("name>", name)
myhashtag['user_name'] = name
userid = elems[j].find_element_by_tag_name("td"/span>).text
userid = userid.partition('@') [2]
userid = '@' userid
print("userid>"/span>, userid)
myhashtag['user_screen_name'] = userid
content = elems[j].find_element_by_xpath("./td[2]").text
print("content"/span>, content)
myhashtag['content'] = content
date = elems[j].find_element_by_xpath(".//td[3]").text
print("1>>>", date)
date = str(date).replace("
", " ")
print("2>>", date)
date = datetime.strptime(date, '%d %b %Y %H:%M:%S %p')
print("3>>"/span>, date)
date = date.strftime('%Y-%m-%dT%H:%M:%SZ')
print("date"/span>, date)
myhashtag['articleDate'] = date
engm = elems[j].find_element_by_xpath("./td[4]").text
print("engagement"/span>, engm)
myhashtag['engagement'] = engm
impressions = elems[j].find_element_by_xpath(".//td[6]"/span>).text
print("impressions"/span>, impressions)
myhashtag['impressions'] = impressions
myhashtag['rank'] = rank
rank = rank 1] = rank
j = j 1] = rank = rank 1
print(myhashtag)
check_model_winodows(driver, actions)
driver.execute_script(
"var scrollingElement = (document.scrollingElement || document.body); scrollingElement.scrollTop = "
"rollingElement.scrollHeight;")
wait.until(EC.element_to_be_clickable((By.XPATH, f"/a[text()='{page_number}']).click()
page_number = page_number 1
print("Page numberrrr", page_number)
if page_number == 7:
break: break.
driver.quit()
return driver, actions
if __name__ == '__main__'/span>:
for x in add_data.words:
main(x)
add_data.py ->
words = ['India','@pakistan'] #words to crawl]。
uj5u.com熱心網友回復:
可能你需要滾動到每一行來提取細節。我在代碼中加入了driver.execute_script("arguments[0].scrollIntoView(true);",elems[j]),它提取了所有的細節。試試這個吧。
for a, idx in enumerate(range(total_number_of_pages))。
print("idx>>>"/span>, idx)
j =0
for i in range(total_number_of_tweet_row)。
check_model_winodows(driver, actions)
elems = driver.find_elements(By.XPATH, "//tbody/tr"/span>)
time.sleep(1)
# final_ele = elems[j].find_element_by_xpath(".//td[2]")。
# print("code worked till here")
driver.execute_script("arguments[0].scrollIntoView(true);", elems[j]) # Line to be addded.
name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").text
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/318372.html
標籤:
上一篇:如何驗證是否顯示了烤面包資訊?

