# Scrape the MTV Germany single charts with Selenium.
#
# The chart markup sits inside an <object> element (shown as "#document" in
# the browser dev tools), i.e. it is a separate embedded document and is not
# reachable through element lookups on the outer page.
url = 'http://www.mtv.de/charts/c6mc86/single-top-100?expanded=true'
timeout = 60  # seconds to wait for the embedded chart to show up

chromedriver = Service("/usr/local/bin/chromedriver")
op = webdriver.ChromeOptions()
browser = webdriver.Chrome(service=chromedriver, options=op)
try:
    browser.get(url)
    # Scroll to the bottom of the page before waiting for the chart
    # (presumably the embed is loaded lazily - TODO confirm).
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight)")

    # Rely on a single explicit wait: no implicit wait and no fixed sleep, so
    # the timeout below is the only one in play.
    try:
        chart_object = WebDriverWait(browser, timeout).until(
            EC.visibility_of_element_located(
                (By.XPATH, '/html/body/div[1]/main/div/section/div/div/div/object')
            )
        )
    except TimeoutException:
        # Stop here instead of carrying on with a page that never loaded.
        raise SystemExit(
            f'Chart <object> did not become visible within {timeout} seconds'
        ) from None
    print('========================')

    # switch_to.frame() returns None, so its result cannot be searched, and
    # the WebDriver specification only describes frame switching for
    # <frame>/<iframe> elements. Load the embedded document directly instead.
    chart_url = chart_object.get_attribute('data')
    if not chart_url:
        raise SystemExit('Chart <object> has no "data" URL to load')
    browser.get(chart_url)

    # NOTE(review): class name kept from the original script - verify it
    # against the markup of the embedded document.
    itembox = browser.find_elements(By.CLASS_NAME, 'charts-marslnet')
    for item in itembox:
        print(item.text)
finally:
    # Always shut the browser/driver down, on success and on failure.
    browser.quit()
我一直在嘗試從該網站上抓取歌曲的名稱、作者和網址,但無法訪問 #document 部分下標籤內的 HTML。我無法弄清楚為什麼無法訪問它。任何關於我的程式碼可能存在什麼問題、或應該如何訪問 #document 部分中的 HTML 的見解,都將非常有幫助。[帶有 #document 的標籤內的 HTML(螢幕截圖 2)][1]
uj5u.com熱心網友回復:
您可以直接從內嵌文件的 URL 獲取資料:
# Fetch the embedded chart document directly and print the title, artist and
# video link of every chart entry.
import requests
from bs4 import BeautifulSoup, Tag

url = 'https://mtv.marsl.net/demo/showdbcharts.php?c=4'
# Bound the request time and fail loudly on an HTTP error instead of parsing
# an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

acts = soup.find_all('div', {'class': 'cmn-act'})
for index, each in enumerate(acts):
    # Stop scanning at the next chart entry. An unbounded find_next() keeps
    # searching past it, so an entry without its own link would be printed
    # with the link of the entry that follows.
    boundary = acts[index + 1] if index + 1 < len(acts) else None
    title = artist = link = ''
    for node in each.next_elements:
        if node is boundary:
            break
        if not isinstance(node, Tag):
            continue  # skip text nodes and comments
        classes = node.get('class') or []
        if not title and node.name == 'div' and 'cmn-title' in classes:
            title = node.text.strip()
        if not artist and node.name == 'div' and 'cmn-artist' in classes:
            artist = node.text.strip()
        if not link and node.name == 'a' and node.has_attr('href'):
            link = node['href']
    # A field that is missing for this entry is printed as an empty line.
    print(f'{title}\n{artist}\n{link}\n\n')
輸出:
abcdefu
Gayle
https://www.mtv.de/musikvideos/r9d9sl/abcdefu
Wenn ich will
Gzuz & Bonez MC
https://www.mtv.de/musikvideos/7evkst/10von10
10von10
Pajel
https://www.mtv.de/musikvideos/7evkst/10von10
Shivers
Ed Sheeran
https://www.mtv.de/musikvideos/miq9lq/shivers
Heat Waves
Glass Animals
https://www.mtv.de/musikvideos/l9rv5d/heat-waves
...
轉載請註明出處,本文鏈接:https://www.uj5u.com/shujuku/418990.html
標籤:
