主要功能:
1、爬取慕課網課程評價
2、將課程評價保存到Excel
使用:python3.6、selenium、Chrome瀏覽器
python包:selenium、BeautifulSoup、pandas
部分代碼參考:https://blog.csdn.net/weixin_43330908/article/details/82959940
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time

# Scrape the course reviews of one icourse163 course page by page and store
# them in an Excel sheet.
#
# NOTE(review): the Selenium calls below (``executable_path=...``,
# ``find_element_by_*``) are the Selenium 3 style API the original script was
# written against; newer Selenium releases need the ``Service`` / ``By`` forms.

# Course page whose reviews are collected.
COURSE_URL = 'https://www.icourse163.org/course/BIT-268001'
# Destination workbook.
OUTPUT_PATH = "./mooc課程評論.xls"
# Location of the Chrome driver binary (raw string: the backslashes are literal).
CHROMEDRIVER_PATH = r".\Chrome\chromedriver.exe"
# Header row of the sheet; every scraped row follows this column order.
COLUMNS = ["用戶", "內容", "時間", "點贊數", "第幾次課程"]
# Number of review pages to visit (the original comment states 1373 pages).
PAGE_COUNT = 1373
# Seconds to wait after a click so the page can render before it is read.
PAGE_LOAD_PAUSE = 1.0
# CSS class of one review item; its child elements use this as a prefix.
ITEM_CLASS = 'ux-mooc-comment-course-comment_comment-list_item_body'


def _text_of(tag):
    """Return ``tag.text``, or an empty string when the lookup found nothing."""
    return tag.text if tag is not None else ""


def _parse_review(item):
    """Convert one review ``div`` into a row ordered like ``COLUMNS``.

    Missing sub-elements yield an empty string (text fields) or ``None``
    (span-based fields) instead of raising, so a single malformed review
    cannot abort the whole run.
    """
    user_name = _text_of(
        item.find("a", {"class": "primary-link " + ITEM_CLASS + "_user-info_name"})
    )

    # The first four characters of the time text are dropped; presumably a
    # fixed label in front of the date -- confirm against the page markup.
    publish_time = _text_of(
        item.find('div', {'class': ITEM_CLASS + '_comment-info_time'})
    )[4:]

    # Which run of the course the review belongs to, with layout whitespace removed.
    term = _text_of(
        item.find('div', {'class': ITEM_CLASS + '_comment-info_term-sign'})
    ).replace(" ", "").replace("\n", "")

    # The review text and the like count are taken by position from the
    # item's <span> elements (index 1 and index 5 respectively).
    spans = [span.string for span in item.find_all('span')]
    body = spans[1] if len(spans) > 1 else None
    likes = spans[5] if len(spans) > 5 else None

    return [user_name, body, publish_time, likes, term]


def _collect_reviews(driver, rows):
    """Walk through the review pages with *driver*, appending rows to *rows*.

    Rows are appended in place so that the caller still holds everything
    gathered so far if a later page raises.
    """
    driver.get(COURSE_URL)
    # Open the "course reviews" tab.
    driver.find_element_by_id("review-tag-button").click()
    time.sleep(PAGE_LOAD_PAUSE)

    for page in range(PAGE_COUNT):
        # Read the page that is currently shown *before* paging forward, so
        # the first page is not skipped.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        for item in soup.find_all('div', {'class': ITEM_CLASS}):
            row = _parse_review(item)
            user_name, body, publish_time, _likes, term = row
            # Progress output, in the same order as the original script.
            for field in (user_name, publish_time, term, body):
                print(field)
            rows.append(row)

        if page + 1 < PAGE_COUNT:
            # Look the button up again on every page in case the pager was
            # re-rendered and the previous element reference went stale.
            driver.find_element_by_class_name("ux-pager_btn__next").click()
            time.sleep(PAGE_LOAD_PAUSE)


def _save_rows(rows):
    """Write *rows* to ``OUTPUT_PATH`` with ``COLUMNS`` as the header row."""
    table = pd.DataFrame(rows, columns=COLUMNS)
    # The context manager saves and closes the workbook even on error.
    with pd.ExcelWriter(OUTPUT_PATH) as writer:
        table.to_excel(writer, index=False)


if __name__ == '__main__':
    rows = []
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
    try:
        _collect_reviews(driver, rows)
    finally:
        # Keep whatever was scraped even if a page failed, and always shut
        # the browser down.
        try:
            _save_rows(rows)
        finally:
            driver.quit()
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/234895.html
標籤:python
