目前自己在試著寫微博爬蟲,用 requests 的方法找到了想要的資料。但是不會匯出到 CSV 或者 Excel,求大神解答。
詳情見代碼:
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 29 15:41:14 2020
@author: yzhang
"""
import requests,time,random,json,re
from urllib.parse import urlencode
#import pymongo
from requests.exceptions import RequestException
import csv
import codecs
import xlwt
def _extract_rows(cards):
    """Collect (text, comments, reposts, likes) tuples from timeline cards."""
    rows = []
    for card in cards:
        mblog = card.get('mblog')
        if not mblog:
            # Indexing card['mblog'] blindly raises KeyError on cards that
            # carry no post, so such cards are skipped.
            continue
        # NOTE(review): falls back to 'text' when 'raw_text' is absent --
        # presumably the HTML-formatted body; verify against a live response.
        text = mblog.get('raw_text') or mblog.get('text', '')
        rows.append((
            text,
            mblog.get('comments_count'),
            mblog.get('reposts_count'),
            mblog.get('attitudes_count'),
        ))
    return rows


def get_pages(since_id, csv_file=None):
    """Fetch one timeline page, print its posts and optionally save them.

    Reads the module-level ``headers`` dict, which must be defined before
    this function is called.

    Args:
        since_id: Paging cursor sent as the ``since_id`` query parameter.
        csv_file: Optional path of a CSV file. When given, one row per post
            (text, comments, reposts, likes) is appended to it.

    Returns:
        The list of (text, comments, reposts, likes) tuples found on the
        page; an empty list when the request or the parsing failed.
    """
    data = {
        'type': 'uid',
        'value': '1702771281',
        'containerid': '1076031702771281',
        'since_id': since_id,
    }
    base_url = 'https://m.weibo.cn/api/container/getIndex?'
    url = base_url + urlencode(data)

    # One request only, sent with the headers and inside the try so that
    # network failures are actually caught.
    try:
        result = requests.get(url, headers=headers, timeout=10)
    except RequestException as e:
        print('Error', e.args)
        return []
    if result.status_code != 200:
        print('Error', result.status_code)
        return []

    # The body is JSON text; json() parses it straight into a dict.
    try:
        cards = result.json()['data']['cards']
    except (ValueError, KeyError, TypeError) as e:
        print('Error', e.args)
        return []

    rows = _extract_rows(cards)
    for text, comment, repost, like in rows:
        print(text)
        print(comment)
        print(repost)
        print(like)
        print('-' * 50)

    if csv_file and rows:
        # newline='' is what the csv module expects; utf-8-sig adds a BOM so
        # that spreadsheet programs can detect the encoding of Chinese text.
        with open(csv_file, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerows(rows)
    return rows
# Cursor returned by the most recent timeline request; '' requests page one.
min_since_id = ''


def get_since_id():
    """Request the timeline at the current cursor and return the next cursor.

    Reads the module-level ``headers`` dict and updates the module-level
    ``min_since_id`` with the ``since_id`` found in the response.

    Returns:
        The ``since_id`` value from ``data.cardlistInfo`` of the response.

    Raises:
        KeyError: If the response has no ``data.cardlistInfo.since_id``.
    """
    global min_since_id
    # 'value' is the same numeric uid that get_pages() sends; the pasted URL
    # had an unrelated web address spliced into this parameter.
    query = {
        'type': 'uid',
        'value': '1702771281',
        'containerid': '1076031702771281',
        'since_id': min_since_id,
    }
    topic_url = 'https://m.weibo.cn/api/container/getIndex?' + urlencode(query)
    result = requests.get(topic_url, headers=headers, timeout=10)
    # Named 'payload' so the imported json module is not shadowed.
    payload = result.json()
    min_since_id = payload['data']['cardlistInfo']['since_id']
    return min_since_id
def main():
    """Walk ten timeline pages, printing each cursor and the page's posts."""
    for i in range(10):
        print('第{}頁'.format(i))
        # get_since_id() sends a request and advances the global cursor on
        # every call, so call it once and reuse the value; calling it twice
        # printed one cursor and fetched a different page.
        since_id = get_since_id()
        print(since_id)
        get_pages(since_id)
#def save_to_mongodb(dict):
#
# client = pymongo.MongoClient()
# db = client['weibo']
# collection = db['weibo']
# if collection.insert_one(dict): #回傳ID值
# print('寫入資料成功!')
# #print(result.inserted_ids)###回傳插入資料的id串列
if __name__ == '__main__':
    # Request headers read as a module-level global by get_pages() and
    # get_since_id(); they must exist before main() runs.
    headers = dict([
        (
            'User-Agent',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
        ),
        ('X-Requested-With', 'XMLHttpRequest'),
    ])
    main()
uj5u.com熱心網友回復:
加一個寫入 csv 的函式
def save_csv(csv_file, data_msg):
    """Append ``data_msg`` followed by a newline to ``csv_file`` (UTF-8).

    Args:
        csv_file: Path of the file to append to; created if missing.
        data_msg: Value to write; it is formatted with ``str.format``.
    """
    # The with-block closes the file even when the write raises.
    with open(csv_file, 'a', encoding='utf-8') as f:
        f.write("{}\n".format(data_msg))
在你的 get_pages() 里, 改下for card in cards: 里面的內容
把資料存到一個串列里。 for 回圈體最后一句,轉換串列為字串。呼叫 save_csv 寫入 csv 檔案。
uj5u.com熱心網友回復:
意思都明白,就是不會寫。
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/18218.html
上一篇:python 中 用 baidu - aip 識別影像 ,能用網路圖片地址鏈接嗎? 識別的圖片能判斷臉在圖片中的比例嗎?或者其他判斷美女的方法?
下一篇:wxpython庫,我用TextCtrl獲取輸入資訊,然后用GetValue()得到字串,但是列印出來是記憶體地址,該怎么修改?
