當python遇上echarts（一）了解基本知識

實戰促練

通過一個小專案，繪制折線圖，餅圖，柱狀圖，圓弧餅圖，詞云圖等

一、示例

import requests
from bs4 import BeautifulSoup
import json,re
from pyecharts import options as opts
from pyecharts.charts import Bar,Line


url_search = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp?'
headers = {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
    'referer':'https://y.qq.com/',
    'origin':'https://y.qq.com'
}

params_str = '''
    ct: 24
    qqmusic_ver: 1298
    new_json: 1
    remoteplace: txt.yqq.song
    searchid: 65264922900538759
    t: 0
    aggr: 1
    cr: 1
    catZhida: 1
    lossless: 0
    flag_qc: 0
    p: 1
    n: 10
    w: 林俊杰
    g_tk_new_20200303: 5381
    g_tk: 5381
    loginUin: 0
    hostUin: 0
    format: json
    inCharset: utf8
    outCharset: utf-8
    notice: 0
    platform: yqq.json
    needNewCode: 0
'''

def switch_to_dict(params_str):
    params = {}
    p = params_str.replace('\n',',').replace(' ','')
    p_l = p.split(',')
    for i in p_l:
        if i:
            i = i.split(':')
            params[i[0]] = i[1]
    return params
params = switch_to_dict(params_str)

singer_list = ['周杰倫','林俊杰','王力宏','張杰','汪蘇瀧','許嵩','薛之謙','陳奕迅','李榮浩','陶喆']
song_list =[]
album_list = []
mv_list = []

def crawl():
    for singer in singer_list:
        params['w'] = singer
        #搜索頁面的response
        res_serach = requests.get(url_search,headers = headers,params = params)
        #將json資料轉化為dict資料
        json_search = res_serach.json()
        #單曲數量
        song_num = json_search['data']['zhida']['zhida_singer']['songNum']
        album_num = json_search['data']['zhida']['zhida_singer']['albumNum']
        mv_num = json_search['data']['zhida']['zhida_singer']['mvNum']
        song_list.append(song_num)
        album_list.append(album_num)
        mv_list.append(mv_num)
    return song_list,album_list,mv_list
song_list,album_list,mv_list = crawl()
print(song_list,album_list,mv_list)


def set_bar():
    bar = Bar(init_opts=opts.InitOpts(width = '800px',height = '800px',page_title='qq音樂柱狀圖'))
    bar.add_xaxis(xaxis_data = singer_list)
    bar.add_yaxis(series_name = '單曲',y_axis = song_list)
    bar.add_yaxis(series_name = 'MV',y_axis = mv_list)
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title = '我喜歡的九位歌手',subtitle='數錯了,是十位',pos_left='20%'),
        #添加坐標軸名稱，位置以及大小
        xaxis_opts = opts.AxisOpts(name = '歌手',name_location='center',name_gap=25,name_textstyle_opts=opts.TextStyleOpts(font_size = 20)),
        yaxis_opts = opts.AxisOpts(name = '單曲數量/首')
        )
    #增加一個y軸
    bar.extend_axis(
        yaxis = opts.AxisOpts(
            name = '專輯數量/個',
            type_ = 'value',
            min_ = -20,
            max_ = 110,
            interval=5
        )
    )
    return bar

def set_line():
    line = Line()
    line.add_xaxis(xaxis_data = singer_list)
    line.add_yaxis(
        series_name = '專輯',
        y_axis = album_list,
        yaxis_index = 1
        )
    line.set_global_opts(legend_opts=opts.LegendOpts(legend_icon='pin'))
    return line

bar = set_bar()
line = set_line()
bar.overlap(line).render('十位歌手.html')

在這里插入圖片描述
當然，剛開始繪圖，折線圖和柱狀圖的混合有點難度，下面通過這些資料，把基本圖表都過一遍

song_list = [972, 685, 553, 852, 472, 257, 243, 1307, 320, 438]
album_list = [35, 52, 58, 76, 71, 37, 18, 108, 13, 23]
mv_list = [1344, 745, 603, 497, 267, 266, 368, 1207, 220, 244]
singer_list = ['周杰倫','林俊杰','王力宏','張杰','汪蘇瀧','許嵩','薛之謙','陳奕迅','李榮浩','陶喆']

二、柱狀圖

from pyecharts import options as opts
from pyecharts.charts import Bar
#柱狀圖的資料格式：x軸和y軸都是串列資料
song_list = [972, 685, 553, 852, 472, 257, 243, 1307, 320, 438]
album_list = [35, 52, 58, 76, 71, 37, 18, 108, 13, 23]
mv_list = [1344, 745, 603, 497, 267, 266, 368, 1207, 220, 244]
singer_list = ['周杰倫','林俊杰','王力宏','張杰','汪蘇瀧','許嵩','薛之謙','陳奕迅','李榮浩','陶喆']

def set_bar():
    #設定初始項，圖表高width,寬height，以及網頁的名稱
    bar = Bar(init_opts=opts.InitOpts(width = '800px',height = '600px',page_title='qq音樂柱狀圖'))
    #添加x軸資料
    bar.add_xaxis(xaxis_data = singer_list)
    #添加y軸資料，加上series_name，表示圖例
    bar.add_yaxis(series_name = '單曲',y_axis = song_list)
    bar.add_yaxis(series_name = 'MV',y_axis = mv_list)
    bar.add_yaxis(series_name = '專輯',y_axis = album_list)
    #設定全域項
    bar.set_global_opts(
        #設定圖表主標題，副標題和標題位置
        title_opts=opts.TitleOpts(title = '我喜歡的九位歌手',subtitle='數錯了,是十位',pos_left='20%'),
        #添加坐標軸名稱，位置以及大小，name_gap表示名稱與x軸距離，font_size是字體大小
        xaxis_opts = opts.AxisOpts(name = '歌手',name_location='center',name_gap=25,name_textstyle_opts=opts.TextStyleOpts(font_size = 20)),
        yaxis_opts = opts.AxisOpts(name = '單曲數量/首')
        )
    return bar

bar = set_bar()
#生成html檔案
bar.render('柱狀圖.html')

在這里插入圖片描述

三、折線圖

from pyecharts import options as opts
from pyecharts.charts import Line

#折線圖的資料格式：x軸和y軸都是串列資料
song_list = [972, 685, 553, 852, 472, 257, 243, 1307, 320, 438]
album_list = [35, 52, 58, 76, 71, 37, 18, 108, 13, 23]
mv_list = [1344, 745, 603, 497, 267, 266, 368, 1207, 220, 244]
singer_list = ['周杰倫','林俊杰','王力宏','張杰','汪蘇瀧','許嵩','薛之謙','陳奕迅','李榮浩','陶喆']

def set_line():
    line = Line()
    line.add_xaxis(xaxis_data = singer_list)
    #添加y軸資料，加上series_name，表示圖例
    line.add_yaxis(series_name = '單曲',y_axis = song_list)
    line.add_yaxis(series_name = 'MV',y_axis = mv_list)
    line.add_yaxis(series_name = '專輯',y_axis = album_list)
    line.set_global_opts(
        #設定圖例形狀
        legend_opts=opts.LegendOpts(legend_icon='pin'),
        #設定圖表主標題，副標題和標題位置
        title_opts=opts.TitleOpts(title = '我喜歡的九位歌手',subtitle='數錯了,是十位',pos_left='20%'),
        #添加坐標軸名稱，位置以及大小，name_gap表示名稱與x軸距離，font_size是字體大小
        xaxis_opts = opts.AxisOpts(name = '歌手',name_location='center',name_gap=25,name_textstyle_opts=opts.TextStyleOpts(font_size = 20)),
        yaxis_opts = opts.AxisOpts(name = '單曲數量/首')
        )
    return line

line = set_line()
line.render('折線圖.html')

在這里插入圖片描述

四、餅圖 Pie

from pyecharts import options as opts
#匯入Pie類
from pyecharts.charts import Pie

#餅圖的資料型別，為串列的嵌套：[[key1, value1], [key2, value2]]
song_list = [972, 685, 553, 852, 472, 257, 243, 1307, 320, 438]
singer_list = ['周杰倫','林俊杰','王力宏','張杰','汪蘇瀧','許嵩','薛之謙','陳奕迅','李榮浩','陶喆']
#使用zip函式后[(972, '周杰倫'), (685, '林俊杰'), (553, '王力宏'), (852, '張杰'), (472, '汪蘇瀧'), (257, '許嵩'), (243, '薛之謙'), (1307, '陳奕迅'), (320, '李榮浩'), (438, '陶喆')]
#再將其中的元組轉換成串列
data_pair =  [list(i) for i in zip(singer_list,song_list)]

def set_pie():
    pie = Pie()
    pie.add(
        series_name = '',
        data_pair = data_pair,
        color = 'red',
        #設定圖表的標簽(指示圖表區域),formatter是設定標簽內容格式，在餅圖中：{a}（系列名稱），{b}（資料項名稱），{c}（數值）, {d}（百分比）
        label_opts = opts.LabelOpts(is_show=True,formatter='{b}:{c} \n ({d}%)'),
        # 是否展示成南丁格爾圖，通過半徑區分資料大小，有'radius'和'area'兩種模式，
        # radius：扇區圓心角展現資料的百分比，半徑展現資料的大小
        # area：所有扇區圓心角相同，僅通過半徑展現資料大小
        rosetype = 'radius',
        # 餅圖的半徑，陣列的第一項是內半徑，第二項是外半徑
        # 默認設定成百分比，相對于容器高寬中較小的一項的一半
        radius=['20%','75%']
    )
    pie.set_global_opts(
        #設定圖例形狀,位置,orient表示橫向還是縱向，horizontal和vertical
        legend_opts=opts.LegendOpts(legend_icon='pin',orient='vertical',pos_right='10%'),
        #設定圖表主標題，副標題和標題位置
        title_opts=opts.TitleOpts(title = '我喜歡的九位歌手',subtitle='數錯了,是十位',pos_left='20%'),
    )
    #設定餅圖的顏色，可選項，不設也有默認的顏色，
    pie.set_colors(['blue','red','orange','yellow','green','purple','black','brown','pink','grey'])
    return pie

pie = set_pie()
pie.render('餅圖.html')

不加rosetype引數和radius引數

在這里插入圖片描述

加上rosetype引數
加上radius引數

五、散點圖

任務：生成sin和cos的散點圖

from pyecharts import options as opts
#匯入Scatter類
from pyecharts.charts import Scatter
import numpy as np

#散點圖的資料型別：x軸和y軸結尾串列
#linspace(start,stop,num),在0-10中回傳50個等間距的數
x = np.linspace(0,10,50)
#print(x)
y1 = np.sin(x)
y2 = np.cos(x)

def set_scatter():
    scatter = Scatter(init_opts=opts.InitOpts(width = '800px',height='600px',page_title='散點圖'))
    #添加x軸資料
    scatter.add_xaxis(xaxis_data = x)
    #點的形狀：symbol引數的取值：'circle', 'rect', 'roundRect', 'triangle', 'diamond', 'pin', 'arrow', 'none'
    scatter.add_yaxis(series_name = 'sin散點圖',y_axis = y1,symbol='circle',label_opts=opts.LabelOpts(is_show=False))
    #為了讓圖更美觀簡潔，設定標簽項不顯示is_show = False
    scatter.add_yaxis(series_name = 'cos散點圖',y_axis = y2,symbol='triangle',label_opts=opts.LabelOpts(is_show=False))
    scatter.set_global_opts(title_opts=opts.TitleOpts(title = '三角函式散點圖'),tooltip_opts=opts.TooltipOpts(trigger='axis',axis_pointer_type='cross'))
    return scatter

scatter = set_scatter()
scatter.render('散點圖.html')

在這里插入圖片描述

六、詞云圖

這是我最喜歡的一種圖了，不僅美觀，簡潔，而且直觀，印象深刻，

任務

爬取qq音樂評論，資料處理后，繪制詞云圖

引入模塊

import requests
from bs4 import BeautifulSoup
import json
from pyecharts import options as opts
from pyecharts.charts import WordCloud
from jieba.analyse import TFIDF
import jieba
from collections import Counter

資料爬取

首先進入網頁版QQ音樂，打開一首歌的播放頁面，在開發者選項中找到關于comment的請求（按照size排序，最大的那個就是）對比后發現不同歌曲topid不同，同一歌曲不同頁數，pagenum不同，這樣，我們就可以通過構造params來爬取歌曲評論了
在這里插入圖片描述

your_chioce = input('你想繪制那首歌的評論詞云圖：')

url_search = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp?'
headers = {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
    'referer':'https://y.qq.com/',
    'origin':'https://y.qq.com'
}

params_str = '''
    ct: 24
    qqmusic_ver: 1298
    new_json: 1
    remoteplace: txt.yqq.song
    searchid: 65264922900538759
    t: 0
    aggr: 1
    cr: 1
    catZhida: 1
    lossless: 0
    flag_qc: 0
    p: 1
    n: 10
    w: 林俊杰
    g_tk_new_20200303: 5381
    g_tk: 5381
    loginUin: 0
    hostUin: 0
    format: json
    inCharset: utf8
    outCharset: utf-8
    notice: 0
    platform: yqq.json
    needNewCode: 0
'''

def switch_to_dict(params_f):
    params = {}
    p = params_f.replace('\n',',').replace(' ','')
    p_l = p.split(',')
    for i in p_l:
        if i:
            i = i.split(':')
            params[i[0]] = i[1]
    return params

def crawl_topid():
    params = switch_to_dict(params_str)
    params['w'] = your_chioce
    #搜索頁面的response
    res_serach = requests.get(url_search,headers = headers,params = params)
    #將json資料轉化為dict資料
    json_search = res_serach.json()
    #單曲數量
    top_id = json_search['data']['song']['list'][0]['id']
    return top_id

params_comment = '''
    g_tk_new_20200303: 5381
    g_tk: 5381
    loginUin: 0
    hostUin: 0
    format: json
    inCharset: utf8
    outCharset: GB2312
    notice: 0
    platform: yqq.json
    needNewCode: 0
    cid: 205360772
    reqtype: 2
    biztype: 1
    topid: 277438634
    cmd: 8
    needmusiccrit: 0
    pagenum: 0
    pagesize: 25
    lasthotcommentid: 
    domain: qq.com
    ct: 24
    cv: 10101010
'''

url_comment = 'https://c.y.qq.com/base/fcgi-bin/fcg_global_comment_h5.fcg?'
def crawl_comment():
    #創建txt檔案，用于寫入comment
    file = open('comment.txt','w',encoding='utf-8')
    params = switch_to_dict(params_comment)
    params['topid'] = crawl_topid()
    for i in range(10):
        params['pagenum'] = str(i) 
        res_comment = requests.get(url_comment,headers = headers,params = params)
        json_comment = res_comment.json()
        #生成評論的串列
        comment_list = [j['rootcommentcontent'] for j in json_comment['comment']['commentlist']]
        #for z in comment_list:
        file.writelines(comment_list)
    file.close()
#生成txt檔案
crawl_comment()

資料處理

file = open('comment.txt','r',encoding='utf-8')
content = file.readlines()
#洗掉空元素
for i in content:
    if not i :
        del i

words = []
#傳入字串，回傳分詞的串列
for j in content:
    #提取文本中的關鍵字
    tfidf = TFIDF()
    tfidf.set_stop_words('data/stopwords.txt')
    words_list = tfidf.extract_tags(sentence = j)
    words.extend(words_list)

#對words串列中的分詞進行數量統計
words_dict = dict(Counter(words))

#詞云圖的資料型別為串列嵌套元組：[(word1, count1), (word2, count2)]
#將words_dict中的資料轉換成需要的格式
data_pair = [(k,v) for k,v in words_dict.items() if len(k)<6]

繪制詞云圖

def set_wordcloud():
    wordcloud = WordCloud()
    wordcloud.add(
        series_name = '詞頻分析',
        data_pair = data_pair,
        # 詞云圖輪廓，有 'circle', 'cardioid', 'diamond', 'triangle-forward', 'triangle', 'pentagon', 'star' 可選
        #shape = 'star',
        # 自定義的圖片（目前支持 jpg, jpeg, png, ico 的格式.
        # 注：如果使用了 mask_image 之后第一次渲染會出現空白的情況，再重繪一次就可以了（Echarts 的問題）
        #mask_image = '' ,
    )
    wordcloud.set_global_opts(title_opts = opts.TitleOpts(title = '{}-詞云分析'.format(your_chioce)))
    return wordcloud

wordcloud = set_wordcloud()
wordcloud.render('十位歌手詞云圖.html')