TIOBE

TIOBE排行榜是根據互聯網上有經驗的程式員、課程和第三方廠商的數量，並使用搜索引擎（如Google、Bing、Yahoo!）以及Wikipedia、Amazon、YouTube統計出排名資料，只是反映某個編程語言的熱門程度，並不能說明一門編程語言好不好，或者一門語言所撰寫的代碼數量多少。

TIOBE開發語言排行榜每月更新一次，依據的指數是基於世界範圍內的資深軟體工程師和第三方供應商提供的資料，其結果可作為當前業內程式開發語言流行使用程度的有效指標。

該指數可以用來檢閱開發者的編程技能能否跟上趨勢，或是否有必要作出戰略改變，以及什麼編程語言是應該及時掌握的。觀察認為，該指數反映的雖並非當前最流行或應用最廣的語言，但對世界範圍內開發語言的走勢仍具有重要參考意義。

目標網站

https://www.tiobe.com/tiobe-index/

目標資料

TIOBE Index for April 2021

Very Long Term History

需求分析

利用Python解析頁面資訊

分析相關資料繪製圖表

保存相關榜單資訊

繪製圖表效果

TOP20 編程語言使用率

TOP20 編程語言變化趨勢

各類編程語言長期排名變化情況

繪製詞雲效果

TOP20 編程語言使用率詞云

代碼實作

import requests
from requests.exceptions import RequestException
from lxml import etree
import matplotlib.pyplot as plt
import wordcloud


# 獲取html
def getHTMLText(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded response text, or ``None`` when the request fails (the
        error is printed, not raised).
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
    }
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        # Decode with the encoding detected from the body rather than the
        # one announced in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except RequestException as e:
        print('error', e)
        return None


# 解析html
def analyzeRankHtml(html):
    """Pull the header and body text nodes out of the "table-top20" table.

    Args:
        html: HTML source of the page as a string.

    Returns:
        A ``(uhead, ulist)`` pair: the cleaned header texts and the flat list
        of every text node found in the table body rows.
    """
    tree = etree.HTML(html, etree.HTMLParser())
    table = '//table[contains(@class,"table-top20")]'
    uhead = tree.xpath(table + '/thead/tr//text()')
    ulist = tree.xpath(table + '/tbody/tr//text()')

    # Drop the first seven CR-LF text nodes from the header row
    # (presumably whitespace between the <th> cells -- TODO confirm).
    for _ in range(7):
        uhead.remove('\r\n')
    # Discard the header entry at index 2 (presumably the column that has no
    # matching text in the body rows -- TODO confirm against the page).
    uhead.pop(2)
    return uhead, ulist


# 資料清洗 1
def dataPreTreat1(uinfo: list) -> tuple:
    """Split the flat TOP20 cell list into per-column lists.

    ``uinfo`` is read as consecutive groups of five cells. Within each group
    the cell at offset 2 is the language name, offset 3 the rating and
    offset 4 the change; offsets 0 and 1 are ignored here.

    Args:
        uinfo: Flat list of cell texts, five per table row.

    Returns:
        ``(language, ratings_data, change_data)`` where the two numeric lists
        hold the percentages converted to fractions (``'14.32%'`` -> 0.1432).
    """

    def to_fraction(cell):
        # Cells look like '14.32%' or '-2.40%'; strip the sign and scale.
        return float(cell.rstrip('%')) / 100

    language = list(uinfo[2::5])
    ratings_data = [to_fraction(cell) for cell in uinfo[3::5]]
    change_data = [to_fraction(cell) for cell in uinfo[4::5]]

    return language, ratings_data, change_data


# 列印排行資訊
def printRank(uhead, ulist):
    """Print the TOP20 table to stdout as tab-separated, centred columns.

    Args:
        uhead: Header texts; the entry at index 2 gets a 25-character column,
            every other entry a 10-character column.
        ulist: Flat list of cell texts, five per row. Exactly 20 rows are
            printed, so at least 100 entries are required.
    """
    header_cells = [
        ('{:^25}\t' if col == 2 else '{:^10}\t').format(title)
        for col, title in enumerate(uhead)
    ]
    print(''.join(header_cells))

    row_template = "{0:^10}\t{1:^10}\t{2:^25}\t{3:^10}\t{4:^10}"
    for row in range(20):
        start = row * 5
        print(row_template.format(*ulist[start:start + 5]))
    # Trailing blank line separates the table from later output.
    print()


# 保存TOP20編程語言情況
def saveRank(uhead, ulist):
    """Write the TOP20 table to 'TOP20 編程語言情況.txt' in the working directory.

    The layout matches the console table: centred, tab-separated columns,
    with the entry at index 2 given a 25-character column. Each data line
    ends with a tab followed by a newline.

    Args:
        uhead: Header texts.
        ulist: Flat list of cell texts, five per row; 20 rows are written.
    """
    row_template = "{0:^10}\t{1:^10}\t{2:^25}\t{3:^10}\t{4:^10}\t{5:}"

    with open('TOP20 編程語言情況.txt', 'w+') as f:
        header_cells = [
            ('{:^25}\t' if col == 2 else '{:^10}\t').format(title)
            for col, title in enumerate(uhead)
        ]
        f.write(''.join(header_cells))
        f.write('\n')

        for row in range(20):
            start = row * 5
            f.write(row_template.format(*ulist[start:start + 5], '\n'))

    print('TOP20 編程語言情況.txt 已保存...\n')


# 獲取各類編程語言長期排名情況
def analyzeVlthHtml(html):
    """Extract the text nodes of the table whose id contains "VLTH".

    Args:
        html: HTML source of the page as a string.

    Returns:
        ``(vhead, vlth)``: the first nine text nodes (treated as the header
        row) and the remaining text nodes as one flat list.
    """
    tree = etree.HTML(html, etree.HTMLParser())
    cells = tree.xpath('//table[contains(@id,"VLTH")]//tr//text()')
    # Peel the nine header cells off the front; what remains is body data.
    vhead = [cells.pop(0) for _ in range(9)]
    return vhead, cells


# 列印長期排行資訊
def printLongTermHistoryRank(vhead, vlth):
    """Print the long-term history table to stdout.

    Args:
        vhead: Header texts; the first gets a 25-character column, the rest
            5-character columns.
        vlth: Flat list of cell texts, nine per row. Exactly 13 rows are
            printed, so at least 117 entries are required.
    """
    header_cells = [
        ('{0:^25}\t' if col == 0 else '{:^5}\t').format(title)
        for col, title in enumerate(vhead)
    ]
    print(''.join(header_cells))

    row_template = "{0:^25}\t{1:^5}\t{2:^5}\t{3:^5}\t{4:^5}\t{5:^5}\t{6:^5}\t{7:^5}\t{8:^5}"
    for row in range(13):
        start = row * 9
        print(row_template.format(*vlth[start:start + 9]))
    # Trailing blank line separates the table from later output.
    print()


# 保存各類編程語言長期排名變化情況
def saveLongTermHistoryRank(vhead, vlth):
    """Write the long-term history table to a text file in the working directory.

    The layout matches the console table: centred, tab-separated columns,
    with the first column 25 characters wide and the rest 5. Each data line
    ends with a tab followed by a newline.

    Args:
        vhead: Header texts.
        vlth: Flat list of cell texts, nine per row; 13 rows are written.
    """
    # One name for both the file and the confirmation message, so the message
    # always reports the file that was actually written.
    filename = '各類編程語言長期排名變化情況.txt'
    row_template = "{0:^25}\t{1:^5}\t{2:^5}\t{3:^5}\t{4:^5}\t{5:^5}\t{6:^5}\t{7:^5}\t{8:^5}\t{9:}"

    with open(filename, 'w+') as f:
        for col, title in enumerate(vhead):
            cell_format = '{0:^25}\t' if col == 0 else '{:^5}\t'
            f.write(cell_format.format(title))
        f.write('\n')

        for row in range(13):
            start = row * 9
            f.write(row_template.format(*vlth[start:start + 9], '\n'))

    print(filename + ' 已保存...\n')


# 資料清洗 2
def dataPreTreat2(vhead, vlth: list) -> tuple:
    """Turn the flat long-term history cells into plottable series.

    The input is read as 13 rows of 9 cells: a language name followed by one
    rank per header column. A ``'-'`` cell (no rank) is replaced by 40 so it
    can still be converted to a number.

    The column labels are returned in ascending order, and every data row is
    reordered with the same permutation so each value stays paired with its
    own label. Sorting only the labels would mirror the series whenever the
    header is not already ascending.

    Args:
        vhead: Header texts; the first entry labels the name column, the rest
            label the rank columns.
        vlth: Flat list of cell texts, nine per row. Not modified.

    Returns:
        ``(language, month, data)``: the language names, the sorted column
        labels, and one list of floats per language aligned with ``month``.
    """
    row_count, row_width = 13, 9

    cells = [40 if cell == '-' else cell for cell in vlth]
    table = [cells[r * row_width:(r + 1) * row_width] for r in range(row_count)]

    language = [row[0] for row in table]

    labels = list(vhead[1:])
    # Permutation that sorts the labels; applied to labels and values alike.
    order = sorted(range(len(labels)), key=lambda k: labels[k])
    month = [labels[k] for k in order]
    data = [[float(row[1 + k]) for k in order] for row in table]

    return language, month, data


# 繪制圖表
def drawPic(ratings_data, change_data):
    """Draw, save and show the TOP20 usage pie chart and change bar chart.

    NOTE: the labels come from the module-level name ``language``, which is
    not a parameter; it must be defined before this function is called.

    Args:
        ratings_data: Usage fractions; used both as the pie slice sizes and
            as the per-slice ``explode`` offsets.
        change_data: Change fractions, one bar each; bars for values >= 0 are
            red, negative ones green.
    """
    print('開始繪制->TOP20 編程語言使用率...\n')

    # Use a font with CJK glyphs so the Chinese titles render.
    # 'SimHei' is the Windows choice; on macOS use ['Arial Unicode MS'].
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # plt.style.available lists the other style names.
    plt.style.use('bmh')

    # --- pie chart: usage share -------------------------------------------
    plt.figure(figsize=(12, 8))
    plt.pie(
        x=ratings_data,
        explode=ratings_data,
        labels=language,
        autopct='%1.1f%%',
        normalize=True,
        shadow=False,
        startangle=150,
    )
    plt.title("TOP20 編程語言使用率")
    plt.legend(loc='best', fontsize='x-small')
    plt.savefig('TOP20 編程語言使用率.png')
    plt.show()
    print('TOP20 編程語言使用率.png 已保存...\n')

    # --- bar chart: change --------------------------------------------------
    print('開始繪制->TOP20 編程語言變化趨勢...\n')

    bar_colors = ['red' if delta >= 0 else 'green' for delta in change_data]

    plt.figure(figsize=(16, 10))
    plt.bar(language, change_data, 0.5,
            color=bar_colors,
            align="center", label="change", alpha=0.5)
    plt.xlabel("Programming Language")
    plt.ylabel("Percentage")
    plt.xticks(rotation=30)
    plt.title("TOP20 編程語言變化趨勢")
    plt.savefig('TOP20 編程語言變化趨勢.png')
    plt.show()
    print('TOP20 編程語言變化趨勢.png 已保存...\n')

# 繪制詞云圖
def drawWordcloud(language, ratings):
    """Render a word cloud of language names and save it as a PNG.

    Each name is repeated ``int(rating * 1000)`` times in the generated text,
    so higher-rated languages occur more often.

    Args:
        language: Language names.
        ratings: Usage fractions, one per entry of ``language``.
    """
    print('開始繪制詞云圖...\n')

    scaled = [value * 1000 for value in ratings]

    words = []
    for idx, name in enumerate(language):
        words.extend([name] * int(scaled[idx]))
    txt = ",".join(words)

    cloud = wordcloud.WordCloud(
        width=800,
        height=600,
        background_color="white",
        max_words=15,
        collocations=False,
    )
    cloud.generate(txt)
    cloud.to_file("TOP20 編程語言使用率詞云.png")
    print('TOP20 編程語言使用率詞云.png 已保存...\n')


# 繪制折線圖
def drawLineChart(language, month, data):
    """Plot one rank-history line per language, then save and show the chart.

    Args:
        language: Legend labels, one per series in ``data``.
        month: X-axis labels shared by every series.
        data: One list of rank values per language.
    """
    print('開始繪制->各類編程長期排名情況...\n')

    # Use a font with CJK glyphs so the Chinese title renders.
    # 'SimHei' is the Windows choice; on macOS use ['Arial Unicode MS'].
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # plt.style.available lists the other style names.
    plt.style.use('bmh')

    for idx, series in enumerate(data):
        plt.plot(month, series, label=language[idx])
    plt.title("各類編程語言長期排名變化情況")
    plt.legend(loc='best', fontsize='x-small')
    plt.ylim(0, 40)
    # Flip the y axis so that rank 1 is drawn at the top.
    plt.gca().invert_yaxis()
    plt.savefig('各類編程語言長期排名變化情況.png')
    plt.show()
    print('各類編程語言長期排名變化情況.png 已保存...\n')


if __name__ == '__main__':
    # Script entry point: fetch the TIOBE index page once, then report the
    # TOP20 table and the long-term history table from the same HTML.
    #
    # Kept as flat module-level statements on purpose: drawPic() reads the
    # module-level name `language` that is assigned below.

    url = 'https://www.tiobe.com/tiobe-index/'

    print('正在決議頁面資訊...請稍等...\n')

    html = getHTMLText(url)
    if html is None:
        # getHTMLText() has already printed the request error; there is
        # nothing to parse, so stop instead of failing inside the parser.
        raise SystemExit(1)

    uhead, uinfo = analyzeRankHtml(html)

    language, ratings, change = dataPreTreat1(uinfo)

    print('決議：https://www.tiobe.com/tiobe-index/ 獲取到 TOP20 編程語言情況如下：\n')

    printRank(uhead, uinfo)
    saveRank(uhead, uinfo)
    drawPic(ratings, change)
    drawWordcloud(language, ratings)

    print('正在決議各類編程語言長期排名情況...請稍等...\n')

    vhead, vlth = analyzeVlthHtml(html)

    # Rebinds `language` to the long-term table's names; the TOP20 charts
    # above have already been drawn at this point.
    language, month, data = dataPreTreat2(vhead, vlth)

    print('決議：https://www.tiobe.com/tiobe-index/ 獲取到 各類編程語言長期排名情況如下：\n')

    printLongTermHistoryRank(vhead, vlth)
    saveLongTermHistoryRank(vhead, vlth)
    drawLineChart(language, month, data)

引用及參考

https://baike.baidu.com/item/Tiobe/2830870

https://blog.csdn.net/handuoduo123/article/details/105319321

轉載請註明出處

本文作者：雙份濃縮馥芮白

原文鏈接：https://www.cnblogs.com/Flat-White/p/14704210.html

著作權所有，如需轉載請註明出處。

轉載請註明出處，本文鏈接：https://www.uj5u.com/houduan/280497.html

標籤：其他

上一篇：java 基于DelayQueue延時佇列的使用

下一篇：python 的__str__和__repr__有什么區別？

【Python】Python獲取TIOBE排行榜 繪制圖表及詞云