目錄標題
- 導讀
- 爬取中國大學排名
- 可視化分析
- 詞云
- 條形統計圖
- 獲取全國211以上大學的動態地理坐標
- 最后對肥友說
- 資料領取和詳解
導讀
肥友們,最近有位粉絲找到我直言出價讓我代做作業,我一聽當場就急眼了,我肥學是這樣的人嗎?

直接就問他給多少錢,嘻嘻嘻!!!當然了,給多少錢我也不會幹的。既然是粉絲,我肯定盡量幫啊,於是我就開始了今天的博客。
爬取中國大學排名
鏈接:中國大學排名
其實還是挺簡單的,這位粉絲肯定沒有好好看我以前的文章,這種爬取說過很多次了,所以我們直接整起來,

我們直接找到要獲得的這些資訊的id或者class
# Download the ranking page and parse it with the lxml backend.
res = requests.get(url=url, headers=header).content.decode('utf-8')
soup = BeautifulSoup(res, 'lxml')
names = soup.findAll(name="a", attrs={"class": "name-cn"})
xinxi = soup.findAll("td")

# The <td> cells are consumed six at a time, one CSV row per group.
# NOTE(review): assumes every table row on the page has exactly six cells — confirm.
CELLS_PER_ROW = 6

# Append mode: running the script again adds rows to the existing file.
with open("中國大學.csv", 'a', encoding="utf-8", newline="") as f:
    # Build the writer once instead of once per row.
    w = csv.writer(f)
    # Stop before a trailing, incomplete group so indexing never runs past
    # the end of the cell list.
    for i in range(0, len(xinxi) - CELLS_PER_ROW + 1, CELLS_PER_ROW):
        w.writerow([cell.text.strip() for cell in xinxi[i:i + CELLS_PER_ROW]])
然后順利拿到資訊

可視化分析
詞云
上面我們已經拿到了資訊,我們先對這些大學集中的省市和類別做一個詞云分析。

可以看出來綜合和北京的居多
import jieba
from wordcloud import WordCloud
from matplotlib import colors
import csv
import pandas as pd
# Load only columns 2 and 3 of the scraped CSV.
# header=None: the scraper in this article writes data rows only, so the first
# line is a record, not a header.
# NOTE(review): drop header=None if your CSV does carry a header line.
info = pd.read_csv("中國大學.csv", usecols=[2, 3], header=None)

# Flatten the actual cell values into one string. str(DataFrame) would give the
# truncated display repr (row indices, "..." for elided rows) instead of the data.
text = " ".join(info.fillna("").astype(str).to_numpy().ravel())

# Join the jieba tokens with spaces so the segmentation survives as separate
# words; joining with "" would glue the tokens back together.
cut_text = " ".join(jieba.cut(text))

color_list = ['#FF0000', '#9955FF', '#66FFFF']  # palette used for the words
colormap = colors.ListedColormap(color_list)    # wrap the palette as a matplotlib colormap

# Optional: load an image to use as the cloud mask.
# color_mask = cv2.imread("11.jpg")
word_cloud = WordCloud(
    font_path="msyh.ttc",
    background_color='black',
    mode="RGBA",
    prefer_horizontal=1,
    # mask=color_mask,
    height=200,
    width=200,
    scale=1,
    colormap=colormap,  # apply the custom palette
    margin=5,
)
word_cloud1 = word_cloud.generate(cut_text)
word_cloud1.to_file('2.png')
print("圖片保存成功")
條形統計圖

這裡運用了 Echarts 來做,有興趣的大佬也可以用 cufflinks 做,同樣可以達到一樣的效果。
// Only the variable part of the ECharts page script is shown here.
// `option` is the chart configuration object handed to ECharts.
option = {
  title: {
    text: '中國大學資料'
  },
  tooltip: {
    trigger: 'axis'
  },
  legend: {
    data: ['總分', '辦學層次']
  },
  grid: {
    left: '3%',
    right: '4%',
    bottom: '3%',
    containLabel: true
  },
  toolbox: {
    feature: {
      saveAsImage: {}
    }
  },
  // Category axis: one entry (1..30) per data point in the series below.
  xAxis: {
    type: 'category',
    boundaryGap: false,
    data: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
  },
  yAxis: {
    type: 'value'
  },
  // The two series are plotted independently. They deliberately do NOT share a
  // `stack` name: stacked series are drawn on top of each other, which would
  // place the second line at (總分 + 辦學層次) instead of at its own values.
  series: [
    {
      name: '總分',
      type: 'line',
      data: [969.2,855.3,768.7,723.4,654.8,649.7,577.0,574.3,567.9,537.9,522.6,519.3,518.3,516.6,513.8,508.3,488.1,487.8,474.0,465.3,447.0,444.3,442.2,435.7,430.5,427.8,419.8,418.2,401.8,400.4]
    },
    {
      name: '辦學層次',
      type: 'line',
      data: [37.9,36.1,34.3,35.5,35.1,36.6,40.0,32.1,31.8,34.5,32.7,30.9,34.8,30.7,32.8,33.2,34.3,34.5,32.3,31.5,28.8,32.7,30.8,30.4,32.4,32.7,30.5,30.2,35.2,31.8]
    }
  ]
};
獲取全國211以上大學的動態地理坐標

# Matplotlib defaults for the animated map, applied in one call.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['SimHei'],   # sans-serif font used for the Chinese labels
    'axes.unicode_minus': False,     # draw minus signs with the ASCII hyphen
    'animation.writer': 'html',      # default writer used by Animation.save()
    'animation.embed_limit': 100,    # size limit for embedded animations
})
def rgba_to_rgb(img_rgba):
    """Flatten an RGBA PIL image onto a white RGB canvas.

    The fourth band of ``img_rgba`` is used as the paste mask, so transparent
    areas show the white background.

    Returns:
        A new "RGB" image with the same size as ``img_rgba``.
    """
    alpha_band = img_rgba.split()[3]
    white_canvas = Image.new("RGB", img_rgba.size, (255, 255, 255))
    white_canvas.paste(img_rgba, mask=alpha_band)
    return white_canvas
def html_to_gif(html_file, gif_file, duration=0.5):
    """Assemble the frame images saved next to an HTML animation into a GIF.

    The frames are read from the directory obtained by replacing the trailing
    ``.html`` of ``html_file`` with ``_frames``; files are taken in sorted
    name order.

    Args:
        html_file: Path of the HTML animation whose frame directory is read.
        gif_file: Output path of the GIF.
        duration: Passed through to ``imageio.mimsave`` as ``duration``.

    Returns:
        ``gif_file``.

    Raises:
        IndexError: If the frame directory contains no files.
    """
    suffix = ".html"
    if html_file.endswith(suffix):
        # Rewrite only the trailing extension; a plain str.replace would also
        # rewrite ".html" occurring earlier in the path (e.g. in a folder name).
        path = html_file[:-len(suffix)] + "_frames"
    else:
        # No trailing ".html": keep the original substitution behavior.
        path = html_file.replace(suffix, "_frames")
    images = [os.path.join(path, x) for x in sorted(os.listdir(path))]
    frames = [imageio.imread(x) for x in images]
    # Four channels in the first frame means RGBA: flatten every frame onto
    # white before saving.
    if frames[0].shape[-1] == 4:
        frames = [np.array(rgba_to_rgb(Image.fromarray(x))) for x in frames]
    imageio.mimsave(gif_file, frames, 'gif', duration=duration)
    return gif_file
# Twelve base colors, repeated 100 times so that indexing by point number
# keeps cycling through the same palette.
_BASE_PALETTE = [
    '#2E91E5', '#1CA71C', '#DA16FF', '#B68100',
    '#EB663B', '#00A08B', '#FC0080', '#6C7C32',
    '#862A16', '#620042', '#DA60CA', '#0D2A63',
]
cmap = _BASE_PALETTE * 100
def getCoords(geom):
    """Return the coordinate arrays of a shapely geometry as a list.

    Coordinates are read through ``.coords`` rather than by passing the
    geometry itself to ``np.array``: the geometry array interface was
    deprecated in Shapely 1.8 and removed in Shapely 2.0.

    Args:
        geom: A ``Polygon``, ``MultiPolygon``, ``LineString`` or
            ``MultiLineString``.

    Returns:
        A list of NumPy arrays, one per part. For polygons only the exterior
        ring is returned; interior rings are not included.

    Raises:
        TypeError: If ``geom`` is none of the supported geometry types.
    """
    if isinstance(geom, geo.MultiPolygon):
        return [np.array(part.exterior.coords) for part in geom.geoms]
    if isinstance(geom, geo.Polygon):
        return [np.array(geom.exterior.coords)]
    if isinstance(geom, geo.LineString):
        return [np.array(geom.coords)]
    if isinstance(geom, geo.MultiLineString):
        return [np.array(part.coords) for part in geom.geoms]
    raise TypeError("geom must be one of [polygon,MultiPolygon,LineString,MultiLineString]!")
def _read_projected(path):
    """Read a vector file, label it EPSG:4326 and reproject it to EPSG:2343."""
    return gpd.read_file(path).set_crs("epsg:4326").to_crs("epsg:2343")


# Base-map layers.
dfprovince = _read_projected("dfprovince.geojson")
dfnanhai = _read_projected("dfnanhai.geojson")
# Keep only the features whose LENGTH attribute lies strictly between 1.0 and 2.0;
# these are drawn as the nine-dash line.
_nanhai_length = dfnanhai["LENGTH"]
dfline9 = dfnanhai[(_nanhai_length > 1.0) & (_nanhai_length < 2.0)]

# Scatter layers: the 985 rows come first, followed by the 211 rows.
# NOTE(review): if the 211 file also lists the 985 universities, names will be
# duplicated after the concat — confirm against the data files.
df985 = _read_projected("中國985大學.geojson")
df211 = _read_projected("中國211大學.geojson")
dfpoints = pd.concat([df985, df211], axis=0)

# One row per university, indexed by name: projected x/y plus a constant weight z.
_point_geoms = dfpoints["geometry"]
df = pd.DataFrame(
    {
        "x": [pt.x for pt in _point_geoms],
        "y": [pt.y for pt in _point_geoms],
        "z": 1.0,
    },
    index=dfpoints["name"].values,
)
def bubble_map_dance(df, title="中國116所211高校位置分布",
                     filename=None,
                     figsize=(8, 6), dpi=144,
                     duration=0.5,
                     anotate_points=("北京郵電大學", "南昌大學", "華中農業大學", "東華大學", "云南大學",
                                     "陜西師范大學", "內蒙古大學", "西藏大學", "新疆大學", "青海大學", "哈爾濱工程大學")):
    """Animate the points of ``df`` appearing one by one on the base map.

    Every row of ``df`` gets three consecutive frames in which its marker and
    label shrink (marker scale 800/400/100, font size 20/15/10). Rows shown in
    earlier frames stay on the map as small, translucent markers. The map is
    drawn on a main axes and again on a small inset axes in the lower right.

    The module-level ``dfprovince``, ``dfline9``, ``cmap`` and ``getCoords``
    are used for the base map and the colors.

    Args:
        df: DataFrame with columns ``x``, ``y``, ``z``, indexed by point name.
        title: Text drawn near the top of the main axes.
        filename: If given, the animation is saved there; otherwise it is
            rendered inline for IPython.
        figsize: Figure size passed to ``plt.subplots``.
        dpi: Figure resolution passed to ``plt.subplots``.
        duration: Seconds per frame (converted to the animation interval in ms).
        anotate_points: Names that keep a small label after their highlight
            frames. Any container supporting ``in`` works; the default is a
            tuple so it is not a shared mutable default.

    Returns:
        ``filename`` when it is given; otherwise an ``IPython.display.HTML``
        object, or ``None`` if IPython is not installed.
    """
    fig, ax_base = plt.subplots(figsize=figsize, dpi=dpi)
    ax_child = fig.add_axes([0.800, 0.125, 0.10, 0.20])
    both_axes = (ax_base, ax_child)

    # Frame-independent data, computed once instead of on every frame.
    province_rings = [ring for geom in dfprovince["geometry"] for ring in getCoords(geom)]
    nine_dash_lines = [ln for geom in dfline9["geometry"] for ln in getCoords(geom)]
    bounds = dfprovince.total_bounds
    span_x = bounds[2] - bounds[0]
    span_y = bounds[3] - bounds[1]
    z_mean = df["z"].mean()

    # Sub-frame index -> (marker scale, label font size) of the highlighted point.
    highlight_styles = {0: (800, 20), 1: (400, 15), 2: (100, 10)}

    def color_at(idx):
        # Cycle through the palette so any number of points gets a color.
        return cmap[idx % len(cmap)]

    def plot_frame(i):
        ax_base.clear()
        ax_child.clear()

        # Province outlines. An artist belongs to a single axes, so each axes
        # gets its own patch.
        for ring in province_rings:
            for ax in both_axes:
                ax.add_patch(plt.Polygon(ring, fill=True, ec="gray", fc="white",
                                         alpha=0.5, linewidth=.8))

        # Nine-dash line.
        for ln in nine_dash_lines:
            x, y = np.transpose(ln)
            for ax in both_axes:
                ax.add_artist(plt.Line2D(x, y, color="gray", linestyle="-.", linewidth=1.5))

        # Spine formatting: hidden on the main axes, faded on the inset.
        for spine in ['top', 'left', "bottom", "right"]:
            ax_base.spines[spine].set_color("none")
            ax_child.spines[spine].set_alpha(0.5)
        ax_base.axis("off")

        # Plot extents; clear() resets them, so they are set again every frame.
        ax_base.set_xlim(bounds[0] - span_x / 10, bounds[2] + span_x / 10)
        ax_base.set_ylim(bounds[1] + span_y / 3.5, bounds[3] + span_y / 100)
        ax_child.set_xlim(bounds[2] - span_x / 2.5, bounds[2] - span_x / 20)
        ax_child.set_ylim(bounds[1] - span_y / 20, bounds[1] + span_y / 2)

        # Remove the axis ticks of the inset.
        ax_child.set_xticks([])
        ax_child.set_yticks([])

        k = i // 3 + 1   # number of points revealed so far (1-based)
        m = i % 3        # sub-frame within the current point's highlight
        text = "NO." + str(k)
        dfdata = df.iloc[:k, :]
        dftmp = df.iloc[:k - 1, :]

        # Points revealed in earlier frames: small translucent markers on both axes.
        if len(dftmp) > 0:
            sizes = 100 * dftmp["z"] / z_mean
            point_colors = [color_at(j) for j in range(len(dftmp))]
            for ax in both_axes:
                ax.scatter(dftmp["x"], dftmp["y"], s=sizes,
                           c=point_colors, alpha=0.3, zorder=3)

            # Persistent labels. Iterating positionally (instead of .loc by name)
            # keeps this working when the index contains duplicate names.
            for j, (name, px, py) in enumerate(zip(dftmp.index, dftmp["x"], dftmp["y"])):
                if name in anotate_points:
                    ax_base.annotate(name, xy=(px, py), xycoords="data", xytext=(-15, 10),
                                     fontsize=10, fontweight="bold", color=color_at(j),
                                     textcoords="offset points")

        # Title and the large running counter.
        ax_base.text(0.5, 0.95, title, va="center", ha="center",
                     size=12, transform=ax_base.transAxes)
        ax_base.text(0.5, 0.5, text, va="center", ha="center",
                     alpha=0.3, size=50, transform=ax_base.transAxes)

        # Attention animation for the newest point: marker and label shrink over
        # its three sub-frames. .iloc is used because label-style [-1] on a
        # string index relies on a deprecated positional fallback in pandas.
        marker_scale, font_size = highlight_styles[m]
        newest = dfdata.iloc[-1]
        px, py, pz = newest["x"], newest["y"], newest["z"]
        p = dfdata.index[-1]
        ax_base.scatter([px], [py], s=marker_scale * pz / z_mean,
                        c=[color_at(k - 1)], alpha=0.5, zorder=4)
        ax_base.annotate(p, xy=(px, py), xycoords="data",
                         xytext=(-15, 10), fontsize=font_size, fontweight="bold",
                         color=color_at(k - 1), textcoords="offset points", zorder=5)

    my_animation = animation.FuncAnimation(fig, plot_frame, frames=range(0, 3 * len(df)),
                                           interval=int(duration * 1000))
    if filename is None:
        try:
            from IPython.display import HTML
        except ImportError:
            # Without IPython there is nothing to display inline.
            return None
        # Render once; to_jshtml() draws every frame and is expensive.
        return HTML(my_animation.to_jshtml())
    my_animation.save(filename)
    return filename
最后對肥友說
為了粉絲這把拼了,我覺得真的收費的大概也就這樣了吧。好幾天沒有寫python了,這把直接給我整爽了。持續關注我,后面Java和python的web都給大家整一套。最后還是那句話:一起肥學,一起加油!
特別介紹
📣小白練手專欄,適合剛入手的新人歡迎訂閱編程小白進階
📣python有趣練手專案里面包括了像《機器人尬聊》《惡搞程式》這樣的有趣文章,可以讓你快樂學python練手專案專欄
📣另外想學JavaWeb進廠的同學可以看看這個專欄:傳送門
📣這是個面試和考研的演算法練習我們一起加油上岸之路
資料領取和詳解
轉載請註明出處,本文鏈接:https://www.uj5u.com/ruanti/323058.html
標籤:其他
