我有以下來自一百萬行的采樣資料框。它將顯示例外行的值計數、只有例外行的資料框和繪圖。
輸入資料:
df_sample = pd.DataFrame({
'AbsoluteTopImpressionPercentage': [0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.75, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 1.0, 1.0],
'AdFormat': ['TEXT', 'TEXT', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'TEXT', 'TEXT', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'TEXT', 'UNKNOWN', 'TEXT', 'TEXT', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'TEXT', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'TEXT', 'UNKNOWN', 'UNKNOWN', 'TEXT', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'TEXT', 'UNKNOWN', 'TEXT', 'UNKNOWN', 'UNKNOWN'],
'AdNetworkType1': ['SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH'],
'AdNetworkType2': ['SEARCH', 'SEARCH', 'SEARCH', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH', 'SEARCH_PARTNERS', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH_PARTNERS', 'SEARCH_PARTNERS', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH', 'SEARCH'],
'AllConversionRate': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
'Clicks': [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 1, 0, 0, 1, 0],
'ConversionRate': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
'Ctr': [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2105, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3333, 1.0, 0.25, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2326, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
'Date': ['2021-04-09 00:00:00 00:00', '2021-04-19 00:00:00 00:00', '2022-08-24 00:00:00 00:00', '2021-07-25 00:00:00 00:00', '2022-07-29 00:00:00 00:00', '2022-05-27 00:00:00 00:00', '2022-05-18 00:00:00 00:00', '2022-07-24 00:00:00 00:00', '2021-02-05 00:00:00 00:00', '2021-04-30 00:00:00 00:00', '2021-07-11 00:00:00 00:00', '2021-05-24 00:00:00 00:00', '2022-05-22 00:00:00 00:00', '2021-07-01 00:00:00 00:00', '2021-07-11 00:00:00 00:00', '2022-03-24 00:00:00 00:00', '2021-05-12 00:00:00 00:00', '2022-06-14 00:00:00 00:00', '2021-04-27 00:00:00 00:00', '2021-01-29 00:00:00 00:00', '2022-09-08 00:00:00 00:00', '2021-06-07 00:00:00 00:00', '2022-05-28 00:00:00 00:00', '2022-03-26 00:00:00 00:00', '2021-04-09 00:00:00 00:00', '2022-05-22 00:00:00 00:00', '2021-05-28 00:00:00 00:00', '2022-05-22 00:00:00 00:00', '2021-07-06 00:00:00 00:00', '2021-07-14 00:00:00 00:00', '2021-06-24 00:00:00 00:00', '2021-03-24 00:00:00 00:00', '2021-06-03 00:00:00 00:00', '2022-05-30 00:00:00 00:00', '2021-03-15 00:00:00 00:00', '2022-08-05 00:00:00 00:00', '2021-07-06 00:00:00 00:00', '2022-03-30 00:00:00 00:00', '2022-09-07 00:00:00 00:00', '2021-05-27 00:00:00 00:00', '2021-06-04 00:00:00 00:00', '2022-04-16 00:00:00 00:00', '2022-05-22 00:00:00 00:00', '2021-07-08 00:00:00 00:00', '2022-05-26 00:00:00 00:00', '2021-02-09 00:00:00 00:00', '2022-04-27 00:00:00 00:00', '2021-05-06 00:00:00 00:00', '2021-06-29 00:00:00 00:00', '2022-06-01 00:00:00 00:00'],
'Anomaly': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
})
我的代碼:
df_sample = df_sample.sort_values(by='Date')
df_sample['Date'] = df_sample['Date'].apply(pd.to_datetime)
print(df_sample.Anomaly.value_counts())
print('')
display(df_sample.loc[df_sample['Anomaly'] == 1])
colors=[]
for val in df_sample['Anomaly']:
if val == 1:
colors.append('red')
else:
colors.append('blue')
# or
#colors = pd.cut(df['Sum'].tolist(), [-np.inf, 10, 20, np.inf],
# labels=['green', 'orange', 'red'])
#ax = df['Sum'].plot(kind='barh',color=colors)
fig, axs = plt.subplots(figsize=(22, 12))
df_sample.groupby(df_sample['Date'].dt.date)["Clicks"].sum().plot(kind='barh', ax=axs, color=colors)
plt.xlabel("Clicks")
plt.ylabel("Dates")
every_nth = 7
for n, label in enumerate(axs.yaxis.get_ticklabels()):
if n % every_nth != 0:
label.set_visible(False)
輸出:
現在在輸出中,我希望那些 df_sample.Anomaly 值 == 1 的行在圖形水平條中顯示為紅色而不是藍色。任何想法?
uj5u.com熱心網友回復:
在不過多更改代碼的情況下,您需要在分組資料上創建顏色串列,稍后您將繪制這些顏色串列。
我創建的行out用作max列的聚合Anomaly。在每組的示例資料中,Anomaly只有 0 或只有 1。如果這些值不同,您可以在此處決定您想要做什么(例如取第一個或最后一個,將它們相加,最小值或最大值)
然后根據新資料構建顏色串列out['Anomaly']并根據out['Clicks']
df_sample['Date'] = pd.to_datetime(df_sample['Date'])
df_sample = df_sample.sort_values(by='Date')
out = df_sample.groupby(df_sample['Date'].dt.date).agg({'Clicks' : 'sum', 'Anomaly': 'max'})
print(out)
#collect colors based on that data
colors = ['red' if val==1 else 'blue' for val in out['Anomaly']]
fig, axs = plt.subplots(figsize=(10,8))
out['Clicks'].plot(kind='barh', ax=axs, color=colors)
axs.set_xlabel("Clicks")
axs.set_ylabel("Dates")
every_nth = 7
for n, label in enumerate(axs.yaxis.get_ticklabels()):
if n % every_nth != 0:
label.set_visible(False)

轉載請註明出處,本文鏈接:https://www.uj5u.com/yidong/508694.html
