李沐《動手學深度學習》第二版比賽2-Classify Leaves
我的偶像,李沐大神主講的《動手學深度學習》(使用PyTorch框架,第一版使用的是MXNet框架)目前已經進行到了雙向循環神經網路,第二部分(卷積神經網路)的競賽內容為樹葉分類。

- 首先匯入需要的包
# 首先匯入包
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
import torchvision.models as models
# This is for the progress bar.
from tqdm import tqdm
import seaborn as sns
- 使用pd.read_csv將訓練集表格讀入,然後看看label檔案長什麼樣:image欄是圖片的名稱,label欄是圖片的分類標籤。
# Load the training index table from train.csv.
labels_dataframe = pd.read_csv('./classify-leaves/train.csv')
# Preview the first 10 rows (the value is only displayed when run as a notebook cell).
labels_dataframe.head(10)
- 使用DataFrame.describe()方法生成描述性統計資料,統計資料集的集中趨勢、分散程度和行列的分布情況(不包括NaN值)。可以看到訓練集總共有18353張圖片,標籤有176類。
# Descriptive statistics of the training table (only displayed when run as a notebook cell).
labels_dataframe.describe()
- 用條形圖可視化176類圖片的分布(數目),
# Annotate every bar of a horizontal bar chart with its length.
def barw(ax):
    """Write each bar's width next to the bar.

    Args:
        ax: an axes-like object exposing ``patches`` (the bars) and
            ``annotate(text, xy)``.

    The label is the bar width rounded to two decimals, anchored at the
    bar's right edge and vertically centred on the bar.
    """
    for bar in ax.patches:
        length = bar.get_width()  # horizontal extent of the bar
        right_edge = bar.get_x() + length
        middle = bar.get_y() + bar.get_height() / 2
        ax.annotate(round(length, 2), (right_edge, middle))
# Plot how many images each class has, ordered by value_counts() (most frequent first).
plt.figure(figsize = (15,30))
# Frequency bar chart of the categorical label (x-axis: count, y-axis: class).
ax0 =sns.countplot(y=labels_dataframe['label'],order=labels_dataframe['label'].value_counts().index)
# Write the count next to each bar.
barw(ax0)
plt.show()
- 把label標簽按字母排個序,這里僅顯示前10個,
# Collect the distinct labels in sorted order.
leaves_labels = sorted(set(labels_dataframe['label']))
n_classes = len(leaves_labels)
print(n_classes)
# Show the first 10 class names (only displayed when run as a notebook cell).
leaves_labels[:10]
- 把label和0到175的類別編號zip起來再轉成字典,將label對應到數字。
# Map every label string to its integer class index (position in leaves_labels).
class_to_num = {label: idx for idx, label in enumerate(leaves_labels)}
class_to_num
- 再將類別數轉換回label,方便最后預測的時候使用,
# Inverse mapping (class index -> label string), used to decode predictions.
num_to_class = dict(zip(class_to_num.values(), class_to_num.keys()))
- 創建樹葉資料集類LeavesData(Dataset),用來批量管理訓練集、驗證集和測試集。
# Custom dataset built on torch.utils.data.Dataset.
class LeavesData(Dataset):
    """Leaf-image dataset driven by a CSV index file.

    The CSV's first row is a header; column 0 holds the image path relative
    to ``file_path`` and column 1 (absent for the test set) holds the label
    string. In 'train' and 'valid' mode the samples are split in file order:
    the first ``int(n * (1 - valid_ratio))`` go to training, the rest to
    validation.
    """

    _MODES = ('train', 'valid', 'test')

    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.2, resize_height=256, resize_width=256):
        """
        Args:
            csv_path (string): path of the CSV index file
            file_path (string): directory the image paths in the CSV are relative to
            mode (string): 'train', 'valid' or 'test'
            valid_ratio (float): fraction of the samples reserved for validation
            resize_height (int): stored on the instance only
            resize_width (int): stored on the instance only

        Raises:
            ValueError: if ``mode`` is not one of the supported modes.
        """
        if mode not in self._MODES:
            raise ValueError(
                "mode must be one of {}, got {!r}".format(self._MODES, mode))

        # NOTE: these two values are kept for interface compatibility but are
        # not applied; __getitem__ always resizes to 224x224. Wiring them in
        # would change the model input size for existing callers.
        self.resize_height = resize_height
        self.resize_width = resize_width
        self.file_path = file_path
        self.mode = mode

        # header=None keeps the CSV header as row 0, so sample i is row i + 1.
        self.data_info = pd.read_csv(csv_path, header=None)
        # Number of samples (header row excluded).
        self.data_len = len(self.data_info.index) - 1
        self.train_len = int(self.data_len * (1 - valid_ratio))
        # First row belonging to the validation split. Slicing on this row in
        # both modes gives exactly train_len training samples and never lets
        # the header row (row 0) into either split.
        first_valid_row = self.train_len + 1

        if mode == 'train':
            # Column 0: image file names; column 1: label strings.
            self.train_image = np.asarray(self.data_info.iloc[1:first_valid_row, 0])
            self.train_label = np.asarray(self.data_info.iloc[1:first_valid_row, 1])
            self.image_arr = self.train_image
            self.label_arr = self.train_label
        elif mode == 'valid':
            self.valid_image = np.asarray(self.data_info.iloc[first_valid_row:, 0])
            self.valid_label = np.asarray(self.data_info.iloc[first_valid_row:, 1])
            self.image_arr = self.valid_image
            self.label_arr = self.valid_label
        else:  # mode == 'test': every sample, no labels
            self.test_image = np.asarray(self.data_info.iloc[1:, 0])
            self.image_arr = self.test_image

        self.real_len = len(self.image_arr)
        print('Finished reading the {} set of Leaves Dataset ({} samples found)'
              .format(mode, self.real_len))

    def _build_transform(self):
        """Return the preprocessing pipeline for the current mode."""
        steps = [transforms.Resize((224, 224))]
        if self.mode == 'train':
            # Augmentation is applied to the training split only.
            steps.append(transforms.RandomHorizontalFlip(p=0.5))
        steps.append(transforms.ToTensor())
        return transforms.Compose(steps)

    def __getitem__(self, index):
        """Return the image tensor (test mode) or ``(tensor, class_index)``."""
        single_image_name = self.image_arr[index]
        image_file = os.path.join(self.file_path, single_image_name)
        # The transform produces a new object, so the file can be closed
        # as soon as the block ends.
        with Image.open(image_file) as raw_image:
            img_as_img = self._build_transform()(raw_image)

        if self.mode == 'test':
            return img_as_img

        # Translate the string label into its integer class index using the
        # module-level class_to_num mapping.
        label = self.label_arr[index]
        number_label = class_to_num[label]
        return img_as_img, number_label

    def __len__(self):
        """Number of samples in the selected split."""
        return self.real_len
- 定義不同資料集的csv_path,並透過不同的mode建立對應的資料集實例。
# CSV index files for the labelled data and for the unlabelled test set.
train_path = './classify-leaves/train.csv'
test_path = './classify-leaves/test.csv'
# The CSV entries already contain the images sub-path, so only the parent directory is given here.
img_path = './classify-leaves/'
# The training and validation datasets are two splits of the same train.csv.
train_dataset = LeavesData(train_path, img_path, mode='train')
val_dataset = LeavesData(train_path, img_path, mode='valid')
test_dataset = LeavesData(test_path, img_path, mode='test')
print(train_dataset)
print(val_dataset)
print(test_dataset)
- 定義data loader,設定batch_size,
# Build the data loaders.
# The training loader reshuffles the samples every epoch so that the
# optimizer does not see the same fixed sequence of batches each time.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=5
)
# Validation and test keep the file order; the test order in particular must
# match test.csv because predictions are written back row by row.
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=5
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=5
)
- 展示資料
# Helper for previewing samples.
def im_convert(tensor):
    """Turn an image tensor into an array suitable for plotting.

    The tensor is copied to the CPU, singleton dimensions are squeezed out,
    the axes are reordered from (C, H, W) to (H, W, C) and the values are
    clipped to the range [0, 1].
    """
    pixels = tensor.detach().to("cpu").clone().numpy()
    pixels = pixels.squeeze()
    channels_last = pixels.transpose((1, 2, 0))
    return channels_last.clip(min=0, max=1)
# Preview one validation batch as a rows x columns grid titled with class names.
fig = plt.figure(figsize=(20, 12))
columns = 4
rows = 2
dataiter = iter(val_loader)
# Use the built-in next(): the loader iterator implements __next__, while
# the Python-2-style .next() method is not available on current PyTorch.
inputs, classes = next(dataiter)
# Never index past the batch, in case it holds fewer images than the grid.
for idx in range(min(columns * rows, len(inputs))):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    ax.set_title(num_to_class[int(classes[idx])])
    ax.imshow(im_convert(inputs[idx]))
plt.show()
# Find out whether the code will run on the CPU or on a GPU.
def get_device():
    """Return 'cuda' when CUDA is available, otherwise 'cpu'."""
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'
# Select the device once; the model and every batch are moved to it later.
device = get_device()
print(device)
# Optionally freeze the existing layers of a model.
def set_parameter_requires_grad(model, feature_extracting):
    """Disable gradients for every parameter of *model* when requested.

    Args:
        model: module whose ``parameters()`` are updated in place.
        feature_extracting: when truthy, all current parameters get
            ``requires_grad = False``; when falsy, nothing is changed.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False
# ResNet-34 classifier.
def res_model(num_classes, feature_extract = False, use_pretrained=True):
    """Build a ResNet-34 whose final layer outputs *num_classes* logits.

    Args:
        num_classes: size of the new output layer.
        feature_extract: when truthy, the backbone parameters are frozen
            before the new head is attached, so only the head is trainable.
        use_pretrained: forwarded to ``models.resnet34(pretrained=...)``.
    """
    # NOTE(review): newer torchvision releases prefer a `weights=` argument
    # over `pretrained=`; confirm against the installed version.
    backbone = models.resnet34(pretrained=use_pretrained)
    set_parameter_requires_grad(backbone, feature_extract)
    head = nn.Linear(backbone.fc.in_features, num_classes)
    # The Sequential wrapper is kept on purpose: it determines the parameter
    # names ("fc.0.weight", "fc.0.bias") stored in saved state dicts.
    backbone.fc = nn.Sequential(head)
    return backbone
# Hyperparameters
learning_rate = 3e-4
weight_decay = 1e-3
num_epoch = 50
# Where the best checkpoint (by validation accuracy) is written.
model_path = './pre_res_model.ckpt'
# Initialize a model, and put it on the device specified.
model = res_model(176)
model = model.to(device)
# Record the chosen device as a plain attribute on the model object.
model.device = device
# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()
# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)
# The number of training epochs.
n_epochs = num_epoch
# Best validation accuracy seen so far; used to decide when to save a checkpoint.
best_acc = 0.0
# Train for n_epochs, validating after each epoch and keeping the best checkpoint.
# Loss and accuracy are accumulated per sample as plain Python numbers, so the
# reported values are true dataset averages (a short final batch is not
# over-weighted) and best_acc stays a float instead of a device tensor.
for epoch in range(n_epochs):
    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0

    # Iterate the training set by batches of (images, labels).
    for imgs, labels in tqdm(train_loader):
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Forward pass. CrossEntropyLoss applies log-softmax itself, so the
        # raw logits are passed in.
        logits = model(imgs)
        loss = criterion(logits, labels)

        # Clear gradients from the previous step, back-propagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # criterion returns the batch mean; scale back to a per-sample sum.
        train_loss_sum += loss.item() * batch_size
        train_correct += (logits.argmax(dim=-1) == labels).sum().item()
        train_seen += batch_size

    train_loss = train_loss_sum / train_seen
    train_acc = train_correct / train_seen

    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Eval mode makes modules such as dropout and batch norm use their
    # inference behaviour.
    model.eval()

    valid_loss_sum = 0.0
    valid_correct = 0
    valid_seen = 0

    # No gradients are needed for validation.
    with torch.no_grad():
        for imgs, labels in tqdm(val_loader):
            imgs = imgs.to(device)
            labels = labels.to(device)

            logits = model(imgs)
            loss = criterion(logits, labels)

            batch_size = labels.size(0)
            valid_loss_sum += loss.item() * batch_size
            valid_correct += (logits.argmax(dim=-1) == labels).sum().item()
            valid_seen += batch_size

    valid_loss = valid_loss_sum / valid_seen
    valid_acc = valid_correct / valid_seen

    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    # If the model improves, save a checkpoint at this epoch.
    if valid_acc > best_acc:
        best_acc = valid_acc
        torch.save(model.state_dict(), model_path)
        print('saving model with acc {:.3f}'.format(best_acc))
saveFileName = './classify-leaves/submission.csv'

# ---------- Prediction ----------
# Rebuild the architecture without fetching pretrained weights: every
# parameter is overwritten by the checkpoint loaded below.
model = res_model(176, use_pretrained=False)
model = model.to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))

# Make sure the model is in eval mode: modules such as Dropout and BatchNorm
# behave differently while training.
model.eval()

# Predicted class indices, in test-set order.
predictions = []
with torch.no_grad():
    # In test mode the dataset yields image tensors only (no labels).
    for imgs in tqdm(test_loader):
        logits = model(imgs.to(device))
        # Take the class with the greatest logit as the prediction.
        predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

# Decode class indices back to label strings.
preds = [num_to_class[i] for i in predictions]

# Attach the predictions to the test table and write the submission file.
test_data = pd.read_csv(test_path)
test_data['label'] = pd.Series(preds)
submission = pd.concat([test_data['image'], test_data['label']], axis=1)
submission.to_csv(saveFileName, index=False)
print("Done!!!!!!!!!!!!!!!!!!!!!!!!!!!")
參考文獻
- https://www.kaggle.com/c/classify-leaves 比賽平臺
- https://www.cnblogs.com/zgqcn/p/14160093.html kaggle 訓練操作
- https://www.kaggle.com/nekokiku/simple-resnet-baseline 大神提供的baseline
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/291175.html
標籤:AI
