本文實作以下功能:
先用 這篇博客 中的方法生成檔案後,在按照 pix2pix 準備資料的要求 準備資料集之前,需要先把資料劃分為訓練集(train)、驗證集(val)和測驗集(test)。完整程式碼如下:
import os
import random
import shutil
from shutil import copy2
def data_set_split(src_data_folder: str, target_data_folder: str,
                   train_scale: float = 0.8, val_scale: float = 0.1,
                   test_scale: float = 0.1) -> None:
    """Split paired A/B images into train/val/test folders.

    For every sub-directory (class) of ``src_data_folder`` the layout
    ``<target_data_folder>/<class>/{train,val,test}`` is created; a split
    folder that already exists is emptied first.  The files of class ``A``
    are then shuffled and copied into the three splits, and for each of them
    the file with the same name in class ``B`` is copied into the matching
    ``B`` split, so the A/B pairs stay aligned.

    :param src_data_folder: source folder; must contain the sub-folders
        ``A`` and ``B`` holding identically named files.
    :param target_data_folder: destination root; created if missing.
    :param train_scale: fraction of the files that goes to ``train``.
    :param val_scale: fraction of the files that goes to ``val``.
    :param test_scale: only reported in the summary; ``test`` always
        receives whatever is left after ``train`` and ``val``.
    :return: None
    :raises OSError: if ``A`` cannot be listed or a file cannot be copied
        (e.g. a file of ``A`` has no counterpart in ``B``).
    """
    print("開始資料集劃分")
    split_names = ['train', 'val', 'test']

    # Only sub-directories are classes; a stray file in the source folder
    # must not produce a class folder in the target.
    class_names = [
        entry for entry in os.listdir(src_data_folder)
        if os.path.isdir(os.path.join(src_data_folder, entry))
    ]

    # Build <target>/<class>/<split>, wiping any previous content of a split.
    for class_name in class_names:
        for split_name in split_names:
            split_path = os.path.join(
                target_data_folder, class_name, split_name)
            if os.path.exists(split_path):
                shutil.rmtree(split_path)
            # makedirs also creates the target root and the class folder.
            os.makedirs(split_path)

    A_class_data_path = os.path.join(src_data_folder, 'A')
    B_class_data_path = os.path.join(src_data_folder, 'B')

    shuffled_names = os.listdir(A_class_data_path)
    A_data_length = len(shuffled_names)
    random.shuffle(shuffled_names)

    # Integer cut points, clamped to [0, n] and kept ordered.  Rounding
    # avoids the extra training sample that a float "<=" comparison on a
    # 0-based index produced (10 files used to split 9/1/0, not 8/1/1).
    train_end = min(A_data_length,
                    max(0, round(A_data_length * train_scale)))
    val_end = min(A_data_length,
                  max(train_end,
                      round(A_data_length * (train_scale + val_scale))))
    names_by_split = {
        'train': shuffled_names[:train_end],
        'val': shuffled_names[train_end:val_end],
        'test': shuffled_names[val_end:],
    }

    A_folders = {}
    for split_name in split_names:
        A_folders[split_name] = os.path.join(
            target_data_folder, 'A', split_name)
        B_split_folder = os.path.join(target_data_folder, 'B', split_name)
        for file_name in names_by_split[split_name]:
            copy2(os.path.join(A_class_data_path, file_name),
                  A_folders[split_name])
            # B is never shuffled on its own: it follows A by file name.
            copy2(os.path.join(B_class_data_path, file_name),
                  B_split_folder)

    print("A類按照{}:{}:{}的比例劃分完成,一共{}張圖片".format(
        train_scale, val_scale, test_scale, A_data_length))
    print("訓練集{}:{}張".format(
        A_folders['train'], len(names_by_split['train'])))
    print("驗證集{}:{}張".format(
        A_folders['val'], len(names_by_split['val'])))
    print("測驗集{}:{}張".format(
        A_folders['test'], len(names_by_split['test'])))
    print("B 類的訓練集、驗證集、測驗集完全按照 A 類的檔案名稱對應分類!")
if __name__ == '__main__':
    # Source folder; data_set_split reads its 'A' and 'B' sub-folders.
    src_data_folder = "Result"
    # Root under which the <class>/<split> folders are written.
    target_data_folder = "to/data"
    # Create the target root (with parents) only when it is not there yet.
    if not os.path.exists(target_data_folder):
        os.makedirs(target_data_folder)
    data_set_split(src_data_folder, target_data_folder)
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/150949.html
標籤:其他
