RotNet：預測影像旋轉

論文導讀
- 影像旋轉預測框架
旋轉拖動驗證碼解決方案
兩種思路
大資料應用賽
- 卷積神經網路
- 模型編譯
- 訓練引數
- 回呼
- 模型訓練
- 完整代碼
- 模型呼叫

論文導讀

RotNet 通過預測影像旋轉進行自監督學習

這是 2018 年發表於 ICLR 的一篇論文，被引用超過 1100 次。論文的想法來源於：如果某人不了解影像中所描繪物件的概念，他就無法識別施加在影像上的旋轉。

在這篇文章中，我們回顧巴黎科技大學(University Paris-Est)提出的「通過預測影像旋轉進行無監督表示學習」。RotNet 訓練卷積網路（ConvNet）去識別施加在輸入影像上的二維旋轉，藉此學習影像特徵。通過這種方法，無監督預訓練的 AlexNet 模型達到了 54.4% 的 mAP，僅比有監督的 AlexNet 低 2.4 個百分點。

影像旋轉預測框架

在這里插入圖片描述

給定四種可能的幾何變換，即 0、90、180 和 270 度旋轉，卷積網路模型 $F(\cdot)$ 被訓練來識別輸入影像被施加了哪一種旋轉。

$F^{y}(X^{y})$ 是模型 $F(\cdot)$ 對旋轉變換 $y$ 預測的概率；模型的輸入是一張已經被旋轉變換的影像，輸出是該影像的旋轉角度。

為了成功地預測影像的旋轉，ConvNet 模型必須學會定位影像中的顯著目標，識別它們的方向和物件類別，然後將物件方向與原始影像進行關聯。
在這里插入圖片描述
上圖為經過訓練的 AlexNet 模型生成的注意力圖：（a）以識別物件為目標訓練的模型（監督）；（b）以識別影像旋轉為目標訓練的模型（自監督）。

上述注意力圖是根據卷積層的每個空間單元的激活幅度計算的，本質上反映了網路將大部分焦點放在何處以對輸入影像進行分類，

從圖中可以看到，監督模型和自監督模型似乎都關注大致相同的影像區域。

旋轉拖動驗證碼解決方案

你是否也曾被旋轉驗證碼所困擾？沒錯，今日的主題就是——旋轉驗證碼。

在這里插入圖片描述
當進行模擬登錄時，圖片驗證碼是一大難點，

不過有了 RotNet，旋轉拖動驗證碼這一問題便迎刃而解。

兩種思路

影像旋轉考慮兩種思路：回歸與分類

回歸：直接預測一個數值，結果範圍是 0–360°。
分類：把角度劃分為 360 個類別，取模型輸出概率最大的類別作為預測角度。

定義卷積神經網路，用旋轉後的圖片集進行訓練，以預測圖片的旋轉角度。

大資料應用賽

大資料應用賽：計算機視覺在眾多的AI中應用廣泛，比如自動駕駛、視覺導航、目標檢測、目標識別等等，無一不關系到計算機視覺，而影像技術往往能幫助計算機視覺得到提升，比如隨機剪裁、隨機旋轉、影像模糊等等影像手段，影像技術對計算機視覺的重要性則不言而喻，故本次大資料應用賽的賽題為影像扶正挑戰，
在這里插入圖片描述

在這里插入圖片描述

卷積神經網路

分類代碼：

# Number of convolutional filters used by each Conv2D layer.
nb_filters = 64
# Size of the pooling window for max pooling.
pool_size = (2, 2)
# Convolution kernel size.
kernel_size = (3, 3)
# Number of output classes (rotation is treated as a 360-way classification).
nb_classes = 360

# Model definition: two convolutions, max pooling, then a dense classifier.
# The input tensor is bound to `inputs` so the builtin `input()` is not
# shadowed at module level.
inputs = Input(shape=(img_rows, img_cols, img_channels))
x = Conv2D(nb_filters, kernel_size, activation='relu')(inputs)
x = Conv2D(nb_filters, kernel_size, activation='relu')(x)
# Use the pool_size constant declared above rather than repeating the literal,
# so changing the constant actually changes the layer.
x = MaxPooling2D(pool_size=pool_size)(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.25)(x)
# Softmax over the nb_classes rotation classes.
x = Dense(nb_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=x)

model.summary()

模型編譯

# Compile the model: categorical cross-entropy loss, Adam optimizer, and the
# project's angle_error function reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[angle_error],
)

訓練引數

# Training hyper-parameters: maximum number of epochs and samples per batch.
nb_epoch = 50
batch_size = 128

回呼

# Callbacks passed to training.
# Checkpoints are written to <output_folder>/<model_name>.hdf5 with
# save_best_only enabled.
checkpointer = ModelCheckpoint(
    save_best_only=True,
    filepath=os.path.join(output_folder, model_name + '.hdf5'),
)
# Early stopping with a patience of 2 (monitored quantity left at its default).
early_stopping = EarlyStopping(patience=2)
# TensorBoard logging with default settings.
tensorboard = TensorBoard()

模型訓練

# Training loop.
# Generator over the training images: shuffled, with binarize_images supplied
# as the preprocessing function.
train_generator = RotNetDataGenerator(
    X_train,
    preprocess_func=binarize_images,
    batch_size=batch_size,
    shuffle=True,
)
# Generator over the held-out images: same preprocessing function, shuffle
# left at the generator's default.
validation_generator = RotNetDataGenerator(
    X_test,
    preprocess_func=binarize_images,
    batch_size=batch_size,
)
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples / batch_size,
    epochs=nb_epoch,
    validation_data=validation_generator,
    validation_steps=nb_test_samples / batch_size,
    verbose=1,
    callbacks=[checkpointer, early_stopping, tensorboard],
)

完整代碼

"""
@Author: ZS
@CSDN  : https://zsyll.blog.csdn.net/
@Time  : 2021/11/20 10:48
"""
from __future__ import print_function

import os
import sys

from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
from keras.applications.resnet50 import ResNet50
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import SGD

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils import angle_error, RotNetDataGenerator
from getImagePath import getPath

# Training script: fine-tune a ResNet50 backbone to classify image rotation.

# Folder holding the images; getPath (from getImagePath, not shown here)
# returns the train/test splits — presumably lists of file paths, TODO confirm.
data_path = r'./data/image/'
train_filenames, test_filenames = getPath(data_path)

print(len(train_filenames), 'train samples')
print(len(test_filenames), 'test samples')

# Base name used for the checkpoint file written below.
model_name = 'rotnet_resnet50'

# Number of output classes.
nb_classes = 360
# Input image shape (height, width, channels).
input_shape = (320, 320, 3)

# Load the base model: ResNet50 with ImageNet weights and without its top
# classification layers.
base_model = ResNet50(weights='imagenet', include_top=False,
                      input_shape=input_shape)

# Add the classification head: flatten the backbone output and attach a
# softmax layer with nb_classes units.
x = base_model.output
x = Flatten()(x)
final_output = Dense(nb_classes, activation='softmax', name='fc360')(x)

# Create the new model from the backbone input to the new head.
model = Model(inputs=base_model.input, outputs=final_output)

model.summary()

# Compile the model: categorical cross-entropy, SGD with momentum, and the
# project's angle_error function as the reported metric.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.01, momentum=0.9),
              metrics=[angle_error])

# Training parameters.
batch_size = 64
nb_epoch = 20

# Make sure the folder that receives the checkpoint exists.
output_folder = 'models'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Callbacks. All of them watch the validation value of the angle_error metric.
monitor = 'val_angle_error'
# Writes <output_folder>/<model_name>.hdf5 with save_best_only enabled.
checkpointer = ModelCheckpoint(
    filepath=os.path.join(output_folder, model_name + '.hdf5'),
    monitor=monitor,
    save_best_only=True
)

# Learning-rate reduction uses a patience of 3, early stopping a patience of 5.
reduce_lr = ReduceLROnPlateau(monitor=monitor, patience=3)
early_stopping = EarlyStopping(monitor=monitor, patience=5)
tensorboard = TensorBoard()

# Train the model. Both generators use the same input shape, batch size,
# preprocessing function and cropping flags; only the training one shuffles.
# NOTE(review): under Python 3 the two step counts below are floats because
# of true division — confirm the installed Keras version accepts that.
model.fit_generator(
    RotNetDataGenerator(
        train_filenames,
        input_shape=input_shape,
        batch_size=batch_size,
        preprocess_func=preprocess_input,
        crop_center=True,
        crop_largest_rect=True,
        shuffle=True
    ),
    steps_per_epoch=len(train_filenames) / batch_size,
    epochs=nb_epoch,
    validation_data=RotNetDataGenerator(
        test_filenames,
        input_shape=input_shape,
        batch_size=batch_size,
        preprocess_func=preprocess_input,
        crop_center=True,
        crop_largest_rect=True
    ),
    validation_steps=len(test_filenames) / batch_size,
    callbacks=[checkpointer, reduce_lr, early_stopping, tensorboard],
    workers=10
)

模型呼叫

# import區域，sys為必須匯入，其他根據需求匯入
from __future__ import print_function
import os
import sys
import random
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
import tensorflow.keras as keras



import matplotlib.pyplot as plt
from mykeras.applications.imagenet_utils import preprocess_input
from mykeras.models import load_model
from utils import display_examples, RotNetDataGenerator, angle_error
import warnings
warnings.filterwarnings("ignore")
from tensorflow.keras import layers

# Code area: write whatever the task needs.
class FileSequence(keras.utils.Sequence):
    """Keras ``Sequence`` that loads files batch by batch through a callback.

    Args:
        filenames: Indexable collection of file names, one per sample.
        batch_size: Number of samples per batch.
        filefunc: Callable invoked as ``filefunc(filename, *fileargs)``; its
            results for one batch are stacked with ``np.array``.
        fileargs: Extra positional arguments forwarded to ``filefunc``.
        labels: Optional collection of labels, same length as ``filenames``.
            ``None`` or an empty collection means "no labels"; NumPy arrays
            are accepted.
        labelfunc: Optional callable invoked as
            ``labelfunc(batch_labels, *labelargs)`` to transform a label batch.
        labelargs: Extra positional arguments forwarded to ``labelfunc``.
        shuffle: When true, file names (and labels) are shuffled once, at
            construction time, with the ``random`` module.
    """

    def __init__(self,filenames,batch_size,filefunc,fileargs=(),labels=None,labelfunc=None,labelargs=(),shuffle=False):
        super().__init__()
        self.filenames  = filenames
        self.batch_size = batch_size
        self.filefunc   = filefunc
        self.fileargs   = fileargs
        self.labels     = labels
        self.labelfunc  = labelfunc
        self.labelargs  = labelargs
        if self._has_labels():
            assert len(self.filenames) == len(self.labels)
        if shuffle:
            idx_list = list(range(len(self.filenames)))
            random.shuffle(idx_list)
            self.filenames = [self.filenames[idx] for idx in idx_list]
            # Apply the same permutation to the labels so pairs stay aligned.
            if self._has_labels():
                self.labels = [self.labels[idx] for idx in idx_list]

    def _has_labels(self):
        """Return True when a non-empty label collection was supplied.

        An explicit None/length test is used instead of plain truthiness,
        because the truth value of a multi-element NumPy array is ambiguous
        and raises ValueError.
        """
        return self.labels is not None and len(self.labels) > 0

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch ``idx``: the stacked files, plus labels when present.

        Without labels only the file array is returned. With labels the
        result is ``(files, labels)``, where the label batch is passed
        through ``labelfunc`` when one was given and is otherwise the raw
        slice of ``labels``.
        """
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        files = np.array(
            [self.filefunc(filename, *self.fileargs)
             for filename in self.filenames[start:stop]]
        )
        if not self._has_labels():
            return files
        batch_labels = self.labels[start:stop]
        if self.labelfunc:
            return files, self.labelfunc(batch_labels, *self.labelargs)
        return files, batch_labels

def fillWhite(img,size,mode=None):
    """Pad an image that is smaller than ``size`` up to ``size`` x ``size``.

    The image is pasted onto a zero-filled canvas created with ``np.zeros``
    (so the result has NumPy's default float dtype). A 2-D input is first
    given a trailing channel axis.

    Args:
        img: Array of shape (h, w) or (h, w, c) with h < size and w < size.
        size: Side length of the square output.
        mode: "random" pastes at a random offset, "centre"/"center" pastes in
            the middle, anything else pastes at the top-left corner.

    Returns:
        Array of shape (size, size, c).
    """
    if len(img.shape) == 2:
        img = img.reshape(img.shape + (-1,))
    assert len(img.shape) == 3
    rows, cols, channels = img.shape
    assert (rows < size) and (cols < size)

    canvas = np.zeros(shape=(size, size, channels))
    if mode == "random":
        # Row offset is drawn before the column offset.
        top = random.randint(0, size - rows)
        left = random.randint(0, size - cols)
    elif mode in ("centre", "center"):
        top = (size - rows) // 2
        left = (size - cols) // 2
    else:
        top = left = 0
    canvas[top:top + rows, left:left + cols, ...] = img
    return canvas

def cropImg(img,size,mode=None):
    """Cut a ``size`` x ``size`` window out of an image at least that large.

    A 2-D input is first given a trailing channel axis. The result is a
    slice of the (possibly reshaped) input, not a fresh array.

    Args:
        img: Array of shape (h, w) or (h, w, c) with h >= size and w >= size.
        size: Side length of the square window.
        mode: "random" picks a random window, "centre"/"center" the middle
            window, anything else the top-left window.

    Returns:
        Array of shape (size, size, c).
    """
    if len(img.shape) == 2:
        img = img.reshape(img.shape + (-1,))
    assert len(img.shape) == 3
    rows, cols, _ = img.shape
    assert (rows >= size) and (cols >= size)

    if mode == "random":
        # Row offset is drawn before the column offset.
        top = random.randint(0, rows - size)
        left = random.randint(0, cols - size)
    elif mode in ("centre", "center"):
        top = (rows - size) // 2
        left = (cols - size) // 2
    else:
        top = left = 0
    return img[top:top + size, left:left + size, ...]

def fillCrop(img,size,mode=None):
    """Crop one axis and zero-pad the other to get a ``size`` x ``size`` image.

    Handles images where exactly one of height/width is at least ``size``:
    the long axis is cropped to ``size`` and the short axis is pasted onto a
    zero-filled canvas created with ``np.zeros`` (NumPy's default float
    dtype). A 2-D input is first given a trailing channel axis.

    Args:
        img: Array of shape (h, w) or (h, w, c) with either
            (h >= size and w < size) or (h < size and w >= size).
        size: Side length of the square output.
        mode: "random" uses random crop/paste offsets, "centre"/"center"
            centres both, anything else anchors both at the top-left.

    Returns:
        Array of shape (size, size, c).
    """
    if len(img.shape) == 2: img = img.reshape(*img.shape,-1)
    assert len(img.shape) == 3
    h, w, c = img.shape
    tall = (h >= size) and (w < size)
    assert tall or ((h < size) and (w >= size))
    fillcropImg = np.zeros(shape=(size,size,c))

    # |h - size| and |w - size| are the slack on each axis: crop slack on the
    # long axis, padding slack on the short one.
    row_slack = (h - size) if tall else (size - h)
    col_slack = (size - w) if tall else (w - size)
    if mode == "random":
        # Row offset is drawn before the column offset.
        sh = random.randint(0, row_slack)
        sw = random.randint(0, col_slack)
    elif mode == "centre" or mode == "center":
        sh = row_slack // 2
        sw = col_slack // 2
    else:
        sh = sw = 0

    if tall:
        # Rows are cropped starting at sh; the w columns are pasted at sw.
        # The target slice is exactly w columns wide; the default mode used
        # to target `size` columns, which cannot broadcast.
        fillcropImg[:, sw:sw + w, :] = img[sh:sh + size, ...]
    else:
        # The h rows are pasted at sh; columns are cropped starting at sw.
        fillcropImg[sh:sh + h, ...] = img[:, sw:sw + size, :]
    return fillcropImg

def resizeImg(img,size,mode=None):
    """Bring an image to ``size`` x ``size`` by padding and/or cropping.

    A 2-D input is first given a trailing channel axis. The work is then
    delegated: ``fillWhite`` when both sides are smaller than ``size``,
    ``cropImg`` when both are at least ``size``, and ``fillCrop`` when only
    one of them is.

    Args:
        img: Array of shape (h, w) or (h, w, c).
        size: Side length of the square output.
        mode: Placement mode forwarded unchanged to the helper.

    Returns:
        Whatever the selected helper returns.
    """
    if len(img.shape) == 2:
        img = img.reshape(img.shape + (-1,))
    assert len(img.shape) == 3
    rows, cols, _ = img.shape

    rows_short = rows < size
    cols_short = cols < size
    if rows_short and cols_short:
        return fillWhite(img, size, mode)
    if (rows >= size) and (cols >= size):
        return cropImg(img, size, mode)
    return fillCrop(img, size, mode)

def filefunc(filename,mode):
    """Read an image file and return it as a 64 x 64 array.

    The image is first padded/cropped by ``resizeImg`` to a working size
    chosen from its larger side (256 if either side is >= 256, else 128 if
    either side is >= 128, else 64) and then, for the 256 and 128 cases,
    scaled down to 64 x 64 with ``cv2.resize``.

    Args:
        filename: Path of the image, passed to ``cv2.imread``.
        mode: Placement mode forwarded to ``resizeImg``.

    Returns:
        The processed image array.

    Raises:
        ValueError: If ``cv2.imread`` does not return an array for
            ``filename``. Previously the name was only printed and the code
            then failed with AttributeError on ``None.shape``.
    """
    tf.print(filename)
    img = cv2.imread(filename)
    if not isinstance(img,np.ndarray):
        raise ValueError("cv2.imread could not read image: %r" % (filename,))
    h, w, c = img.shape
    if (h >=256) or (w >= 256):
        work_size = 256
    elif (h >=128) or (w >= 128):
        work_size = 128
    else:
        work_size = 64
    img = resizeImg(img,work_size,mode)
    if work_size != 64:
        # Only the 256 and 128 working sizes need the final down-scaling.
        img = cv2.resize(img,(64,64))
    return img

# Main function; its signature is fixed. to_pred_dir is the folder holding the
# images to predict, result_save_path is where the prediction CSV is written.
# The body below is an example.
def main(to_pred_dir, result_save_path):
    """Predict every file in ``to_pred_dir`` and write the results as CSV.

    Loads ``model.hdf5`` from the directory containing this script, hands the
    full paths of all entries of ``to_pred_dir`` to ``display_examples``, and
    writes the two sequences it returns as the ``id`` and ``label`` columns
    of ``result_save_path`` (no index column).
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # angle_error is registered as a custom object when loading the model.
    model = load_model(
        os.path.join(script_dir, "model.hdf5"),
        custom_objects={'angle_error': angle_error},
    )

    image_paths = [
        os.path.join(to_pred_dir, entry) for entry in os.listdir(to_pred_dir)
    ]

    name, label = display_examples(
        model,
        image_paths,
        num_images=len(image_paths),
        size=(224, 224),
        crop_center=True,
        crop_largest_rect=True,
        preprocess_func=preprocess_input,
    )

    results = pd.DataFrame({"id": name, "label": label})
    results.to_csv(result_save_path, index=None)

# !!! NOTE:
# The image task supplies to_pred_dir, a folder whose contents look like
# to_pred_dir/to_pred_0.png
# to_pred_dir/to_pred_1.png
# to_pred_dir/......
# The CSV to generate has the header id,label, as follows
# image_id,label
# to_pred_0,4
# to_pred_1,76
# to_pred_2,...
# NOTE(review): the sample header above reads "image_id" while the text and
# main() use "id" — confirm which one the grader expects.

if __name__ == "__main__":
    # argv[1]: path of the folder to predict; argv[2]: path of the result CSV.
    main(sys.argv[1], sys.argv[2])

參考：Link

加油!

感謝!

努力!

轉載請註明出處，本文鏈接：https://www.uj5u.com/qita/392283.html

標籤：其他

上一篇：深度學習從入門到精通——影像分割之DeepLab系列演算法

下一篇：頭歌平臺-人臉識別系統——Dlib人臉特征提取

【RotNet 自監督學習】預測影像旋轉角度