1. 案例結構目錄

在這里插入圖片描述

2. 案例效果演示

在這里插入圖片描述

3. 步驟分析

配置獲取相關預測資料類別，網路引數
獲取攝像頭視頻
獲取攝像頭每幀資料，進行格式形狀處理
模型預測、結果NMS過濾
畫圖：顯示物體位置，FPS值（每秒幀數）

4. 代碼實作

匯入相關包

import cv2
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image
import pickle
import numpy as np
from random import shuffle
from scipy.misc import imread, imresize
from timeit import default_timer as timer

from utils.ssd_utils import BBoxUtility

1、配置獲取相關預測資料類別，網路引數

class VideoTag(object):
    """Bundle a detection model with the labels and colours used to tag video.

    The instance keeps everything the per-frame steps need: the class names,
    the model, the network input shape, a ``BBoxUtility`` for decoding the
    predictions, and one drawing colour per class.
    """

    def __init__(self, class_names, model, input_shape):
        """Store the configuration and build the per-class colour table.

        :param class_names: sequence of label strings, indexed by class id.
        :param model: object whose ``predict`` is called on every frame.
        :param input_shape: sequence whose first two items give the size
            frames are resized to before prediction.
        """
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)

        # The drawing step indexes self.class_colors by class id, so build
        # one BGR colour per class here by spreading the hue channel.
        self.class_colors = []
        for i in range(self.num_classes):
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype('uint8')
            col[0][0][0] = hue
            col[0][0][1] = 128
            col[0][0][2] = 255
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            self.class_colors.append(
                (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2])))

2、獲取攝像頭視頻

def run(self, video_path=0, conf_thresh=0.6):
    """Open the video source that the detection loop reads from.

    :param video_path: value handed to ``cv2.VideoCapture`` (defaults to 0).
    :param conf_thresh: minimum confidence for a detection to be drawn.
    :raises IOError: if the capture cannot be opened.
    """
    # Bound to `cap` because the frame loop, the FPS label and the final
    # release all refer to the capture by that name.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(("找不到對應的視頻或者攝像頭"))

3、獲取攝像頭每幀資料，進行格式形狀處理

# Read frames from the video file or camera until the stream ends.
while True:
    retval, orig_image = cap.read()
    if not retval:
        print("視頻檢測結束!")
        # Leave the loop rather than returning, so the cap.release() /
        # cv2.destroyAllWindows() cleanup after the loop still runs.
        break
    source_image = np.copy(orig_image)

    # Resize each frame to the network input size and convert BGR -> RGB.
    im_size = (self.input_shape[0], self.input_shape[1])
    resized = cv2.resize(orig_image, im_size)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # Scale the resized frame back to the source resolution; this is the
    # image the boxes and labels are drawn on.
    to_draw = cv2.resize(resized, (int(source_image.shape[1]), int(source_image.shape[0])))

4、模型預測、結果NMS過濾

# Wrap the single RGB frame in a batch of one and apply the Keras
# ImageNet preprocessing before prediction.
frame_array = image.img_to_array(rgb)
batch = preprocess_input(np.array([frame_array]))

predictions = self.model.predict(batch)

# Decode the raw predictions; per the original note this step performs
# the NMS filtering.
results = self.bbox_util.detection_out(predictions)

5、畫圖顯示
- 畫出物體位置，給定固定閾值

# Draw the detections for this frame, if there are any.
if len(results) > 0 and len(results[0]) > 0:
    detections = results[0]
    # Columns are: label, confidence, xmin, ymin, xmax, ymax.
    # Drop every row whose confidence is below the threshold.
    kept = detections[detections[:, 1] >= conf_thresh]
    frame_h, frame_w = to_draw.shape[0], to_draw.shape[1]

    for label, conf, bx0, by0, bx1, by1 in kept[:, :6]:
        # Box coordinates are multiplied by the frame size to get pixels.
        xmin = int(round(bx0 * frame_w))
        ymin = int(round(by0 * frame_h))
        xmax = int(round(bx1 * frame_w))
        ymax = int(round(by1 * frame_h))

        # Outline the object in the colour assigned to its class.
        class_num = int(label)
        colour = self.class_colors[class_num]
        cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), colour, 2)
        text = "%s %.2f" % (self.class_names[class_num], conf)

        # Filled label background anchored at the box's top-left corner,
        # with the class name and confidence written on top of it.
        cv2.rectangle(to_draw, (xmin, ymin - 10), (xmin + 80, ymin + 5), colour, -1)
        cv2.putText(to_draw, text, (xmin + 5, ymin),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

顯示FPS引數

# Build the FPS label from the frame rate the capture itself reports
# (CAP_PROP_FPS); no per-frame timing is measured here.
fps_text = "FPS: {}".format(cap.get(cv2.CAP_PROP_FPS))

# White background patch in the top-left corner, then the label in black.
patch_top_left, patch_bottom_right = (0, 0), (50, 17)
cv2.rectangle(to_draw, patch_top_left, patch_bottom_right, (255, 255, 255), -1)
cv2.putText(to_draw, fps_text, (3, 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

顯示圖片

	# 顯示圖片
    cv2.imshow("SSD result", to_draw)
    cv2.waitKey(1)
# 釋放capture資源
cap.release()
cv2.destroyAllWindows()

5. 呼叫視頻預測

import sys
import keras
from utils.tag_video import VideoTag
from nets.ssd_net import SSD300


def main():
    """Build SSD300, load its pretrained weights, and run the video tagger."""
    # Class labels indexed by class id; index 0 is "background".
    class_names = [
        "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
        "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
        "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
        "tvmonitor",
    ]
    input_shape = (300, 300, 3)

    # The network is built with one output class per label.
    model = SSD300(input_shape, num_classes=len(class_names))

    # Load the pretrained weights from the checkpoint directory.
    model.load_weights('./ckpt/pre_trained/weights_SSD300.hdf5')

    # 0 is passed as the video source (presumably the default camera).
    tagger = VideoTag(class_names, model, input_shape)
    tagger.run(0)


if __name__ == '__main__':
    main()

6. 完整代碼

"""
配置獲取相關預測資料類別，網路引數
獲取攝像頭視頻
獲取攝像每幀資料，進行格式形狀處理
模型預測、結果NMS過濾
畫圖：顯示物體位置，FPS值（每秒幀數）
"""
from tensorflow.python.keras.preprocessing.image import img_to_array
from tensorflow.python.keras.applications.imagenet_utils import preprocess_input
from computerVision.utils.ssd_utils import BBoxUtility
import cv2
import numpy as np

class VideoTag(object):
    """Detect objects in a video stream with an SSD model and display them.

    Each frame is resized to the network input size, converted from BGR to
    RGB, passed to ``model.predict``, decoded with ``BBoxUtility`` and shown
    in an OpenCV window with boxes, class labels and an FPS label.
    """

    def __init__(self, model, input_shape, num_classes):
        """Store the model and prepare labels and per-class colours.

        :param model: object whose ``predict`` is called on every frame.
        :param input_shape: sequence whose first two items give the size
            frames are resized to before prediction.
        :param num_classes: number of classes; passed to ``BBoxUtility``
            and used as the number of drawing colours.
        """
        self.model = model
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.bbox_util = BBoxUtility(num_classes=self.num_classes)
        self.class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
                   "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
                   "tvmonitor"]

        # One BGR display colour per class, obtained by spreading the hue
        # channel over the class indices and converting HSV -> BGR.
        self.class_colors = []
        for i in range(0, self.num_classes):
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype('uint8')
            col[0][0][0] = hue
            col[0][0][1] = 128
            col[0][0][2] = 255
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col)

    @staticmethod
    def _to_pixel_box(xmin, ymin, xmax, ymax, width, height):
        """Scale box coordinates by the frame size and round to integers.

        Horizontal values are multiplied by ``width`` and vertical values by
        ``height``.

        :return: ``(xmin, ymin, xmax, ymax)`` as ints.
        """
        return (int(round(xmin * width)), int(round(ymin * height)),
                int(round(xmax * width)), int(round(ymax * height)))

    def _draw_detections(self, to_draw, detections, conf_thresh):
        """Draw each detection with confidence >= ``conf_thresh`` on ``to_draw``.

        :param to_draw: image that is drawn on in place.
        :param detections: 2-D array whose columns are label, confidence,
            xmin, ymin, xmax, ymax.
        :param conf_thresh: minimum confidence for a row to be drawn.
        """
        # Keep only the rows whose confidence reaches the threshold.
        kept = detections[detections[:, 1] >= conf_thresh]
        frame_height, frame_width = to_draw.shape[0], to_draw.shape[1]

        for i, row in enumerate(kept):
            label, conf = row[0], row[1]
            xmin, ymin, xmax, ymax = self._to_pixel_box(
                row[2], row[3], row[4], row[5], frame_width, frame_height)

            class_num = int(label)
            print('該幀圖片檢測到{}物體，索引為{}'.format(i, class_num))

            # Outline of the detected object in its class colour.
            color = self.class_colors[class_num]
            cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), color, 2)

            # Filled label background at the box's top-left corner with the
            # class name and confidence written over it.
            text = self.class_names[class_num] + ' ' + ('%.2f' % conf)
            text_top = (xmin - 1, ymin - 10)
            text_bot = (xmin + 80, ymin + 5)
            text_pos = (xmin + 5, ymin)

            cv2.rectangle(to_draw, text_top, text_bot, color, -1)
            cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1, cv2.LINE_AA)

    def run(self, file_path, conf_thresh=0.6):
        """Read frames from ``file_path``, detect objects and show each frame.

        The loop ends when the stream runs out of frames or when 'q' is
        pressed in the display window.

        :param file_path: value handed to ``cv2.VideoCapture``.
        :param conf_thresh: minimum confidence for a detection to be drawn.
        :raises IOError: if the capture cannot be opened.
        :return: None
        """
        cap = cv2.VideoCapture(file_path)
        if not cap.isOpened():
            raise IOError(('打開本地視頻或者攝像頭失敗！'))

        # try/finally so the capture and window are released on every exit
        # path: end of stream, 'q' pressed, or an exception mid-frame.
        try:
            while True:
                ret, orig_image = cap.read()
                if not ret:
                    print('視頻檢測結束!')
                    break

                # Resize to the network input size, then convert BGR -> RGB.
                # NOTE(review): cv2.resize takes dsize as (width, height); the
                # indices are kept from the original and are equivalent for
                # the square 300x300 input used in this file.
                resized = cv2.resize(orig_image, (self.input_shape[0], self.input_shape[1]))
                rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

                # Scale the resized frame back to the source resolution;
                # this is the image the annotations are drawn on.
                to_draw = cv2.resize(resized, (int(orig_image.shape[1]), int(orig_image.shape[0])))

                # Wrap the frame in a batch of one, preprocess, and predict.
                inputs = [img_to_array(rgb)]
                x = preprocess_input(np.array(inputs))
                y = self.model.predict(x)

                # Decode the predictions (this step performs NMS filtering).
                results = self.bbox_util.detection_out(y)

                # Only touch results[0] after checking that it is non-empty;
                # reading .shape earlier can fail when nothing was detected.
                if len(results) > 0 and len(results[0]) > 0:
                    print(results[0].shape)
                    self._draw_detections(to_draw, results[0], conf_thresh)

                # FPS label: the frame rate reported by the capture itself,
                # not a measured processing rate.
                fps = 'FPS: ' + str(cap.get(cv2.CAP_PROP_FPS))
                cv2.rectangle(to_draw, (0, 0), (58, 12), (255, 255, 255), -1)
                cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

                # Show the annotated frame; 'q' quits.
                cv2.imshow('SSD detector result', to_draw)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the capture and close the display window.
            cap.release()
            cv2.destroyAllWindows()

        return None


if __name__ == '__main__':
    # NOTE(review): SSD300 is not imported in this listing; the caller
    # script earlier in this article uses `from nets.ssd_net import SSD300`.
    # Confirm the correct module path for this project layout.
    input_shape = (300, 300, 3)
    # Dataset configuration: class labels, with "background" at index 0.
    class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
                   "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
                   "tvmonitor"]
    model = SSD300(input_shape, num_classes=len(class_names))
    # Load the already-trained model weights.
    model.load_weights("./ckpt/pre_trained/weights_SSD300.hdf5", by_name=True)

    vt = VideoTag(model, input_shape, len(class_names))  # third argument: number of classes
    vt.run(0)

在這里插入圖片描述

轉載請註明出處，本文鏈接：https://www.uj5u.com/qita/294576.html

標籤：其他

上一篇：案例分享：Qt政務標簽設計器，標簽排版軟體定制與列印

下一篇：OpenCv實時設定攝像頭引數/獲得攝像頭引數值的方法論

【計算機視覺】案例：應用opencv+keras完成視頻物體檢測

案例：視頻物體檢測測驗

1. 案例結構目錄

2. 案例效果演示

3. 步驟分析

4. 代碼實作

5. 呼叫視頻預測

6. 完整代碼