我正在嘗試使用以 TensorFlow 製作的 ANN 分類模型進行預測,對 MediaPipe 產生的姿勢關鍵點進行分類。MediaPipe 姿勢追蹤器有 33 個關鍵點,每個關鍵點包含 x、y、z 三個座標,總共 99 個資料點。
我正在針對 4 個類別進行訓練。
以下是執行姿勢嵌入的程式碼:
import mediapipe as mp
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Alias for MediaPipe's pose solution module; the `.value` of its PoseLandmark
# members is used below as the landmark index.
mp_pose = mp.solutions.pose
def get_center_point(landmarks, left_bodypart, right_bodypart):
    """Calculates the center point of the two given landmarks.

    Args:
        landmarks: Tensor of landmark coordinates in which the individual
            landmarks are indexed along axis 1 (the callers in this file pass
            a tensor of shape (batch, 33, 3)).
        left_bodypart: Landmark identifier whose `.value` is the index of the
            first landmark, e.g. `mp_pose.PoseLandmark.LEFT_HIP`.
        right_bodypart: Landmark identifier whose `.value` is the index of the
            second landmark, e.g. `mp_pose.PoseLandmark.RIGHT_HIP`.

    Returns:
        Tensor holding the midpoint of the two selected landmarks; axis 1 of
        `landmarks` is removed by the gather.
    """
    left = tf.gather(landmarks, left_bodypart.value, axis=1)
    right = tf.gather(landmarks, right_bodypart.value, axis=1)
    # Midpoint = average of the two landmark positions.
    center = left * 0.5 + right * 0.5
    return center
def get_pose_size(landmarks, torso_size_multiplier=2.5):
    """Computes one scale value describing how large the pose is.

    The result is the larger of two quantities:
      * the torso size (shoulders midpoint to hips midpoint) multiplied by
        `torso_size_multiplier`
      * the largest norm computed from the offsets between the landmarks and
        the pose center (the hips midpoint)

    Args:
        landmarks: Landmark tensor; the broadcast below assumes its total
            size is a multiple of 33 * 3, i.e. shape (batch, 33, 3).
        torso_size_multiplier: Factor applied to the torso size before it is
            compared with the maximum distance.

    Returns:
        A scalar tensor with the pose size.

    NOTE(review): the torso norm is taken without an `axis`, so it reduces
    over the whole tensor (batch included); the distance term only uses
    index 0 along axis 0, and its norm is reduced over axis 0 of that
    (33, 3) slice. Presumably a per-sample, per-landmark distance was
    intended -- confirm before relying on this with batch sizes > 1.
    """
    pose_lm = mp_pose.PoseLandmark

    # Midpoint between the hips and midpoint between the shoulders.
    hips_mid = get_center_point(landmarks, pose_lm.LEFT_HIP,
                                pose_lm.RIGHT_HIP)
    shoulders_mid = get_center_point(landmarks, pose_lm.LEFT_SHOULDER,
                                     pose_lm.RIGHT_SHOULDER)

    # The torso length acts as the minimum body size.
    torso = tf.linalg.norm(shoulders_mid - hips_mid)

    # The pose center is again the hips midpoint; give it a landmark axis and
    # broadcast it to the landmark tensor's shape so it can be subtracted.
    center = get_center_point(landmarks, pose_lm.LEFT_HIP, pose_lm.RIGHT_HIP)
    center = tf.expand_dims(center, axis=1)
    batch = tf.size(landmarks) // (33 * 3)
    center = tf.broadcast_to(center, [batch, 33, 3])

    # Offsets from the pose center (first entry along axis 0 only).
    offsets = tf.gather(landmarks - center, 0, axis=0,
                        name="dist_to_pose_center")
    farthest = tf.reduce_max(tf.linalg.norm(offsets, axis=0))

    # The pose size is whichever of the two measures is larger.
    return tf.maximum(torso * torso_size_multiplier, farthest)
def normalize_pose_landmarks(landmarks):
    """Translates and rescales landmarks into a normalized pose.

    The hips midpoint is moved to the origin and the result is divided by
    the value returned from `get_pose_size`.

    Args:
        landmarks: Landmark tensor; the broadcast below assumes its total
            size is a multiple of 33 * 3, i.e. shape (batch, 33, 3).

    Returns:
        Tensor of the same shape as `landmarks` holding the centered and
        scaled coordinates.
    """
    # Hips midpoint, expanded with a landmark axis and broadcast to the full
    # landmark shape so it can be subtracted element-wise.
    hips_mid = get_center_point(landmarks, mp_pose.PoseLandmark.LEFT_HIP,
                                mp_pose.PoseLandmark.RIGHT_HIP)
    hips_mid = tf.expand_dims(hips_mid, axis=1)
    target_shape = [tf.size(landmarks) // (33 * 3), 33, 3]
    hips_mid = tf.broadcast_to(hips_mid, target_shape)

    # Move the pose center to the origin.
    centered = landmarks - hips_mid

    # Divide by the pose size so every pose ends up at a constant scale.
    scale = get_pose_size(centered)
    return centered / scale
def landmarks_to_embedding(landmarks_and_scores):
    """Turns flat landmark input into a normalized, flat pose embedding.

    Args:
        landmarks_and_scores: Keras input whose per-sample values can be
            reshaped to (33, 3), i.e. 99 values per sample.

    Returns:
        The normalized landmark coordinates flattened back into one vector
        per sample.
    """
    # View each flat sample as a (33, 3) matrix of landmark coordinates.
    as_matrix = keras.layers.Reshape((33, 3))(landmarks_and_scores)

    # Keep the first three values of the last axis and normalize them.
    coords = as_matrix[:, :, :3]
    normalized = normalize_pose_landmarks(coords)

    # Collapse the normalized coordinates into a single vector per sample.
    return keras.layers.Flatten()(normalized)
然後我建立模型,並將嵌入作為輸入提供給它:
import csv
import cv2
import itertools
import numpy as np
import pandas as pd
import os
import sys
import tempfile
import tqdm
import mediapipe as mp
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from poseEmbedding import get_center_point, get_pose_size, normalize_pose_landmarks, landmarks_to_embedding
def load_pose_landmarks(csv_path):
    """Reads a landmarks CSV and splits it into features, labels and classes.

    Args:
        csv_path: Path of a CSV file that has a 'class_name' and a 'class_no'
            column; every remaining column is converted to float64 and used
            as an input feature.

    Returns:
        A tuple `(X, y, classes, dataframe)`:
          * X: the feature columns as float64
          * y: the 'class_no' column one-hot encoded by
            `keras.utils.to_categorical`
          * classes: the unique values of the 'class_name' column
          * dataframe: the unmodified frame as read from the CSV
    """
    dataframe = pd.read_csv(csv_path)

    # Work on a copy so the returned dataframe keeps all of its columns.
    features = dataframe.copy()

    # Removing the two label columns leaves only the feature columns.
    classes = features.pop('class_name').unique()
    labels = features.pop('class_no')

    X = features.astype('float64')
    y = keras.utils.to_categorical(labels)
    return X, y, classes, dataframe
# Locations of the training and test landmark CSV files.
csvs_out_train_path = 'train_data.csv'
csvs_out_test_path = 'test_data.csv'

# Load the training data.
X, y, class_names, _ = load_pose_landmarks(csvs_out_train_path)

# Split the training data (X, y) into (X_train, y_train) and (X_val, y_val),
# holding out 15% of it for validation.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15)

# Load the test data.
X_test, y_test, _, df_test = load_pose_landmarks(csvs_out_test_path)

mp_pose = mp.solutions.pose

# `shape` has to be a tuple: `(99)` is just the integer 99, which only some
# Keras versions tolerate, whereas `(99,)` is the one-element tuple the API
# documents. One sample = 33 landmarks * 3 coordinates.
inputs = tf.keras.Input(shape=(99,))
embedding = landmarks_to_embedding(inputs)

# Two dropout-regularized hidden layers on top of the pose embedding.
layer = keras.layers.Dense(128, activation=tf.nn.relu6)(embedding)
layer = keras.layers.Dropout(0.5)(layer)
layer = keras.layers.Dense(64, activation=tf.nn.relu6)(layer)
layer = keras.layers.Dropout(0.5)(layer)
# Four softmax outputs (the surrounding text says four classes are trained).
outputs = keras.layers.Dense(4, activation="softmax")(layer)

model = keras.Model(inputs, outputs)
# model.summary()

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Start training.
history = model.fit(X_train, y_train,
                    epochs=200,
                    batch_size=16,
                    validation_data=(X_val, y_val))

model.save("complete_epoch_model")

# Visualize the training history to see whether you're overfitting.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['TRAIN', 'VAL'], loc='lower right')
plt.show()

# Evaluate the trained model on the held-out test set.
loss, accuracy = model.evaluate(X_test, y_test)
模型摘要列印出來:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_18 (InputLayer) [(None, 99)] 0 []
reshape_17 (Reshape) (None, 33, 3) 0 ['input_18[0][0]']
tf.__operators__.getitem_10 (S (None, 33, 3) 0 ['reshape_17[0][0]']
licingOpLambda)
tf.compat.v1.gather_69 (TFOpLa (None, 3) 0 ['tf.__operators__.getitem_10[0][
mbda) 0]']
tf.compat.v1.gather_70 (TFOpLa (None, 3) 0 ['tf.__operators__.getitem_10[0][
mbda) 0]']
tf.math.multiply_69 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_69[0][0]']
a)
tf.math.multiply_70 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_70[0][0]']
a)
tf.__operators__.add_31 (TFOpL (None, 3) 0 ['tf.math.multiply_69[0][0]',
ambda) 'tf.math.multiply_70[0][0]']
tf.compat.v1.size_17 (TFOpLamb () 0 ['tf.__operators__.getitem_10[0][
da) 0]']
tf.expand_dims_17 (TFOpLambda) (None, 1, 3) 0 ['tf.__operators__.add_31[0][0]']
tf.compat.v1.floor_div_17 (TFO () 0 ['tf.compat.v1.size_17[0][0]']
pLambda)
tf.broadcast_to_17 (TFOpLambda (None, 33, 3) 0 ['tf.expand_dims_17[0][0]',
) 'tf.compat.v1.floor_div_17[0][0]
']
tf.math.subtract_23 (TFOpLambd (None, 33, 3) 0 ['tf.__operators__.getitem_10[0][
a) 0]',
'tf.broadcast_to_17[0][0]']
tf.compat.v1.gather_75 (TFOpLa (None, 3) 0 ['tf.math.subtract_23[0][0]']
mbda)
tf.compat.v1.gather_76 (TFOpLa (None, 3) 0 ['tf.math.subtract_23[0][0]']
mbda)
tf.math.multiply_75 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_75[0][0]']
a)
tf.math.multiply_76 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_76[0][0]']
a)
tf.__operators__.add_34 (TFOpL (None, 3) 0 ['tf.math.multiply_75[0][0]',
ambda) 'tf.math.multiply_76[0][0]']
tf.compat.v1.size_18 (TFOpLamb () 0 ['tf.math.subtract_23[0][0]']
da)
tf.compat.v1.gather_73 (TFOpLa (None, 3) 0 ['tf.math.subtract_23[0][0]']
mbda)
tf.compat.v1.gather_74 (TFOpLa (None, 3) 0 ['tf.math.subtract_23[0][0]']
mbda)
tf.compat.v1.gather_71 (TFOpLa (None, 3) 0 ['tf.math.subtract_23[0][0]']
mbda)
tf.compat.v1.gather_72 (TFOpLa (None, 3) 0 ['tf.math.subtract_23[0][0]']
mbda)
tf.expand_dims_18 (TFOpLambda) (None, 1, 3) 0 ['tf.__operators__.add_34[0][0]']
tf.compat.v1.floor_div_18 (TFO () 0 ['tf.compat.v1.size_18[0][0]']
pLambda)
tf.math.multiply_73 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_73[0][0]']
a)
tf.math.multiply_74 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_74[0][0]']
a)
tf.math.multiply_71 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_71[0][0]']
a)
tf.math.multiply_72 (TFOpLambd (None, 3) 0 ['tf.compat.v1.gather_72[0][0]']
a)
tf.broadcast_to_18 (TFOpLambda (None, 33, 3) 0 ['tf.expand_dims_18[0][0]',
) 'tf.compat.v1.floor_div_18[0][0]
']
tf.__operators__.add_33 (TFOpL (None, 3) 0 ['tf.math.multiply_73[0][0]',
ambda) 'tf.math.multiply_74[0][0]']
tf.__operators__.add_32 (TFOpL (None, 3) 0 ['tf.math.multiply_71[0][0]',
ambda) 'tf.math.multiply_72[0][0]']
tf.math.subtract_25 (TFOpLambd (None, 33, 3) 0 ['tf.math.subtract_23[0][0]',
a) 'tf.broadcast_to_18[0][0]']
tf.math.subtract_24 (TFOpLambd (None, 3) 0 ['tf.__operators__.add_33[0][0]',
a) 'tf.__operators__.add_32[0][0]']
tf.compat.v1.gather_77 (TFOpLa (33, 3) 0 ['tf.math.subtract_25[0][0]']
mbda)
tf.compat.v1.norm_14 (TFOpLamb () 0 ['tf.math.subtract_24[0][0]']
da)
tf.compat.v1.norm_15 (TFOpLamb (3,) 0 ['tf.compat.v1.gather_77[0][0]']
da)
tf.math.multiply_77 (TFOpLambd () 0 ['tf.compat.v1.norm_14[0][0]']
a)
tf.math.reduce_max_7 (TFOpLamb () 0 ['tf.compat.v1.norm_15[0][0]']
da)
tf.math.maximum_7 (TFOpLambda) () 0 ['tf.math.multiply_77[0][0]',
'tf.math.reduce_max_7[0][0]']
tf.math.truediv_7 (TFOpLambda) (None, 33, 3) 0 ['tf.math.subtract_23[0][0]',
'tf.math.maximum_7[0][0]']
flatten_7 (Flatten) (None, 99) 0 ['tf.math.truediv_7[0][0]']
dense_21 (Dense) (None, 128) 12800 ['flatten_7[0][0]']
dropout_14 (Dropout) (None, 128) 0 ['dense_21[0][0]']
dense_22 (Dense) (None, 64) 8256 ['dropout_14[0][0]']
dropout_15 (Dropout) (None, 64) 0 ['dense_22[0][0]']
dense_23 (Dense) (None, 4) 260 ['dropout_15[0][0]']
==================================================================================================
Total params: 21,316
Trainable params: 21,316
Non-trainable params: 0
__________________________________________________________________________________________________
現在,當我嘗試在我的網路攝像頭上運行推理時,我從 mediapipe 和 Tensorflow 收到以下錯誤:
ValueError: Input 0 is incompatible with layer model: expected shape=(None, 99), found shape=(None, 3)
我不知道如何解決這個錯誤:我只能用形狀 99 來訓練,因為當我嘗試改用形狀 3 時,TF 在編譯模型時就會報錯。我該如何解決?
這是我的推理代碼:
import cv2
import os
import tqdm
import numpy as np
import logging
from mediapipe.python.solutions import pose as mp_pose
from mediapipe.python.solutions import drawing_utils as mp_drawing
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.utils import CustomObjectScope
def relu6(x):
    """Applies ReLU with its output capped at 6.

    Passed to `load_model` below as the `relu6` custom object.
    """
    capped = K.relu(x, max_value=6)
    return capped
# Silence everything below CRITICAL on the root logger.
logging.getLogger().setLevel(logging.CRITICAL)

cap = cv2.VideoCapture(0)
model = tf.keras.models.load_model('weights_best.hdf5', compile=True,
                                   custom_objects={"relu6": relu6})

try:
    # The `with` block releases the MediaPipe resources when it exits, so no
    # explicit `pose_tracker.close()` follows it (a second close is at best
    # redundant -- NOTE(review): MediaPipe appears to raise on it).
    with mp_pose.Pose() as pose_tracker:
        while cap.isOpened():
            # Get next frame of the video.
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; `frame` is unusable, so stop.
                break

            # Run pose tracker on the mirrored RGB version of the frame.
            imagefirst = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.flip(imagefirst, 1)
            result = pose_tracker.process(image)
            pose_landmarks = result.pose_landmarks

            if pose_landmarks is not None:
                # Draw pose prediction.
                mp_drawing.draw_landmarks(
                    image,
                    landmark_list=pose_landmarks,
                    connections=mp_pose.POSE_CONNECTIONS)

                # Get landmarks, scaled by the frame dimensions.
                frame_height, frame_width = frame.shape[0], frame.shape[1]
                pose_landmarks = np.array(
                    [[lmk.x * frame_width,
                      lmk.y * frame_height,
                      lmk.z * frame_width]
                     for lmk in pose_landmarks.landmark],
                    dtype=np.float32)
                assert pose_landmarks.shape == (33, 3), 'Unexpected landmarks shape: {}'.format(pose_landmarks.shape)

                # The model expects input of shape (None, 99). Passing the
                # (33, 3) array directly is read as 33 samples of 3 features
                # and raises the "expected shape=(None, 99), found
                # shape=(None, 3)" error, so flatten it into one sample.
                prediction = model.predict(pose_landmarks.reshape(1, -1))

            # Show the output frame (converted back to OpenCV's BGR order).
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow('Raw Webcam Feed', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always release the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()
uj5u.com熱心網友回復:
也許可以嘗試在斷言之後、進行預測之前,將 pose_landmarks 的形狀從 (33, 3) 更改為 (1, 99):
import tensorflow as tf
# Stand-in for one frame of landmarks: 33 points with 3 coordinates each.
pose_landmarks = tf.random.normal((33, 3))
assert pose_landmarks.shape == (33, 3), 'Unexpected landmarks shape: {}'.format(pose_landmarks.shape)

# Add a leading batch axis: (33, 3) -> (1, 33, 3).
batched = tf.expand_dims(pose_landmarks, axis=0)

# Merge the last two axes into one: (1, 33, 3) -> (1, 99).
dims = tf.shape(batched)
flat_width = dims[1] * dims[2]
pose_landmarks = tf.reshape(batched, (dims[0], flat_width))

tf.print(pose_landmarks.shape)
TensorShape([1, 99])
轉載請註明出處,本文鏈接:https://www.uj5u.com/qiye/361553.html
上一篇:OpenCVPythonCudaCascadeClassifier錯誤
下一篇:使用物件時如何洗掉串列中的此值
