cv.solvePnP對輸入不回應-有解無憂





# 3D model points.

MODEL_POINTS_68 = np.array([

     [-53.30021857,  -15.27635071,  60.43970857],

     [-52.85842643,   -1.810875,    59.18390214],

     [-51.25744357, 11.67454214,  58.42188571],

     [-48.626315,   24.63487214,  57.20655286],

     [-43.58355,    36.41352214,  53.98811286],

     [-35.42554,    46.354015,    48.35620143],

     [-25.24848857, 54.04395214,  39.57465214],

     [-13.64442429, 59.62245214,  28.96935357],

     [ -0.80573857, 61.05416214,  25.76168714],

     [ 11.60732,    59.40927786,  30.09232929],

     [ 22.44441643, 53.81683,     40.08901643],

     [ 32.24683929, 46.23280714,  48.37045   ],

     [ 40.083355,   36.30976214,  53.26906357],

     [ 45.11549929, 24.19315786,  55.59597786],

     [ 47.84661071, 11.00381857,  56.59250643],

     [ 49.47115857,   -2.32260857,  57.92187643],

     [ 50.10645643,  -15.67903571,  60.06320429],

     [-44.53299214,  -29.24790071,  36.57320143],

     [-37.510265,    -35.96803857,  31.583005  ],

     [-27.87970214,  -38.27258143,  26.70722214],

     [-18.03538357,  -37.58489857,  21.67254214],

     [ -9.18749714,  -34.48015571,  17.84333   ],

     [  7.73560929,  -34.98320857,  17.78346857],

     [ 17.05676643,  -38.20546071,  21.16640143],

     [ 26.50843214,  -38.83083714,  25.66079071],

     [ 35.68873143,  -36.34843286,  30.21249643],

     [ 42.01933429,  -29.83979571,  34.68843286],

     [ -0.40917357,  -24.128035,    16.83373214],

     [ -0.30141,     -16.12798571,  11.28371929],

     [ -0.16755643,   -8.23575357,   5.47430286],

     [  0.,          0.,          -0.        ],

     [-10.55176571,  1.85589643, -14.38000214],

     [ -5.12874214,  3.39399214, -16.81191714],

     [  0.39708571,  4.68778571, -18.53174857],

     [  5.90892786,  3.32928929, -16.92552929],

     [ 10.86739357,  1.88789,    -14.89868357],

     [-32.89092143,  -26.76529357,   5.027135  ],

     [-26.91049143,  -30.52179286,   2.15801214],

     [-19.91704,     -30.50822643,   0.96687786],

     [-14.03447714,  -26.25338714,  -0.07934857],

     [-20.194975,    -25.09606643,  -0.105195  ],

     [-27.20172714,  -24.94217357,   1.05472286],

     [ 13.76133429,  -26.45164714,  -0.47553286],

     [ 19.92442214,  -30.95888929,   0.1769    ],

     [ 26.74109214,  -30.79344429,   1.21173929],

     [ 32.26486071,  -27.20465357,   3.49583071],

     [ 27.28318143,  -25.38001714,   0.202115  ],

     [ 20.54642071,  -25.34592071,  -0.837625  ],

     [-20.65447643, 20.43765429,  -1.60022143],

     [-12.52371,    15.83727643, -11.38166786],

     [ -4.77470714, 13.59217929, -16.15096786],

     [  0.27214357, 14.80079857, -16.96316929],

     [  5.98245929, 13.59675714, -16.22999643],

     [ 13.48329857, 15.99579214, -11.150485  ],

     [ 20.56743714, 20.05708857,  -2.29813786],

     [ 13.61255286, 25.92732,    -10.70571214],

     [  6.39741071, 28.31041071, -16.110175  ],

     [  0.272535,   28.85403357, -16.85116143],

     [ -5.30635357, 28.454575,   -16.00436143],

     [-12.97188143, 26.19849929, -10.80136214],

     [-17.41249286, 20.48412214,  -3.41834571],

     [ -4.92688071, 18.19712571, -14.92410143],

     [  0.24333071, 18.58162071, -15.87177071],

     [  6.03194429, 18.09014143, -15.01822857],

     [ 17.48176643, 20.23072,     -4.08055429],

     [  6.03511857, 21.85444,    -14.76534929],

     [  0.14665857, 22.43481286, -15.64547857],

     [ -5.14161857, 22.03205429, -14.52001571]], dtype=float)



class PoseEstimator:

    """Estimate head pose according to the facial landmarks"""

 

    def __init__(self, img_size=(192, 256)):

        self.size = img_size    

        self.landmarks_general_68 = MODEL_POINTS_68

        # self.focal_length = 962

        self.focal_length = self.size[1]

        self.camera_center = (self.size[1] / 2, self.size[0] / 2)

        self.camera_matrix = np.array(

            [[self.focal_length, 0, self.camera_center[0]],

             [0, self.focal_length, self.camera_center[1]],

             [0, 0, 1]], dtype=float)

 

        # Assuming no lens distortion

        self.dist_coeefs = np.zeros((4, 1))

 

        # Rotation vector and translation vector; 原代碼給它設定了初值，不知道怎么來的？

        self.r_vec = np.array([[0], [0], [0]], dtype=float)

        self.t_vec = np.array([[0], [0], [0]], dtype=float)

 

 

    def get_euler_angle(self, rotation_vector):

        # calc rotation angles

        theta = cv2.norm(rotation_vector, cv2.NORM_L2)

 

        # transform to quaterniond

        w = math.cos(theta / 2)

        x = math.sin(theta / 2)*rotation_vector[0][0] / theta

        y = math.sin(theta / 2)*rotation_vector[1][0] / theta

        z = math.sin(theta / 2)*rotation_vector[2][0] / theta

 

        # pitch (x-axis rotation)

        t0 = 2.0 * (w*x + y*z)

        t1 = 1.0 - 2.0*(x**2 + y**2)

        pitch = math.atan2(t0, t1)

 

        # yaw (y-axis rotation)

        t2 = 2.0 * (w*y - z*x)

        if t2 > 1.0:

            t2 = 1.0

        if t2 < -1.0:

            t2 = -1.0

        yaw = math.asin(t2)

 

        # roll (z-axis rotation)

        t3 = 2.0 * (w*z + x*y)

        t4 = 1.0 - 2.0*(y**2 + z**2)

        roll = math.atan2(t3, t4)

 

        return pitch, yaw, roll



def solve_pose_by_68_points(self, image_points):

        print(self.landmarks_general_68.shape)

        print(image_points.shape)

        success, rotation_vector, translation_vector = cv2.solvePnP(

            self.landmarks_general_68,

            image_points,

            self.camera_matrix,

            self.dist_coeefs,

            rvec=self.r_vec,

            tvec=self.t_vec,

            flags=cv2.SOLVEPNP_ITERATIVE,

            useExtrinsicGuess=True)

            #flags=SOLVEPNP_EPNP)

        print(success)

        print(self.r_vec)

 

        return rotation_vector, translation_vector



if __name__ == "__main__":



    img = cv2.imread(IMAGE_PATH)

    img = cv2.resize(img, (192, 256))



    landmarks_load = json.load(open(LANDMARKS_ROOT, 'r'))

    landmarks_98 = get_landmarks(IMAGE_PATH, landmarks_load)

    landmarks_98 = np.array(landmarks_98, dtype=float)*4

    landmarks_68 = get_68from98(landmarks_98)



    pose_estimator = PoseEstimator(img_size=img.shape)

    rotation_vector, translation_vector = pose_estimator.solve_pose_by_68_points(landmarks_68)

    pitch, yaw, roll = pose_estimator.get_euler_angle(rotation_vector)



    def _radian2angle(r):

        return (r/math.pi)*180

    

    Y, X, Z = map(_radian2angle, [pitch, yaw, roll])

    line = 'Y:{:.1f}\nX:{:.1f}\nZ:{:.1f}'.format(Y,X,Z)

    print('{},{}'.format(os.path.basename(IMAGE_PATH), line.replace('\n',',')))

    print('yaw:{}, pitch:{}, roll:{}'.format(yaw, pitch, roll))

    print('Translation vector:{}'.format(translation_vector))

由于字數限制，其他繪圖和前處理函式就沒有貼出來了。
參考https://blog.csdn.net/ChuiGeDaQiQiu/article/details/88623267，我使用cv.solvePnP函式基于68點2D標準3D人臉，和98點預測到的2D關鍵點，估計人臉的姿態。
其中對標準模型進行了尺度縮放（到與圖片中平均人臉大小相近），坐標軸方向變換（x：左負右正，y：上負下正，z：遠正近負），坐標原點變換（點[30]，鼻尖點）；將98點2D點折算成了68點。焦距根據鏡頭引數進行了估算，其他相機內參都保留了原文的設定。
但是測驗時，不管輸入的是正面臉還是側面臉，輸出的結果都只有細微的變化，且明顯錯誤；嘗試更改焦距或者回傳原本的68點3D模型，結果會發生較大變化，但依然不會對實際輸入的變化起反應。
即便是直接使用68點3D模型中的xy維作為輸入，獲得的結果也不對。
感覺函式沒有起到預期的計算效果。請問是哪里出了問題？麻煩大佬幫忙看下~有用過這個函式的最好了，希望不是只有我一個碰到這個問題

轉載請註明出處，本文鏈接：https://www.uj5u.com/qita/43381.html

標籤：腳本語言(Perl/Python)

上一篇：[求助] UIAUTOMATION editcontrol寫入的檔案名被WIN10提示無效，但手動輸入同樣的可以

下一篇：關于區域函式參考外部函式中變數的問題