facenet tensorflow2.X人脸检测太慢了

如何解决 FaceNet（TensorFlow 2.x）人脸检测速度太慢的问题？

我运行了 this repository 中的代码，但执行检测功能时速度非常慢。

实时视频的画面看起来不流畅，而且会出现卡顿和中断。

我猜是每一帧的计算耗时太长造成的，请问该如何解决？

使用的 GPU 是 GeForce RTX 3090。

预先感谢您的帮助。

---------detect.py------------

import cv2 
import numpy as np
import mtcnn
from architecture import *
from train_v2 import normalize,l2_normalizer
from scipy.spatial.distance import cosine
from tensorflow.keras.models import load_model
import pickle
import time


# Minimum detector confidence; detect() skips any detection scoring below this.
confidence_t=0.99
# Maximum cosine distance to a stored encoding for a face to receive that name.
recognition_t=0.5
# Target size handed to cv2.resize for each face crop before it is encoded.
required_size = (160,160)

def get_face(img,Box):
    """Crop a face region out of ``img``.

    Args:
        img: Image array indexed as ``img[row, column]``.
        Box: ``(x, y, width, height)`` of the face.

    Returns:
        ``(face, top_left, bottom_right)`` where ``face`` is the cropped
        slice of ``img`` and the two corners are ``(x, y)`` tuples.
    """
    left, top, box_w, box_h = Box
    # Negative origins have their sign dropped (not clamped to zero) before
    # the opposite corner is derived from them.
    left = abs(left)
    top = abs(top)
    right = left + box_w
    bottom = top + box_h
    top_left = (left, top)
    bottom_right = (right, bottom)
    return img[top:bottom, left:right], top_left, bottom_right

def get_encode(face_encoder,face,size):
    """Compute the embedding of a single face crop.

    Args:
        face_encoder: The embedding model. Assumed to be a callable Keras
            model (the script passes the FaceNet network) -- TODO confirm if
            another encoder type is ever supplied.
        face: Face crop as an image array.
        size: Target size handed to ``cv2.resize``.

    Returns:
        The embedding of ``face`` as a NumPy array (first row of the model
        output for the one-image batch).
    """
    face = normalize(face)
    face = cv2.resize(face,size)
    batch = np.expand_dims(face,axis=0).astype(np.float32)
    # Model.predict() sets up a full batch-prediction pipeline on every call,
    # which dominates the run time when it is invoked once per face per
    # frame. Keras recommends calling the model directly for a single small
    # batch; np.asarray converts the returned tensor back to an ndarray.
    encode = np.asarray(face_encoder(batch,training=False))[0]
    return encode


def load_pickle(path):
    """Read and return the object pickled in the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so ``path`` must point to a
    file from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def detect(img,detector,encoder,encoding_dict):
    """Detect faces in a BGR frame, label each one, and draw the result.

    Args:
        img: Frame in OpenCV BGR order; annotations are drawn onto it in place.
        detector: Object whose ``detect_faces(rgb_image)`` returns a list of
            dicts with ``'confidence'`` and ``'box'`` entries.
        encoder: Embedding model forwarded to ``get_encode``.
        encoding_dict: Mapping of person name -> reference embedding.

    Returns:
        The same ``img`` array, annotated.
    """
    img_rgb = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    results = detector.detect_faces(img_rgb)
    for res in results:
        if res['confidence'] < confidence_t:
            continue
        # MTCNN reports the bounding box under the lowercase key 'box'.
        face,pt_1,pt_2 = get_face(img_rgb,res['box'])
        if face.size == 0:
            # A degenerate box yields an empty crop that cv2.resize rejects.
            continue
        encode = get_encode(encoder,face,required_size)
        encode = l2_normalizer.transform(encode.reshape(1,-1))[0]
        name = 'unkNown'

        # Keep the closest stored identity that is within the threshold.
        distance = float("inf")
        for db_name,db_encode in encoding_dict.items():
            dist = cosine(db_encode,encode)
            if dist < recognition_t and dist < distance:
                name = db_name
                distance = dist

        if name == 'unkNown':
            # Unmatched faces: red box and label (colours are BGR).
            cv2.rectangle(img,pt_1,pt_2,(0,0,255),2)
            cv2.putText(img,name,pt_1,cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),1)
        else:
            # Matched faces: green box, name plus distance just above it.
            cv2.rectangle(img,pt_1,pt_2,(0,255,0),2)
            cv2.putText(img,name + f'__{distance:.2f}',(pt_1[0],pt_1[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX,1,(0,200,200),2)
    return img



if __name__ == "__main__":
    # Entry point: load the models and stored encodings, then annotate frames
    # from the video stream until it ends or 'q' is pressed.
    required_shape = (160,160)
    # NOTE(review): assumes architecture.py exports the builder as
    # InceptionResNetV2 (the pasted spelling was case-mangled) -- confirm.
    face_encoder = InceptionResNetV2()
    path_m = "facenet_keras_weights.h5"
    face_encoder.load_weights(path_m)
    encodings_path = 'encodings/encodings.pkl'
    face_detector = mtcnn.MTCNN()
    encoding_dict = load_pickle(encodings_path)

    cap = cv2.VideoCapture("http://192.168.0.2:8081/?action=stream")

    prev_time = 0
    FPS = 10

    try:
        while cap.isOpened():
            ret,frame = cap.read()

            if not ret:
                print("CAM NOT OPEND")
                break

            # Process at most FPS frames per second; frames arriving sooner
            # are read and dropped so detection does not run on every frame.
            if time.time() - prev_time <= 1./ FPS:
                continue

            prev_time = time.time()
            frame = detect(frame,face_detector,face_encoder,encoding_dict)
            print("detect")
            cv2.imshow('camera',frame)
            print("show")
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the stream and close the preview window on every exit path.
        cap.release()
        cv2.destroyAllWindows()

-----------train_v2.py-----------

from architecture import *
import os
import cv2
import mtcnn
import pickle
import numpy as np
# The scikit-learn class is spelled ``Normalizer``; a lowercase ``normalizer``
# does not exist in sklearn.preprocessing.
from sklearn.preprocessing import Normalizer
from tensorflow.keras.models import load_model

###### paths and variables #########
# NOTE(review): everything in this module runs at import time. detect.py does
# ``from train_v2 import normalize,l2_normalizer``, so starting detection also
# reloads the model and re-runs the encoding pass below. Consider moving the
# training steps behind an ``if __name__ == "__main__":`` guard.
face_data = 'face/'            # one sub-directory per person
required_shape = (160,160)     # size face crops are resized to before encoding
# NOTE(review): assumes architecture.py exports the builder as
# InceptionResNetV2 (the pasted spelling was case-mangled) -- confirm.
face_encoder = InceptionResNetV2()
path = "facenet_keras_weights.h5"
face_encoder.load_weights(path)
face_detector = mtcnn.MTCNN()
encodes = []
encoding_dict = dict()         # person name -> L2-normalised embedding
l2_normalizer = Normalizer('l2')
###############################


def normalize(img):
    """Standardise an image to zero mean and unit standard deviation.

    Args:
        img: NumPy array of pixel values.

    Returns:
        ``(img - mean) / std`` computed over the whole array. For a constant
        image (``std == 0``) the centred, all-zero array is returned so that
        no NaN/inf values are produced.
    """
    mean,std = img.mean(),img.std()
    if std == 0:
        # Dividing by a zero std would fill the result with NaNs.
        return img - mean
    return (img - mean) / std


# Build one reference embedding per person directory under face_data and
# store the name -> embedding mapping as a pickle.
for face_names in os.listdir(face_data):
    person_dir = os.path.join(face_data,face_names)
    if not os.path.isdir(person_dir):
        # Stray files next to the person folders cannot be listed.
        continue

    # Start from an empty list for every person; otherwise each embedding
    # would also include the faces of everyone processed before.
    encodes = []

    for image_name in os.listdir(person_dir):
        image_path = os.path.join(person_dir,image_name)

        img_BGR = cv2.imread(image_path)
        if img_BGR is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"skipping unreadable image: {image_path}")
            continue
        img_RGB = cv2.cvtColor(img_BGR,cv2.COLOR_BGR2RGB)

        x = face_detector.detect_faces(img_RGB)
        if not x:
            print(f"no face found in: {image_path}")
            continue
        # Only the first detection is used; the crop uses the same abs()
        # handling of negative origins as get_face() in detect.py.
        x1,y1,width,height = x[0]['box']
        x1,y1 = abs(x1),abs(y1)
        x2,y2 = x1+width,y1+height
        face = img_RGB[y1:y2,x1:x2]

        face = normalize(face)
        face = cv2.resize(face,required_shape)
        face_d = np.expand_dims(face,axis=0)
        encode = face_encoder.predict(face_d)[0]
        encodes.append(encode)

    if encodes:
        # Sum the per-image embeddings, then L2-normalise the result.
        encode = np.sum(encodes,axis=0 )
        encode = l2_normalizer.transform(np.expand_dims(encode,axis=0))[0]
        encoding_dict[face_names] = encode

path = 'encodings/encodings.pkl'
# Create the output directory if needed so open() does not fail.
os.makedirs(os.path.dirname(path),exist_ok=True)
with open(path,'wb') as file:
    pickle.dump(encoding_dict,file)