IP Camera Capture RTSP流大延迟OPENCV

如何解决IP Camera Capture RTSP流大延迟OPENCV

我正在尝试在IP摄像机上进行一些处理，并且效果很好，但是我发现现实世界和视频捕获之间存在大约7到10秒的延迟。

我正在使用的地址是 rtsp://@ip:port/live.ext

本相机具有Web界面（IE / ActiveX），可以以非常低的延迟显示图像。（大约200到300毫秒）。

我测试了这段代码：当输入是视频文件时，它运行良好、没有延迟；但当我改用IP摄像机或无人机摄像头的RTSP流时，软件会有7〜10秒的延迟。

注意：我将分辨率设置为（1080,720），并且使用了NVIDIA Quadro 1000 GPU，运行良好，因此我认为问题不在于处理或硬件，而在于代码。

edit：它可能与VideoCapture缓冲区有关。有没有办法让它使用最新的图像？

edit2：我在VLC上的延迟表现很好，只有300ms的延迟。

谢谢！

您可以在下面看到我使用的代码：

import cv2
import time

import argparse
import numpy as np
from PIL import Image
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bBox
from utils.nms import single_class_non_max_suppression
from load_model.pytorch_loader import load_pytorch_model,pytorch_inference

# Load the detector weights once at import time.
# Other checkpoint left commented out by the author:
#   'models/face_mask_detection.pth'
model = load_pytorch_model('models/model360.pth')

# Anchor configuration: one entry per feature map (five in total).
# Other feature-map sizes left commented out by the author:
#   [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
feature_map_sizes = [
    [45, 45],
    [23, 23],
    [12, 12],
    [6, 6],
    [4, 4],
]
anchor_sizes = [
    [0.04, 0.056],
    [0.08, 0.11],
    [0.16, 0.22],
    [0.32, 0.45],
    [0.64, 0.72],
]
anchor_ratios = 5 * [[1, 0.62, 0.42]]

# Generate the anchors from the configuration above.
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)

# For inference the batch size is 1 and the model output shape is [1, N, 4],
# so add a leading batch axis to the anchors: [1, anchor_num, 4].
anchors_exp = np.expand_dims(anchors, 0)

# Maps the predicted class index to its display label.
id2class = {
    0: 'Mask',
    1: 'NoMask',
}


def inference(image,conf_thresh=0.5,IoU_thresh=0.4,target_shape=(160,160),draw_result=True,show_result=True
              ):
    '''
    Main function of detection inference.

    Resizes the image to the model input size, runs the model, decodes the
    boxes against the precomputed anchors, applies single-class NMS and
    optionally draws / displays the result.

    :param image: 3D numpy array of image (height, width, channels). The
        callers in this file pass RGB data. When ``draw_result`` is true the
        boxes and labels are drawn onto this array in place.
    :param conf_thresh: the min threshold of classification probability.
    :param IoU_thresh: the IoU threshold of NMS.
    :param target_shape: the model input size, passed to ``cv2.resize``.
    :param draw_result: whether to draw bounding boxes on the image.
    :param show_result: whether to display the image (via PIL).
    :return: list with one ``[class_id, conf, xmin, ymin, xmax, ymax]`` entry
        per detection kept after NMS; coordinates are pixels in the original
        image, clipped to its bounds.
    '''
    output_info = []
    height, width, _ = image.shape
    image_resized = cv2.resize(image, target_shape)
    image_np = image_resized / 255.0  # normalise pixel values to 0~1
    image_exp = np.expand_dims(image_np, axis=0)

    # Move the channel axis forward: (batch, H, W, C) -> (batch, C, H, W).
    image_transposed = image_exp.transpose((0, 3, 1, 2))

    y_bBoxes_output, y_cls_output = pytorch_inference(model, image_transposed)
    # Remove the batch dimension, since the batch is always 1 for inference.
    y_bBoxes = decode_bBox(anchors_exp, y_bBoxes_output)[0]
    y_cls = y_cls_output[0]
    # To speed up, do single class NMS, not multiple classes NMS.
    bBox_max_scores = np.max(y_cls, axis=1)
    bBox_max_score_classes = np.argmax(y_cls, axis=1)

    # keep_idxs holds the indices of the boxes that survive NMS.
    keep_idxs = single_class_non_max_suppression(
        y_bBoxes,
        bBox_max_scores,
        conf_thresh=conf_thresh,
        IoU_thresh=IoU_thresh,
    )

    for idx in keep_idxs:
        conf = float(bBox_max_scores[idx])
        class_id = bBox_max_score_classes[idx]
        bBox = y_bBoxes[idx]
        # Scale to pixel coordinates and clip them to the image boundary.
        xmin = max(0, int(bBox[0] * width))
        ymin = max(0, int(bBox[1] * height))
        xmax = min(int(bBox[2] * width), width)
        ymax = min(int(bBox[3] * height), height)

        if draw_result:
            # Class 0 ('Mask') is drawn as (0, 255, 0); anything else as
            # (255, 0, 0). The colour must carry all three channels.
            if class_id == 0:
                color = (0, 255, 0)
            else:
                color = (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(
                image,
                "%s: %.2f" % (id2class[class_id], conf),
                (xmin + 2, ymin - 2),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                color,
            )
        output_info.append([class_id, conf, xmin, ymin, xmax, ymax])

    if show_result:
        Image.fromarray(image).show()
    return output_info


def run_on_video(video_path,output_video_name,conf_thresh):
    '''
    Run detection frame by frame on a video source and show the result.

    :param video_path: anything ``cv2.VideoCapture`` accepts (file path,
        stream URL or integer camera index).
    :param output_video_name: currently unused, because writing the output
        video is disabled; kept so existing callers keep working.
    :param conf_thresh: the min classification confidence, forwarded to
        ``inference``.
    :raises ValueError: if the video source cannot be opened.
    '''
    cap = cv2.VideoCapture(video_path)
    # Check the source before querying any of its properties.
    if not cap.isOpened():
        raise ValueError("Video open Failed.")
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    idx = 0
    try:
        while True:
            start_stamp = time.time()
            status, img_raw = cap.read()
            if not status:
                # End of stream or a failed grab: img_raw is not a valid
                # frame here, so stop before trying to convert it.
                break
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            read_frame_stamp = time.time()

            inference(img_raw, conf_thresh, IoU_thresh=0.5, target_shape=(360, 360), show_result=False)
            # imshow expects BGR, so flip the channel axis back for display.
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()
            # No frame is written, so the reported write time is ~0.
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f,infer time:%f,write time:%f" % (read_frame_stamp - start_stamp,inference_stamp - read_frame_stamp,write_frame_stamp - inference_stamp))
    finally:
        # Always free the camera / stream handle, even if inference raises.
        cap.release()


if __name__ == "__main__":
    # Command-line entry point: run detection on a single image (default)
    # or on a video / camera stream.
    parser = argparse.ArgumentParser(description="Face Mask Detection")
    parser.add_argument('--img-mode',type=int,default=1,help='set 1 to run on image,0 to run on video.')
    parser.add_argument('--img-path',type=str,help='path to your image.')
    parser.add_argument('--video-path',default='0',help='path to your video,`0` means to use camera.')
    args = parser.parse_args()
    if args.img_mode:
        if not args.img_path:
            parser.error('--img-path is required when --img-mode is 1.')
        img = cv2.imread(args.img_path)
        # cv2.imread returns None instead of raising when the file cannot
        # be read, so fail here with a clear message.
        if img is None:
            parser.error('could not read image: %s' % args.img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Same 360x360 model input size that run_on_video uses.
        inference(img, show_result=True, target_shape=(360, 360))
    else:
        video_path = args.video_path
        # The string '0' selects the default camera (device index 0).
        if args.video_path == '0':
            video_path = 0
        run_on_video(video_path, '', conf_thresh=0.5)

我不知道为什么OpenCV这么慢。希望能得到一些加快捕获速度的技巧。

解决方法

问题出在Opencv RTSP流实现中。

要从流中取出一帧，您需要初始化编解码器并向其提供几个压缩帧数据包。编解码器内部有一个帧缓冲区，它按FIFO（先进先出）的方式工作。您先调用avcodec_send_packet（），然后调用avcodec_receive_frame（）。返回的帧被包装到Mat对象中并返回给您。最初的几个数据包只用于初始化缓冲区，不会生成任何图片。

（更多信息，请点击https://ffmpeg.org/doxygen/3.3/group__lavc__encdec.html）

不要期望在Python的OpenCV上使用RTSP能获得低延迟。在我看来，减少延迟的唯一方法是参考FFmpeg的示例并用C++重写。

增加I帧数量可能会有所帮助（剧透：帮助不大）。附：我使用RTSP流的一些示例：https://www.youtube.com/channel/UCOK7D73tj7Dl4ZyXE-J0UNA