如何解决OpenCV捕获IP摄像机RTSP流时延迟过大的问题
我正在尝试在IP摄像机上进行一些处理,并且效果很好,但是我发现现实世界和视频捕获之间存在大约7到10秒的延迟。
我使用的流地址形式为 rtsp://@ip:port/live ext。
本相机具有Web界面(IE / ActiveX),可以以非常低的延迟显示图像。 (大约200到300毫秒)。
我测试了下面这段代码:输入视频文件时,它运行良好、没有延迟;但当我改用IP摄像机或无人机摄像机的RTSP流时,程序的延迟达到7~10秒。
注意:我将分辨率设置为(1080,720),并且使用了NVIDIA Quadro 1000 GPU,运行良好,因此我认为问题不在于处理速度或硬件,而在于代码。
edit:它可能与VideoCapture缓冲区有关。有没有办法让它使用最新的图像?
edit2:用VLC播放同一路流时延迟表现很好,只有大约300ms。
谢谢!
您可以在下面看到我使用的代码:
import cv2
import time
import argparse
import numpy as np
from PIL import Image
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bBox
from utils.nms import single_class_non_max_suppression
from load_model.pytorch_loader import load_pytorch_model,pytorch_inference
# Load the detection model once, when the module is imported.
# Alternative checkpoint (disabled):
# model = load_pytorch_model('models/face_mask_detection.pth')
model = load_pytorch_model('models/model360.pth')

# Anchor configuration.
# Alternative feature map sizes (disabled):
# feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
feature_map_sizes = [[45, 45], [23, 23], [12, 12], [6, 6], [4, 4]]
anchor_sizes = [
    [0.04, 0.056],
    [0.08, 0.11],
    [0.16, 0.22],
    [0.32, 0.45],
    [0.64, 0.72],
]
anchor_ratios = [[1, 0.62, 0.42]] * 5

# Generate the anchors from the configuration above.
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)

# For inference the batch size is 1 and the model output shape is [1, N, 4],
# so a leading batch axis is added to the anchors: [1, anchor_num, 4].
anchors_exp = np.expand_dims(anchors, axis=0)

# Maps the predicted class index to its display label.
id2class = {0: 'Mask', 1: 'NoMask'}
def inference(image, conf_thresh=0.5, IoU_thresh=0.4, target_shape=(160, 160),
              draw_result=True, show_result=True):
    """Run mask detection on a single image and return the kept detections.

    :param image: 3D numpy array of the image (height, width, channels).
        When ``draw_result`` is true, boxes and labels are drawn onto this
        array in place.
    :param conf_thresh: the min threshold of classification probability.
    :param IoU_thresh: the IoU threshold of NMS.
    :param target_shape: the model input size passed to ``cv2.resize``.
    :param draw_result: whether to draw bounding boxes on the image.
    :param show_result: whether to display the image (via PIL) afterwards.
    :return: list of ``[class_id, conf, xmin, ymin, xmax, ymax]`` entries,
        one per box kept by NMS; coordinates are clipped to the image.
    """
    output_info = []
    height, width, _ = image.shape

    # Preprocess: resize, scale to 0~1, add a batch axis, and move the
    # channel axis in front of the spatial axes.
    image_resized = cv2.resize(image, target_shape)
    image_np = image_resized / 255.0
    image_exp = np.expand_dims(image_np, axis=0)
    image_transposed = image_exp.transpose((0, 3, 1, 2))

    y_bBoxes_output, y_cls_output = pytorch_inference(model, image_transposed)

    # Remove the batch dimension, since batch is always 1 for inference.
    y_bBoxes = decode_bBox(anchors_exp, y_bBoxes_output)[0]
    y_cls = y_cls_output[0]

    # To speed up, do single class NMS, not multiple classes NMS.
    bBox_max_scores = np.max(y_cls, axis=1)
    bBox_max_score_classes = np.argmax(y_cls, axis=1)

    # keep_idxs holds the indices of the boxes that survive NMS.
    keep_idxs = single_class_non_max_suppression(
        y_bBoxes,
        bBox_max_scores,
        conf_thresh=conf_thresh,
        IoU_thresh=IoU_thresh,
    )

    for idx in keep_idxs:
        conf = float(bBox_max_scores[idx])
        class_id = bBox_max_score_classes[idx]
        bBox = y_bBoxes[idx]

        # Box coordinates are scaled by the image size (presumably they are
        # normalized to 0~1 -- confirm against decode_bBox) and clipped so
        # they never exceed the image boundary.
        xmin = max(0, int(bBox[0] * width))
        ymin = max(0, int(bBox[1] * height))
        xmax = min(int(bBox[2] * width), width)
        ymax = min(int(bBox[3] * height), height)

        if draw_result:
            # Callers in this file convert frames to RGB before calling, so
            # (0, 255, 0) is green for class 0 and (255, 0, 0) is red otherwise.
            color = (0, 255, 0) if class_id == 0 else (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(
                image,
                "%s: %.2f" % (id2class[class_id], conf),
                (xmin + 2, ymin - 2),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                color,
            )
        output_info.append([class_id, conf, xmin, ymin, xmax, ymax])

    if show_result:
        Image.fromarray(image).show()
    return output_info
def run_on_video(video_path, output_video_name, conf_thresh):
    """Run detection on every frame of a video source and display the result.

    Frames are read one at a time, converted from BGR to RGB, passed to
    ``inference`` (which draws on the frame in place), and shown in an OpenCV
    window. Per-frame timing is printed to stdout. The loop ends when a
    frame can no longer be read.

    :param video_path: anything accepted by ``cv2.VideoCapture`` (a file
        path, a stream URL, or a camera index).
    :param output_video_name: currently unused; video writing is disabled.
        Kept so existing callers continue to work.
    :param conf_thresh: confidence threshold forwarded to ``inference``.
    :raises ValueError: if the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # Validate the capture before querying any of its properties.
    if not cap.isOpened():
        cap.release()
        raise ValueError("Video open Failed.")

    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    idx = 0
    try:
        while True:
            start_stamp = time.time()
            status, img_raw = cap.read()
            # A failed read (end of file, dropped stream) yields no frame;
            # stop before handing None to cvtColor.
            if not status:
                break
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            read_frame_stamp = time.time()

            # NOTE(review): frames are processed strictly in the order the
            # capture delivers them; if inference is slower than the source
            # frame rate the display may fall behind a live stream -- confirm
            # against the capture backend in use.
            inference(img_raw, conf_thresh, IoU_thresh=0.5,
                      target_shape=(360, 360), show_result=False)
            # imshow expects BGR, so reverse the channel axis for display.
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()

            # Video writing is disabled, so the "write time" is just the gap
            # between these two timestamps.
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f,infer time:%f,write time:%f" % (
                read_frame_stamp - start_stamp,
                inference_stamp - read_frame_stamp,
                write_frame_stamp - inference_stamp,
            ))
    finally:
        # Always free the capture, even if inference or display raises.
        cap.release()
if __name__ == "__main__":
    # Command-line entry point: run detection on one image (default) or on a
    # video / camera stream.
    parser = argparse.ArgumentParser(description="Face Mask Detection")
    parser.add_argument('--img-mode', type=int, default=1,
                        help='set 1 to run on image,0 to run on video.')
    parser.add_argument('--img-path', type=str, help='path to your image.')
    parser.add_argument('--video-path', default='0',
                        help='path to your video,`0` means to use camera.')
    # parser.add_argument('--hdf5',help='keras hdf5 file')
    args = parser.parse_args()

    if args.img_mode:
        # --img-path has no default, so fail with a clear message instead of
        # letting cv2 raise on a missing or unreadable file.
        if not args.img_path:
            parser.error('--img-path is required when --img-mode is 1')
        img = cv2.imread(args.img_path)
        if img is None:
            parser.error('could not read image: %s' % args.img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Same model input size as the video path uses.
        inference(img, show_result=True, target_shape=(360, 360))
    else:
        # The string '0' selects the default camera (device index 0).
        video_path = 0 if args.video_path == '0' else args.video_path
        run_on_video(video_path, '', conf_thresh=0.5)
我不知道为什么OpenCV这么慢。希望能得到一些加快捕获速度的技巧。
解决方法
问题出在Opencv RTSP流实现中。
要从流中取出一帧,您需要初始化编解码器并向其提供若干个压缩帧数据包。编解码器内部有一个帧缓冲区,它以FIFO(先进先出)的方式工作。您先调用avcodec_send_packet(),然后调用avcodec_receive_frame();返回的帧被包装成Mat对象后返回给您。最初的几个数据包只用于初始化缓冲区,不会生成任何图片。
(更多信息,请点击https://ffmpeg.org/doxygen/3.3/group__lavc__encdec.html)
不要期望在Python的OpenCV中使用RTSP能获得低延迟。在我看来,减少延迟的唯一方法是参考FFmpeg的示例,并用C++重写。
增加I帧数量可能会有所帮助(剧透:帮助不大)。PS:我使用RTSP流的一些示例:https://www.youtube.com/channel/UCOK7D73tj7Dl4ZyXE-J0UNA
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。