# 通常用于查看针对连续视频标注的结果;因博主有视频目标检测的需求,所以写了该小工具。
# 可视化视频标注结果
import numpy as np
import cv2
import xmltodict
import os
from tqdm import tqdm
# Basic configuration
xml_dir = "./data_handle/xml/"  # folder holding the VOC xml annotation files
img_dir = "./data_handle/img/"  # folder holding the VOC image files
# Class names; a name's position in this list is used as its class id
class_list = [
    'conveyor',
    'refueller',
    'aircraft',
    'lounge',
    'dining car',
    'front of baggage car',
    'tractor',
]
def voc_2_yolo_bbox_list(xml_dict, class_name_list=None):
    """Convert a parsed VOC annotation dict into a list of bounding boxes.

    Despite the name, coordinates are not normalized: each box keeps the VOC
    pixel corner coordinates and is returned as
    ``[xmin, ymin, xmax, ymax, score, cls_id]``.

    Args:
        xml_dict: Mapping with an ``"annotation"`` root, as produced by
            parsing a VOC xml file. Its ``"object"`` entry may be a list
            (several objects), a single mapping (one object) or absent
            (no objects).
        class_name_list: Class names used to turn an object's ``name`` into a
            class id (its index in the list). Defaults to the module-level
            ``class_list``.

    Returns:
        A list with one ``[x1, y1, x2, y2, score, cls_id]`` entry per object;
        ``score`` is always 1 because these are ground-truth labels. The list
        is empty when the annotation contains no object.

    Raises:
        KeyError: If the ``"annotation"`` root or a ``bndbox`` field is missing.
        ValueError: If an object's name is not in ``class_name_list``.
    """
    if class_name_list is None:
        class_name_list = class_list

    # An empty <annotation/> element parses to None, so fall back to {}.
    annotation = xml_dict["annotation"] or {}
    objects = annotation.get("object")
    if objects is None:
        # Frame without any labelled object.
        return []
    if not isinstance(objects, list):
        # A single <object> is parsed as a mapping rather than a list.
        objects = [objects]

    bbox_list = []
    for obj in objects:
        bndbox = obj['bndbox']
        # Go through float first so coordinates such as "12.5" are accepted.
        x1, y1, x2, y2 = (
            int(float(bndbox[key])) for key in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        score = 1
        cls_id = class_name_list.index(obj['name'])
        bbox_list.append([x1, y1, x2, y2, score, cls_id])
    return bbox_list
def random_color(color_num):
    """Generate a list of random colors.

    Args:
        color_num: Number of colors to generate.

    Returns:
        A list of ``color_num`` tuples, each holding three Python ints in the
        range 0..255. The list is empty when ``color_num`` is 0 or negative.
    """
    # randint's upper bound is exclusive, so 256 is needed to allow 255.
    return [
        tuple(int(channel) for channel in np.random.randint(0, 256, size=3))
        for _ in range(color_num)
    ]
# One random drawing color per entry in class_list; built once here, so a class keeps the same color for the whole run
color_list = random_color(len(class_list))
def vis_detections(img, bbox_list, class_name_list=class_list, color_list=color_list, thresh=0.5):
    """Draw bounding boxes and class labels onto an image.

    Args:
        img: Image to draw on. The OpenCV drawing calls write into this same
            object, which is also what gets returned.
        bbox_list: Iterable of boxes, each indexable as
            ``[x1, y1, x2, y2, score, cls_id]``.
        class_name_list: Maps ``cls_id`` to the class name shown in the label.
        color_list: Maps ``cls_id`` to the color used for the box and label.
        thresh: Boxes whose score is below this value are not drawn.

    Returns:
        The image that was passed in as ``img``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    thickness = 2

    for bbox in bbox_list:
        score = bbox[4]
        # Skip boxes below the threshold before doing any other work.
        if score < thresh:
            continue

        # OpenCV requires integer pixel coordinates for both the box and the text.
        x1, y1, x2, y2 = (int(value) for value in bbox[:4])
        cls_id = int(bbox[5])
        cls_name = class_name_list[cls_id]
        color = color_list[cls_id]

        # Draw the box.
        cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness)

        # Draw the label just above the box's top-left corner; clamp the
        # origin so the text stays visible for boxes touching the top or left edge.
        label_text = '{:s} {:.3f}'.format(cls_name, score)
        (_, text_height), _ = cv2.getTextSize(label_text, font, font_scale, thickness)
        label_org = (max(x1 - 5, 0), max(y1 - 5, text_height))
        cv2.putText(img, label_text, label_org, font, font_scale, color, thickness)
    return img
# Render every annotated image in img_dir as one frame of result.mp4.

# os.listdir returns names in arbitrary order; sort them so the frames are
# written in file-name order.
img_list = sorted(os.listdir(img_dir))
frame_rate = 30  # frames per second of the output video

# Take the frame size from the first file that can be decoded as an image.
first_frame = None
for file in img_list:
    first_frame = cv2.imread(os.path.join(img_dir, file))
    if first_frame is not None:
        break
if first_frame is None:
    raise FileNotFoundError("no readable image found in " + img_dir)
frame_shape = (first_frame.shape[1], first_frame.shape[0])  # VideoWriter expects (width, height)

# 'mp4v' is the fourcc tag that matches the .mp4 container; the FFmpeg
# backend typically rejects 'MJPG' for MP4 and falls back with a warning.
videoWriter = cv2.VideoWriter('result.mp4', cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, frame_shape)

# Progress bar
pbar = tqdm(total=len(img_list))
pbar.set_description("VOC2VID")

# Write the video frame by frame; always release the writer and the bar,
# even if a frame fails, so the output file is finalized.
try:
    for file in img_list:
        img = cv2.imread(os.path.join(img_dir, file))
        if img is None:
            # Not a decodable image (e.g. a stray non-image file): skip it.
            pbar.update(1)
            continue

        # The annotation shares the image's base name; splitext handles
        # extensions of any length (.jpg, .jpeg, ...).
        xml_path = os.path.join(xml_dir, os.path.splitext(file)[0] + ".xml")
        if os.path.isfile(xml_path):
            # Read the xml file, parse it to a dict, convert it to a bbox list.
            with open(xml_path, 'r', encoding="utf8") as f:
                xml_dict = xmltodict.parse(f.read())
            bbox_list = voc_2_yolo_bbox_list(xml_dict)
        else:
            # No annotation for this frame: keep the frame, draw nothing.
            bbox_list = []

        # Draw the annotations and append the frame to the video.
        img = vis_detections(img, bbox_list)
        videoWriter.write(img)
        pbar.update(1)
finally:
    pbar.close()
    videoWriter.release()
# 评论 (0)