VOC2VID:将VOC格式的数据集转为视频进行查看|可视化视频标注结果

通常用于查看连续视频帧的标注结果。因博主有视频目标检测的需求，所以写了该小工具。
# 可视化视频标注结果

import numpy as np
import cv2
import xmltodict
import os
from tqdm import tqdm

# Basic configuration
xml_dir = "./data_handle/xml/"  # folder holding the VOC xml annotation files
img_dir = "./data_handle/img/"  # folder holding the VOC image files
# Class names; a class id is the position of its name in this list.
class_list = [
    'conveyor',
    'refueller',
    'aircraft',
    'lounge',
    'dining car',
    'front of baggage car',
    'tractor',
]


def voc_2_yolo_bbox_list(xml_dict):
    """Convert a parsed VOC annotation into a list of bounding boxes.

    Args:
        xml_dict: The VOC xml document parsed into nested mappings.
            ``xml_dict["annotation"]["object"]`` is a single mapping when
            the file holds one object and a list of mappings when it holds
            several; it may be absent when the frame has no objects.

    Returns:
        A list with one ``[x1, y1, x2, y2, score, cls_id]`` entry per
        object. ``score`` is fixed at 1 and ``cls_id`` is the index of the
        object's name in ``class_list``. An annotation without objects
        yields an empty list.

    Raises:
        KeyError: If ``xml_dict`` has no ``"annotation"`` entry.
        ValueError: If an object's name is not in ``class_list`` or a
            coordinate is not numeric.
    """
    # An empty <annotation/> element parses to None, hence the `or {}`.
    annotation = xml_dict["annotation"] or {}
    objects = annotation.get("object")
    if objects is None:
        # Frame without any labelled object: nothing to draw.
        return []
    if not isinstance(objects, list):
        # A single <object> is parsed as a mapping rather than a list.
        objects = [objects]

    bbox_list = []
    for obj in objects:
        box = obj['bndbox']
        # Go through float() so coordinates written as "12.0" are accepted.
        x1, y1, x2, y2 = (
            int(float(box[key])) for key in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        score = 1
        cls_id = class_list.index(obj['name'])
        bbox_list.append([x1, y1, x2, y2, score, cls_id])

    return bbox_list

def random_color(color_num):
    """Generate a list of random colours.

    Args:
        color_num: Number of colours to generate.

    Returns:
        A list of ``color_num`` tuples, each holding three plain Python
        ints in the range 0..255.
    """
    # randint's upper bound is exclusive, so 256 is required to reach 255.
    return [
        tuple(int(channel) for channel in np.random.randint(0, 256, size=3))
        for _ in range(color_num)
    ]
# One colour per entry in class_list, generated once when the module runs.
color_list = random_color(len(class_list))

def vis_detections(img, bbox_list,class_name_list=class_list,color_list=color_list,thresh=0.5):
    """Draw bounding boxes and their labels onto an image.

    Args:
        img: Image to draw on; the OpenCV drawing calls modify it in place.
        bbox_list: Iterable of ``[x1, y1, x2, y2, score, cls_id]`` entries.
        class_name_list: Sequence used to turn ``cls_id`` into a class name.
        color_list: Sequence of colours, indexed by ``cls_id``.
        thresh: Boxes whose score is below this value are skipped.

    Returns:
        The same ``img`` object, with the boxes and labels drawn.
    """
    for bbox in bbox_list:
        # OpenCV only accepts integer pixel coordinates, so convert once
        # here and reuse the values for both the box and the label.
        x1, y1, x2, y2 = (int(v) for v in bbox[:4])
        score = bbox[4]
        cls_id = int(bbox[5])

        # Skip boxes below the threshold before doing any lookups.
        if score < thresh:
            continue

        cls_name = class_name_list[cls_id]
        color = color_list[cls_id]

        # Box outline
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

        # Label, placed just above the top-left corner of the box
        label_text = '{:s} {:.3f}'.format(cls_name, score)
        cv2.putText(img, label_text, (x1 - 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
    return img

# os.listdir returns entries in arbitrary order; sort them so the frames are
# written in sequence (assumes file names sort in frame order, e.g. zero-padded).
img_list = sorted(os.listdir(img_dir))
if not img_list:
    raise SystemExit("No files found in {}".format(img_dir))

frame_rate = 30  # frames per second of the output video

# The first image defines the frame size of the video.
first_frame = cv2.imread(os.path.join(img_dir, img_list[0]))
if first_frame is None:
    raise SystemExit("Cannot read image {}".format(os.path.join(img_dir, img_list[0])))
frame_shape = (first_frame.shape[1], first_frame.shape[0])  # VideoWriter expects (width, height)

# 'mp4v' is the fourcc that matches the .mp4 container.
videoWriter = cv2.VideoWriter('result.mp4', cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, frame_shape)

# Progress bar
pbar = tqdm(total=len(img_list))
pbar.set_description("VOC2VID")

# Write the frames one by one; always release the writer and the bar.
try:
    for file in img_list:
        img = cv2.imread(os.path.join(img_dir, file))
        if img is None:
            # Not a readable image (stray file in the folder): skip it.
            pbar.update(1)
            continue

        # The annotation shares the image's base name, whatever the
        # length of the image extension (.jpg, .jpeg, .png, ...).
        xml_path = os.path.join(xml_dir, os.path.splitext(file)[0] + ".xml")

        # A frame without an annotation file is written without boxes.
        if os.path.isfile(xml_path):
            with open(xml_path, 'r', encoding="utf8") as f:
                xml_dict = xmltodict.parse(f.read())
            bbox_list = voc_2_yolo_bbox_list(xml_dict)
            img = vis_detections(img, bbox_list)

        videoWriter.write(img)
        pbar.update(1)
finally:
    pbar.close()
    videoWriter.release()
VOC2VID:将VOC格式的数据集转为视频进行查看|可视化视频标注结果

评论 (0)