1.删除xml中某一类别
import xmltodict
import os
from xml.dom import minidom
from tqdm import tqdm
# Remove every object whose class name is listed in `class_filter_list`
# from all VOC xml annotation files found in `xml_dir` (files are rewritten
# in place).
class_filter_list = ["person","baggage car"]
xml_dir = "./新建文件夹/"  # folder that contains the VOC xml files

# List the directory once so the progress-bar total always matches the loop.
pbar = tqdm(os.listdir(xml_dir))
pbar.set_description("VOC xml filter")  # progress-bar prefix

for file in pbar:
    if not file.endswith(".xml"):
        continue  # skip non-xml files
    xml_path = os.path.join(xml_dir, file)

    # Read the xml file and convert it to a dict.
    with open(xml_path, "r", encoding="utf8") as f:
        xml_dic = xmltodict.parse(f.read())

    # xmltodict returns a single dict (not a list) when the file holds exactly
    # one <object>, and no "object" key at all when it holds none; normalise
    # both cases to a list so the filtering below always sees objects.
    tmp_obj_list = xml_dic["annotation"].get("object", [])
    if not isinstance(tmp_obj_list, list):
        tmp_obj_list = [tmp_obj_list]

    # Keep only the objects whose class is not filtered out.
    xml_dic["annotation"]["object"] = [
        tmp_obj
        for tmp_obj in tmp_obj_list
        if tmp_obj["name"] not in class_filter_list
    ]

    # Serialise, pretty-print and overwrite the original file.
    xmlstr = xmltodict.unparse(xml_dic)
    xml_pretty_str = minidom.parseString(xmlstr).toprettyxml()
    with open(xml_path, "w", encoding="utf8") as f:
        f.write(xml_pretty_str)

pbar.close()
2.指定帧区间添加特定bbox
# 在固定位置添加指定的候选框
import xmltodict
import os
from xml.dom import minidom
from tqdm import tqdm
import copy
# Append one fixed bounding box to every annotation file whose frame id
# (the numeric file name) lies in [frame_begin, frame_end]. Files are
# rewritten in place.
xml_dir = "./data_handle/xml/"  # folder that contains the xml files to process

# Template of the object to add.
xml = """
<object>
<name>LuggageVehicle</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>212</xmin>
<ymin>71</ymin>
<xmax>284</xmax>
<ymax>110</ymax>
</bndbox>
</object>
"""
xml_dict = xmltodict.parse(xml)
obj = xml_dict["object"]
cls_add = obj["name"]
bndbox = obj["bndbox"]
# Converting to int up front validates the template coordinates early.
bbox_add = [int(bndbox[key]) for key in ("xmin", "ymin", "xmax", "ymax")]

# First and last frame id (inclusive).
frame_begin = 84543
frame_end = 84919

# List the directory once so the progress-bar total always matches the loop.
pbar = tqdm(os.listdir(xml_dir))
pbar.set_description("VOC xml add class")  # progress-bar prefix

for file in pbar:
    if not file.endswith(".xml"):
        continue  # skip non-xml files
    try:
        frame_id = int(os.path.splitext(file)[0])
    except ValueError:
        continue  # file name is not a frame number, so it is out of range
    if not frame_begin <= frame_id <= frame_end:
        continue
    xml_path = os.path.join(xml_dir, file)

    # Read the xml file and convert it to a dict.
    with open(xml_path, "r", encoding="utf8") as f:
        xml_dic = xmltodict.parse(f.read())

    # xmltodict returns a single dict (not a list) when the file holds exactly
    # one <object>, and no "object" key at all when it holds none; normalise
    # both cases to a list before appending.
    tmp_obj_list = xml_dic["annotation"].get("object", [])
    if not isinstance(tmp_obj_list, list):
        tmp_obj_list = [tmp_obj_list]

    # Build the new entry from the template rather than from the first
    # existing object: this also works for files without any object and does
    # not inherit unrelated fields (pose/truncated/difficult) from another box.
    tmp_obj = copy.deepcopy(obj)
    tmp_obj["name"] = cls_add
    tmp_obj["bndbox"]["xmin"] = bbox_add[0]
    tmp_obj["bndbox"]["ymin"] = bbox_add[1]
    tmp_obj["bndbox"]["xmax"] = bbox_add[2]
    tmp_obj["bndbox"]["ymax"] = bbox_add[3]
    tmp_obj_list.append(tmp_obj)
    xml_dic["annotation"]["object"] = tmp_obj_list

    # Serialise, pretty-print and overwrite the original file.
    xmlstr = xmltodict.unparse(xml_dic)
    xml_pretty_str = minidom.parseString(xmlstr).toprettyxml()
    with open(xml_path, "w", encoding="utf8") as f:
        f.write(xml_pretty_str)

pbar.close()
3.指定帧区间删除特定的bbox
def cal_iou(box1, box2):
    """Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: [x1, y1, x2, y2] with (x1, y1) the top-left corner and
            (x2, y2) the bottom-right corner.
        box2: second box, same layout as ``box1``.

    Returns:
        The IoU value; 0 when the boxes do not overlap or when the union
        area is zero (degenerate boxes).
    """
    # Width and height of the intersection rectangle; a negative value means
    # the boxes are disjoint along that axis.
    intersect_w = min(box1[2], box2[2]) - max(box1[0], box2[0])
    intersect_h = min(box1[3], box2[3]) - max(box1[1], box2[1])
    if intersect_w < 0 or intersect_h < 0:
        return 0

    area_intersect = intersect_w * intersect_h
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    area_union = area_box1 + area_box2 - area_intersect

    # Zero-area boxes that touch or coincide give a zero union; report no
    # overlap instead of raising ZeroDivisionError.
    if area_union <= 0:
        return 0
    return area_intersect / area_union
# 删除指定的bbox
import xmltodict
import os
from xml.dom import minidom
from tqdm import tqdm
# Delete, in every annotation file whose frame id (the numeric file name)
# lies in [frame_begin, frame_end], the objects of class `cls_filter` whose
# box overlaps the reference box with IoU > `iou_filter`. Files are
# rewritten in place.
xml_dir = "./data_handle/xml/"  # folder that contains the VOC xml files

# Reference object used to match the bbox to delete.
xml = """
<object>
<name>Person</name>
<bndbox>
<xmin>901</xmin>
<ymin>6</ymin>
<xmax>920</xmax>
<ymax>31</ymax>
</bndbox>
</object>
"""
xml_dict = xmltodict.parse(xml)
obj = xml_dict["object"]
cls_filter = obj["name"]
bndbox = obj["bndbox"]
bbox_filter = [int(bndbox[key]) for key in ("xmin", "ymin", "xmax", "ymax")]
iou_filter = 0.6

# First and last frame id (inclusive).
frame_begin = 85000
frame_end = 85427

# List the directory once so the progress-bar total always matches the loop.
pbar = tqdm(os.listdir(xml_dir))
pbar.set_description("VOC xml filter")  # progress-bar prefix

for file in pbar:
    if not file.endswith(".xml"):
        continue  # skip non-xml files
    try:
        frame_id = int(os.path.splitext(file)[0])
    except ValueError:
        continue  # file name is not a frame number, so it is out of range
    if not frame_begin <= frame_id <= frame_end:
        continue
    xml_path = os.path.join(xml_dir, file)

    # Read the xml file and convert it to a dict.
    with open(xml_path, "r", encoding="utf8") as f:
        xml_dic = xmltodict.parse(f.read())

    # xmltodict returns a single dict (not a list) when the file holds exactly
    # one <object>, and no "object" key at all when it holds none; normalise
    # both cases to a list so the filtering below always sees objects.
    tmp_obj_list = xml_dic["annotation"].get("object", [])
    if not isinstance(tmp_obj_list, list):
        tmp_obj_list = [tmp_obj_list]

    # Keep every object except those matching both class and position.
    tmp_obj_list_fiter = []
    for tmp_obj in tmp_obj_list:
        tmp_bndbox = tmp_obj["bndbox"]
        tmp_obj_bbox = [
            int(tmp_bndbox[key]) for key in ("xmin", "ymin", "xmax", "ymax")
        ]
        if tmp_obj["name"] == cls_filter:
            iou = cal_iou(bbox_filter, tmp_obj_bbox)  # computed once, reused
            if iou > iou_filter:
                print(iou)  # report the IoU of each dropped box
                continue
        tmp_obj_list_fiter.append(tmp_obj)
    xml_dic["annotation"]["object"] = tmp_obj_list_fiter

    # Serialise, pretty-print and overwrite the original file.
    xmlstr = xmltodict.unparse(xml_dic)
    xml_pretty_str = minidom.parseString(xmlstr).toprettyxml()
    with open(xml_path, "w", encoding="utf8") as f:
        f.write(xml_pretty_str)

pbar.close()
4.指定帧区间修改指定bbox的类别
def cal_iou(box1, box2):
    """Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: [x1, y1, x2, y2] with (x1, y1) the top-left corner and
            (x2, y2) the bottom-right corner.
        box2: second box, same layout as ``box1``.

    Returns:
        The IoU value; 0 when the boxes do not overlap or when the union
        area is zero (degenerate boxes).
    """
    # Width and height of the intersection rectangle; a negative value means
    # the boxes are disjoint along that axis.
    intersect_w = min(box1[2], box2[2]) - max(box1[0], box2[0])
    intersect_h = min(box1[3], box2[3]) - max(box1[1], box2[1])
    if intersect_w < 0 or intersect_h < 0:
        return 0

    area_intersect = intersect_w * intersect_h
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    area_union = area_box1 + area_box2 - area_intersect

    # Zero-area boxes that touch or coincide give a zero union; report no
    # overlap instead of raising ZeroDivisionError.
    if area_union <= 0:
        return 0
    return area_intersect / area_union
# 修改指定框的类别
import xmltodict
import os
from xml.dom import minidom
from tqdm import tqdm
# Rename, in every annotation file whose frame id (the numeric file name)
# lies in [frame_begin, frame_end], the objects of class `cls_filter_src`
# whose box overlaps the reference box with IoU > `iou_filter` to class
# `cls_filter_dst`. Files are rewritten in place.
xml_dir = "./data_handle/xml/"  # folder that contains the VOC xml files

# Reference object used to match the bbox whose class is changed.
xml = """
<object>
<name>RefuelVehicle</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>106</xmin>
<ymin>389</ymin>
<xmax>237</xmax>
<ymax>542</ymax>
</bndbox>
</object>
"""
xml_dict = xmltodict.parse(xml)
obj = xml_dict["object"]
cls_filter_src = obj["name"]
bndbox = obj["bndbox"]
bbox_filter = [int(bndbox[key]) for key in ("xmin", "ymin", "xmax", "ymax")]
cls_filter_dst = "LuggageVehicle"
iou_filter = 0.99

# First and last frame id (inclusive).
frame_begin = 59680
frame_end = 60000

# List the directory once so the progress-bar total always matches the loop.
pbar = tqdm(os.listdir(xml_dir))
pbar.set_description("VOC xml filter")  # progress-bar prefix

for file in pbar:
    if not file.endswith(".xml"):
        continue  # skip non-xml files
    try:
        frame_id = int(os.path.splitext(file)[0])
    except ValueError:
        continue  # file name is not a frame number, so it is out of range
    if not frame_begin <= frame_id <= frame_end:
        continue
    xml_path = os.path.join(xml_dir, file)

    # Read the xml file and convert it to a dict.
    with open(xml_path, "r", encoding="utf8") as f:
        xml_dic = xmltodict.parse(f.read())

    # xmltodict returns a single dict (not a list) when the file holds exactly
    # one <object>, and no "object" key at all when it holds none; normalise
    # both cases to a list so the loop below always sees objects.
    tmp_obj_list = xml_dic["annotation"].get("object", [])
    if not isinstance(tmp_obj_list, list):
        tmp_obj_list = [tmp_obj_list]

    # Relabel the matching objects; every object is kept.
    for tmp_obj in tmp_obj_list:
        tmp_bndbox = tmp_obj["bndbox"]
        tmp_obj_bbox = [
            int(tmp_bndbox[key]) for key in ("xmin", "ymin", "xmax", "ymax")
        ]
        if (
            tmp_obj["name"] == cls_filter_src
            and cal_iou(bbox_filter, tmp_obj_bbox) > iou_filter
        ):
            tmp_obj["name"] = cls_filter_dst
    xml_dic["annotation"]["object"] = tmp_obj_list

    # Serialise, pretty-print and overwrite the original file.
    xmlstr = xmltodict.unparse(xml_dic)
    xml_pretty_str = minidom.parseString(xmlstr).toprettyxml()
    with open(xml_path, "w", encoding="utf8") as f:
        f.write(xml_pretty_str)

pbar.close()
评论 (0)