1.针对问题
数据标注时,由于标注工具的设置问题,最终生成的XML文件中图片的width和height被写成了小数(例如1920.0),导致某些按整数解析这两个字段的代码在运行时抛出异常。样例文件如下所示:
<?xml version="1.0" ?>
<annotation>
<folder>/pool/label/209-20180708-08310907/JPEGImages</folder>
<filename>/pool/label/209-20180708-08310907/JPEGImages/209-20180708-08310907_frame00731.jpg</filename>
<source>
<database>Unknown</database>
</source>
<size>
<width>1920.0</width> # 问题所在
<height>1080.0</height> # 问题所在
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>RefuelVehicle</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>664</xmin>
<ymin>368</ymin>
<xmax>794</xmax>
<ymax>464</ymax>
</bndbox>
</object>
······
</annotation>
2.代码实现
# 导包
import xmltodict
import os
from progressbar import *
import numpy as np
import time
import matplotlib.pyplot as plt
import xmltodict
from xml.dom import minidom
# Root of the VOC-style dataset; the annotation XML files live in "Annotations".
data_root = "/data/jupiter/project/dataset/VOC_zd"
xml_dir = os.path.join(data_root, "Annotations")
# Meant to hold collected statistics; nothing in this script appends to it.
width_height_list = []


def _as_integer_text(value):
    """Return *value* without an all-zero fractional part ("1920.0" -> "1920").

    Anything else -- None (empty element), non-numeric text, or a value with a
    real fraction such as "12.5" -- is returned unchanged so that no
    information is ever lost.
    """
    if not isinstance(value, str):
        return value
    whole, dot, fraction = value.partition(".")
    if dot and whole.isascii() and whole.isdigit() and fraction.strip("0") == "":
        return whole
    return value


# Only regular *.xml files are processed; os.listdir may also return
# sub-directories or unrelated files that cannot be parsed as annotations.
xml_files = [
    name
    for name in os.listdir(xml_dir)
    if name.lower().endswith(".xml")
    and os.path.isfile(os.path.join(xml_dir, name))
]

# Progress bar setup.
widgets = ['box width_height 校准: ', Percentage(), ' ', Bar('#'), ' ', Timer(), ' ', ETA()]
pbar = ProgressBar(widgets=widgets, maxval=len(xml_files)).start()
count = 0
for xml_file in xml_files:
    xml_file_path = os.path.join(xml_dir, xml_file)
    # Read explicitly as UTF-8 instead of relying on the platform default.
    with open(xml_file_path, encoding="utf-8") as f:
        xml_dic = xmltodict.parse(f.read())

    # Normalise width and height independently: either one may be a float
    # while the other is already an integer.
    size = xml_dic["annotation"]["size"]
    changed = False
    for key in ("width", "height"):
        fixed = _as_integer_text(size[key])
        if fixed != size[key]:
            size[key] = fixed
            changed = True

    # Rewrite the file only when something was actually corrected.
    if changed:
        xml_pretty_str = minidom.parseString(xmltodict.unparse(xml_dic)).toprettyxml()
        # toprettyxml() emits no encoding declaration, which means UTF-8.
        with open(xml_file_path, "w", encoding="utf-8") as f:
            f.write(xml_pretty_str)

    # Advance the progress bar.
    count += 1
    pbar.update(count)
# Finalise the progress bar.
pbar.finish()
评论 (0)