VOC数据集分为train、val、test、trainval四个子集(trainval = train + val,test与trainval互不重叠)
分割示意图
代码实现
import os
import random
xml_dir = './labels_voc/'  # directory holding the VOC XML annotations (Annotations)
txt_save_dir = './ImageSets/Main'  # directory the split list files are written to

# Split ratios: trainval takes this share of all samples, and train takes
# this share of trainval. The remainders become test and val respectively.
trainval_percent = 0.9
train_percent = 0.8


def split_voc_dataset(xml_dir, txt_save_dir, trainval_percent, train_percent):
    """Randomly split the XML annotations in *xml_dir* into VOC image sets.

    Writes ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` into
    *txt_save_dir* (created if missing). Each file holds one annotation name
    per line, without the ``.xml`` extension.

    Args:
        xml_dir: Directory containing the ``.xml`` annotation files.
        txt_save_dir: Directory that receives the four ``.txt`` files.
        trainval_percent: Fraction of all samples assigned to trainval; the
            rest go to test.
        train_percent: Fraction of trainval assigned to train; the rest go
            to val.

    Returns:
        A dict mapping ``'trainval'``, ``'train'``, ``'val'`` and ``'test'``
        to the list of names written to the corresponding file.

    Raises:
        FileNotFoundError: If *xml_dir* does not exist.
    """
    os.makedirs(txt_save_dir, exist_ok=True)

    # Keep only XML annotations so stray files (e.g. .DS_Store) are not
    # listed, and sort so the line order does not depend on os.listdir().
    names = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_dir)
        if entry.lower().endswith('.xml')
    )

    num = len(names)
    trainval_num = int(num * trainval_percent)
    train_num = int(trainval_num * train_percent)

    # train is sampled from trainval, so train and val partition trainval.
    trainval_index_list = random.sample(range(num), trainval_num)
    train_indices = set(random.sample(trainval_index_list, train_num))
    # Sets give O(1) membership tests in the loop below.
    trainval_indices = set(trainval_index_list)

    splits = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for i, name in enumerate(names):
        if i in trainval_indices:
            splits['trainval'].append(name)
            splits['train' if i in train_indices else 'val'].append(name)
        else:
            splits['test'].append(name)

    for split, members in splits.items():
        # The context manager closes each file even if a write fails.
        with open(os.path.join(txt_save_dir, split + '.txt'), 'w') as f:
            f.writelines(member + '\n' for member in members)

    return splits


if __name__ == '__main__':
    split_voc_dataset(xml_dir, txt_save_dir, trainval_percent, train_percent)
结果
ImageSets/
└── Main
├── test.txt
├── train.txt
├── trainval.txt
└── val.txt
参考资料
- 制作VOC数据集时生成trainval.txt,train.txt,val.txt代码:https://blog.csdn.net/weixin_41868104/article/details/89886697
评论 (0)