本文给出两种标注格式转换工具的实现代码:1) XML转TXT工具:解析PASCAL VOC格式的XML文件,提取对象类别和边界框信息,并转换为YOLO所需的归一化(相对)坐标格式;2) TXT转XML工具:读取YOLO格式的标注文件,结合原始图像尺寸,重新构建符合PASCAL VOC标准的XML结构。两个工具均支持批量处理并自动创建输出目录;其中TXT转XML工具在找不到对应图片或遇到无效标签行时会打印提示并跳过。代码展示了标注信息在不同格式间的转换逻辑,以及相对坐标与像素坐标之间的相互换算方法,适用于计算机视觉数据集的格式转换需求。
一、XML转TXT
import os
import xml.etree.ElementTree as ET
def convert_xml_to_yolo(xml_file, classes, output_folder):
    """Convert one PASCAL VOC XML annotation into a YOLO label file.

    Each ``<object>`` whose ``<name>`` appears in *classes* becomes one line
    ``"<class_id> <x_center> <y_center> <w> <h>"`` where the class id is the
    index of the name in *classes* and the four box values are divided by the
    image width/height read from ``<size>``. Objects with other names are
    skipped. The result is written to ``<output_folder>/<image stem>.txt``;
    the file is created (empty) even when no object matches.

    Args:
        xml_file: Path of the XML annotation to read.
        classes: Sequence of class names; position defines the YOLO class id.
        output_folder: Existing directory that receives the ``.txt`` file.

    Raises:
        AttributeError: If ``<size>``, ``<width>``, ``<height>``, ``<name>``
            or a ``<bndbox>`` corner element is missing.
        ValueError: If a size or coordinate value is not numeric.
        ZeroDivisionError: If a matching object exists and the recorded
            image width or height is 0.
    """
    root = ET.parse(xml_file).getroot()

    # Prefer the image name recorded in the annotation; fall back to the XML
    # file's own name when <filename> is missing or empty.
    image_filename = root.findtext('filename') or os.path.basename(xml_file)

    size = root.find('size')
    image_width = int(size.find('width').text)
    image_height = int(size.find('height').text)

    yolo_labels = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name not in classes:
            continue
        class_id = classes.index(class_name)

        bbox = obj.find('bndbox')
        # Parse every corner exactly once, as float: some annotation tools
        # store values such as "10.0", which int() rejects. For integer
        # input the arithmetic below yields the same results as before.
        xmin, ymin, xmax, ymax = (
            float(bbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )

        x_center = (xmin + xmax) / 2 / image_width
        y_center = (ymin + ymax) / 2 / image_height
        w = (xmax - xmin) / image_width
        h = (ymax - ymin) / image_height
        yolo_labels.append(f"{class_id} {x_center} {y_center} {w} {h}")

    output_path = os.path.join(
        output_folder, os.path.splitext(image_filename)[0] + ".txt"
    )
    with open(output_path, 'w', encoding='utf-8') as out_file:
        out_file.writelines(f"{line}\n" for line in yolo_labels)
# Class names in class-id order: the list index is the YOLO class id.
classes = ['', '', '']  # Add your own class names

# Directories holding the XML annotations and the directories that receive
# the YOLO .txt files. They are built with os.path.join so the literals
# contain no backslash escapes (e.g. '\l' is an invalid escape sequence) and
# the script also works on non-Windows systems.
xml_dir = os.path.join('train', 'labels')
output_dir = os.path.join('train', 'label')  # Output directory for YOLO format files
test_xml_dir = os.path.join('test', 'labels')
test_output_dir = os.path.join('test', 'label')  # Output directory for YOLO format files

# Create the output directories if they don't exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(test_output_dir, exist_ok=True)

# Convert all XML files to YOLO format: training set first, then test set
for src_dir, dst_dir in ((xml_dir, output_dir), (test_xml_dir, test_output_dir)):
    for xml_file in os.listdir(src_dir):
        if xml_file.endswith('.xml'):
            xml_path = os.path.join(src_dir, xml_file)
            convert_xml_to_yolo(xml_path, classes, dst_dir)
二、TXT转XML
import os
import cv2
from xml.dom.minidom import Document
# Class name for each label id; keys are the id strings as they appear in
# the first column of a label line. Fill in your own class names.
LABEL_DICT = {
    "0": "",
    "1": "",
    "2": "",
}

# Supported image file extensions (add more as needed)
IMAGE_EXTENSIONS = [
    ".bmp",
    ".jpg",
    ".png",
]
def find_image(base_path, base_name):
    """Load the image called *base_name* from *base_path*.

    Each extension in ``IMAGE_EXTENSIONS`` is tried in order. The first
    candidate file that exists and that ``cv2.imread`` can decode wins.

    Returns:
        ``(image, full_path)`` for the first readable candidate, or
        ``(None, None)`` when no candidate exists or none can be read.
    """
    candidates = (
        os.path.join(base_path, base_name + extension)
        for extension in IMAGE_EXTENSIONS
    )
    for candidate in candidates:
        if not os.path.exists(candidate):
            continue
        image = cv2.imread(candidate)
        if image is None:
            # The file exists but could not be decoded; try the next extension.
            continue
        return image, candidate
    return None, None
def _add_text_child(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(node)
    return node


def _to_pixel_box(x_center, y_center, box_w, box_h, width, height):
    """Convert a normalized center/size box to clamped pixel corner coordinates."""

    def _scale(value, limit):
        # Round instead of truncating so floating-point error (e.g.
        # 28.999999999999996) does not shift a corner by one pixel, then
        # clamp into the image.
        return min(max(round(value * limit), 0), limit)

    return (
        _scale(x_center - box_w / 2, width),
        _scale(y_center - box_h / 2, height),
        _scale(x_center + box_w / 2, width),
        _scale(y_center + box_h / 2, height),
    )


def makexml(pic_path, txt_path, xml_path):
    """Convert every YOLO ``.txt`` label file in *txt_path* to a VOC-style XML file.

    For each ``.txt`` file the image with the same base name is looked up in
    *pic_path* via ``find_image``; its shape supplies the width, height and
    depth written to ``<size>``. Each valid label line
    (``class_id x_center y_center w h``, with ``class_id`` a key of
    ``LABEL_DICT``) becomes one ``<object>`` whose ``<bndbox>`` holds pixel
    coordinates rounded to the nearest integer and clamped to the image.
    The XML is saved as ``<xml_path>/<base name>.xml``.

    Entries that are not ``.txt`` files are ignored. Label files without a
    readable image are reported and skipped. Blank lines are ignored;
    malformed lines (wrong field count, unknown class id, non-numeric
    values) are reported and skipped.

    Args:
        pic_path: Directory containing the images.
        txt_path: Directory containing the YOLO label files.
        xml_path: Output directory; created if it does not exist.
    """
    os.makedirs(xml_path, exist_ok=True)

    for txt_name in os.listdir(txt_path):
        base_name, extension = os.path.splitext(txt_name)
        # Only label files are converted; anything else in the directory
        # (images, notes, ...) must not be parsed as labels.
        if extension.lower() != ".txt":
            continue
        txt_file_path = os.path.join(txt_path, txt_name)

        # Locate the matching image
        img, img_path = find_image(pic_path, base_name)
        if img is None:
            print(f"[ERROR] 无法读取图片:{base_name},检查是否存在并可用。")
            continue
        height, width, depth = img.shape

        # Read the YOLO labels
        with open(txt_file_path, 'r') as file:
            lines = file.readlines()

        # Build the XML skeleton
        xml_builder = Document()
        annotation = xml_builder.createElement("annotation")
        xml_builder.appendChild(annotation)
        _add_text_child(xml_builder, annotation, "folder", "datasetRGB")
        _add_text_child(
            xml_builder, annotation, "filename", os.path.basename(img_path)
        )
        size = xml_builder.createElement("size")
        for tag, value in (("width", width), ("height", height), ("depth", depth)):
            _add_text_child(xml_builder, size, tag, value)
        annotation.appendChild(size)

        for line in lines:
            parts = line.split()
            if not parts:
                # Blank lines (e.g. a trailing newline) carry no label.
                continue
            if len(parts) != 5 or parts[0] not in LABEL_DICT:
                print(f"[WARNING] 无效标签行:{line.strip()},已跳过。")
                continue
            try:
                x_center, y_center, box_w, box_h = map(float, parts[1:])
                x_min, y_min, x_max, y_max = _to_pixel_box(
                    x_center, y_center, box_w, box_h, width, height
                )
            except (ValueError, OverflowError):
                # Non-numeric, NaN or infinite field: treat as an invalid line.
                print(f"[WARNING] 无效标签行:{line.strip()},已跳过。")
                continue

            obj = xml_builder.createElement("object")
            for tag, value in (
                ("name", LABEL_DICT[parts[0]]),
                ("pose", "Unspecified"),
                ("truncated", "0"),
                ("difficult", "0"),
            ):
                _add_text_child(xml_builder, obj, tag, value)
            bndbox = xml_builder.createElement("bndbox")
            for tag, value in (
                ("xmin", x_min),
                ("ymin", y_min),
                ("xmax", x_max),
                ("ymax", y_max),
            ):
                _add_text_child(xml_builder, bndbox, tag, value)
            obj.appendChild(bndbox)
            annotation.appendChild(obj)

        # Save the XML
        xml_save_path = os.path.join(xml_path, base_name + ".xml")
        with open(xml_save_path, 'w', encoding='utf-8') as f:
            xml_builder.writexml(
                f, indent='\t', newl='\n', addindent='\t', encoding='utf-8'
            )
        print(f"[OK] 已生成:{xml_save_path}")
if __name__ == "__main__":
    # Placeholder locations: replace them with your own image directory,
    # YOLO label directory and XML output directory before running.
    makexml(
        pic_path=r"\test\images",
        txt_path=r"\test\labels",
        xml_path=r"\test\xml",
    )