目标检测标签格式转换（水平框）

原创已于 2023-06-11 16:24:17 修改 · 607 阅读

6 ·

CC 4.0 BY-SA版权

文章标签：

#目标检测 #计算机视觉 #人工智能

于 2021-03-02 17:06:29 首次发布

目标检测专栏收录该内容

37 篇文章

订阅专栏

本文介绍如何在常见的目标检测数据集格式间进行转换，包括从VOC到COCO、COCO到YOLO、YOLO到VOC以及二值图到YOLO的详细步骤和代码实现。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

文章目录

VOC转COCO
COCO转YOLO
YOLO转VOC
二值图转YOLO

VOC转COCO

import sys
import os
import json
import warnings
import numpy as np
import xml.etree.ElementTree as ET
import glob

START_BOUNDING_BOX_ID = 1
# Category ids are generated from the categories you list here.
# Author's note: by COCO convention, id 0 is the background category.
# Author's note: CenterNet numbers categories from 0; otherwise heatmap
# generation raises an error.
PRE_DEFINE_CATEGORIES = {"person": 1}
START_IMAGE_ID = 0


# If necessary, pre-define category and its id
#  PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
#  "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
#  "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
#  "motorbike": 14, "person": 15, "pottedplant": 16,
#  "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}


def get(root, name):
    """Return all children of ``root`` that match ``name``.

    This is a thin wrapper around ``root.findall(name)``; the result is an
    empty list when nothing matches.
    """
    return root.findall(name)


def get_and_check(root, name, length):
    """Look up the children of ``root`` matching ``name`` and validate the count.

    Arguments:
        root -- element searched with ``findall``.
        name -- tag (or path expression) to look for.
        length -- expected number of matches; a value <= 0 disables the
            count check.

    Returns:
        The single matching element when ``length`` is 1, otherwise the
        list of all matches.

    Raises:
        ValueError -- when nothing matches, or when ``length`` is positive
            and the number of matches differs from it.
    """
    found = root.findall(name)
    count = len(found)
    if not count:
        raise ValueError("Can not find %s in %s." % (name, root.tag))
    if length > 0 and count != length:
        raise ValueError(
            "The size of %s is supposed to be %d, but is %d."
            % (name, length, count)
        )
    # A request for exactly one element hands back the element itself.
    return found[0] if length == 1 else found


def get_filename_as_int(filename):
    """Derive an integer image id from a file name.

    The directory part (``/`` or ``\\`` separated) and the extension are
    dropped first. A purely numeric stem is returned as that integer.
    Any other stem falls back to a character-code checksum, so that
    different names no longer all map to the same id.

    Arguments:
        filename -- file name or path as a string.

    Returns:
        int -- the numeric stem, or the checksum of the stem's characters.
        The checksum is not guaranteed to be unique (anagrams collide).
    """
    stem = os.path.splitext(os.path.basename(filename.replace("\\", "/")))[0]
    try:
        return int(stem)
    except ValueError:
        # Non-numeric name: sum of the character codes (each taken modulo
        # 10000), instead of a constant that collapses every image to one id.
        return sum(ord(char) % 10000 for char in stem)


def get_categories(xml_files):
    """Generate category name to id mapping from a list of xml files.

    The category of each ``<object>`` is read from its ``<name>`` child,
    looked up by tag rather than by position, so annotations whose first
    child is something else (e.g. ``<pose>``) are handled correctly.

    Arguments:
        xml_files {list} -- A list of xml file paths.

    Returns:
        dict -- category name to id mapping; names are sorted
        alphabetically and numbered from 0.

    Raises:
        ValueError -- when an ``<object>`` has no ``<name>`` child.
    """
    names = set()
    for xml_file in xml_files:
        root = ET.parse(xml_file).getroot()
        for member in root.findall("object"):
            name = member.findtext("name")
            if name is None:
                raise ValueError("Can not find name in object of %s." % xml_file)
            names.add(name)
    return {name: i for i, name in enumerate(sorted(names))}


def convert(xml_files, json_file):
    """Convert Pascal VOC XML annotations into a single COCO-style JSON file.

    Images are numbered consecutively from ``START_IMAGE_ID`` in the order
    of ``xml_files``; boxes are numbered from ``START_BOUNDING_BOX_ID``.
    Categories come from ``PRE_DEFINE_CATEGORIES`` when it is not ``None``,
    otherwise from ``get_categories(xml_files)``. A category that is met in
    the annotations but missing from that mapping receives the next unused id.

    Arguments:
        xml_files -- list of VOC annotation file paths.
        json_file -- path of the JSON file to write; missing parent
            directories are created.

    Raises:
        ValueError -- when a required XML element is missing or duplicated.
        AssertionError -- when a box has non-positive width or height.
    """
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    if PRE_DEFINE_CATEGORIES is not None:
        # Copy, so categories discovered below do not leak into the
        # module-level mapping and affect later calls.
        categories = dict(PRE_DEFINE_CATEGORIES)
    else:
        categories = get_categories(xml_files)
    bnd_id = START_BOUNDING_BOX_ID
    for image_id, xml_file in enumerate(xml_files, START_IMAGE_ID):
        root = ET.parse(xml_file).getroot()
        path = get(root, "path")
        if len(path) == 1:
            # Normalise Windows separators so basename also works on POSIX.
            filename = os.path.basename(path[0].text.replace("\\", "/"))
        elif len(path) == 0:
            filename = get_and_check(root, "filename", 1).text
        else:
            raise ValueError("%d paths found in %s" % (len(path), xml_file))

        size = get_and_check(root, "size", 1)
        width = int(get_and_check(size, "width", 1).text)
        height = int(get_and_check(size, "height", 1).text)

        # Append the default suffix only when the name does not already end
        # with a jpg/png extension.
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            filename = filename + ".jpg"
            warnings.warn("filename's default suffix is jpg")

        json_dict["images"].append(
            {
                "file_name": filename,  # image file name
                "height": height,
                "width": width,
                "id": image_id,  # unique per image
            }
        )

        # Segmentation is not supported; only bounding boxes are converted.
        for obj in get(root, "object"):
            category = get_and_check(obj, "name", 1).text
            if category not in categories:
                # Next unused id. len(categories) would collide with
                # predefined ids that do not start at 0.
                categories[category] = max(categories.values(), default=-1) + 1
            category_id = categories[category]
            bndbox = get_and_check(obj, "bndbox", 1)
            # The original script shifts the min corner by one pixel and
            # leaves the max corner as is; that arithmetic is kept.
            xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1
            ymin = int(get_and_check(bndbox, "ymin", 1).text) - 1
            xmax = int(get_and_check(bndbox, "xmax", 1).text)
            ymax = int(get_and_check(bndbox, "ymax", 1).text)
            assert xmax > xmin, "xmax <= xmin in %s" % xml_file
            assert ymax > ymin, "ymax <= ymin in %s" % xml_file
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            json_dict["annotations"].append(
                {
                    "area": o_width * o_height,
                    "iscrowd": 0,
                    "image_id": image_id,  # matches the "id" of the image entry
                    "bbox": [xmin, ymin, o_width, o_height],
                    "category_id": category_id,
                    "id": bnd_id,  # one image may own several annotations
                    "ignore": 0,
                    "segmentation": [],
                }
            )
            bnd_id += 1

    for cate, cid in categories.items():
        json_dict["categories"].append(
            {"supercategory": "none", "id": cid, "name": cate}
        )

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    out_dir = os.path.dirname(json_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(json_file, "w") as out:
        json.dump(json_dict, out, indent=4)


if __name__ == "__main__":
    # Directory holding the Pascal VOC ``*.xml`` annotations to convert.
    xml_dir = r"D:\data\pedestrainDetection\archive\Val\Val\Annotations"
    # Path of the JSON file that will be written.
    json_file = "./train.json"

    # For a train/test split, pass only a subset of these files to convert().
    xml_pattern = os.path.join(xml_dir, "*.xml")
    xml_files = glob.glob(xml_pattern)
    xml_count = len(xml_files)
    print("Number of xml files: {}".format(xml_count))

    convert(xml_files, json_file)
    print("Success: {}".format(json_file))

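COCO转YOLO（注：下面的代码读取的是 labelme 导出的 JSON 标注文件，即包含 imageWidth、imageHeight、shapes 字段的文件，而不是 COCO 的 instances JSON）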

import json
import os
import glob
import cv2


def mkdir(path):
    """Create directory ``path``, including missing parents, unless it exists.

    Nothing is done when ``path`` already exists.
    """
    if not os.path.exists(path):
        # makedirs also creates intermediate directories; exist_ok=True
        # tolerates the directory appearing between the check and the call.
        os.makedirs(path, exist_ok=True)

def convert(size, box):
    """Convert a pixel box into normalized YOLO ``(x, y, w, h)`` values.

    Arguments:
        size -- ``(image_width, image_height)`` in pixels.
        box -- ``(x_min, x_max, y_min, y_max)`` in pixels; note the order.

    Returns:
        tuple -- ``(x_center, y_center, width, height)``, each divided by
        the matching image dimension.
    """
    img_w, img_h = size[0], size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    # The centre is the plain midpoint. Subtracting 1 (as the earlier code
    # did) shifts every box by one pixel when coordinates are 0-based.
    x_center = (x_min + x_max) / 2.0
    y_center = (y_min + y_max) / 2.0
    return (
        x_center / img_w,
        y_center / img_h,
        (x_max - x_min) / img_w,
        (y_max - y_min) / img_h,
    )

def decode_json(json_path, txt_name, label='js', class_id=0):
    """Write the rectangles of one annotation JSON file as YOLO label lines.

    The JSON is expected to provide ``imageWidth``, ``imageHeight`` and a
    ``shapes`` list whose entries carry ``shape_type``, ``label`` and
    ``points``. Only shapes of type ``rectangle`` whose label equals
    ``label`` are exported.

    Arguments:
        json_path -- path of the annotation JSON file (read as UTF-8).
        txt_name -- path of the label file to write; one line per box in
            the form ``<class_id> x y w h``.
        label -- shape label to export (default ``'js'``).
        class_id -- class index written at the start of each line
            (default ``0``).
    """
    with open(json_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    img_w = data['imageWidth']
    img_h = data['imageHeight']

    lines = []
    for shape in data['shapes']:
        if shape['shape_type'] != 'rectangle' or shape['label'] != label:
            continue
        first, second = shape['points'][0], shape['points'][1]
        # The two corners may be stored in any order; sort them so width
        # and height never come out negative.
        x1, x2 = sorted((int(first[0]), int(second[0])))
        y1, y2 = sorted((int(first[1]), int(second[1])))
        bbox = convert((img_w, img_h), (x1, x2, y1, y2))
        lines.append(str(class_id) + " " + " ".join([str(a) for a in bbox]) + '\n')

    # The context manager closes the file, so the labels are flushed to disk.
    with open(txt_name, 'w') as txt_file:
        txt_file.writelines(lines)


if __name__ == '__main__':
    # Convert every annotation JSON under images/ into labels/<stem>.txt.
    mkdir('labels')
    jsonList = glob.glob('images/*.json')
    for jsonPath in jsonList:
        # splitext removes only the final extension, so "a.b.json" maps to
        # "a.b.txt" instead of being truncated to "a.txt".
        baseName = os.path.splitext(os.path.basename(jsonPath))[0]
        txtName = f'labels/{baseName}.txt'
        decode_json(jsonPath, txtName)

YOLO转VOC

import os
import xml.etree.ElementTree as ET
from PIL import Image
import numpy as np


def mkdir(path):
    """Create directory ``path``, including missing parents, unless it exists.

    Nothing is done when ``path`` already exists.
    """
    if not os.path.exists(path):
        # makedirs also creates intermediate directories; exist_ok=True
        # tolerates the directory appearing between the check and the call.
        os.makedirs(path, exist_ok=True)


img_path = './JPEGImages/'                   # directory of the source .jpg images
labels_path = './labels/'                    # directory of the YOLO .txt label files
annotations_path = './Annotations/'          # directory where the generated xml files are saved
### Class names; the class index at the start of each label line indexes into this list
classes = ['Aluminium foil', 'Battery', 'Aluminium blister pack', 'Carded blister pack', 'Other plastic bottle', 'Clear plastic bottle', 'Glass bottle', 'Plastic bottle cap', 'Metal bottle cap', 'Broken glass', 'Food Can', 'Aerosol', 'Drink can', 'Toilet tube', 'Other carton', 'Egg carton', 'Drink carton', 'Corrugated carton', 'Meal carton', 'Pizza box', 'Paper cup', 'Disposable plastic cup', 'Foam cup', 'Glass cup', 'Other plastic cup', 'Food waste', 'Glass jar', 'Plastic lid', 'Metal lid', 'Other plastic', 'Magazine paper', 'Tissues', 'Wrapping paper', 'Normal paper', 'Paper bag', 'Plastified paper bag', 'Plastic film', 'Six pack rings', 'Garbage bag', 'Other plastic wrapper', 'Single-use carrier bag', 'Polypropylene bag', 'Crisp packet', 'Spread tub', 'Tupperware', 'Disposable food container', 'Foam food container', 'Other plastic container', 'Plastic glooves', 'Plastic utensils', 'Pop tab', 'Rope & strings', 'Scrap metal', 'Shoe', 'Squeezable tube', 'Plastic straw', 'Paper straw', 'Styrofoam piece', 'Unlabeled litter', 'Cigarette']

# Make sure the output directory exists, then list the label files to convert.
mkdir(annotations_path)
labels = os.listdir(labels_path)


def write_xml(imgname, filepath, labeldicts, w, h):
    """Write one annotation XML file describing an image and its boxes.

    Arguments:
        imgname -- image name stored in ``<filename>`` (the caller passes
            it without extension).
        filepath -- path of the XML file to write (UTF-8).
        labeldicts -- iterable of dicts with the keys ``name``, ``xmin``,
            ``ymin``, ``xmax`` and ``ymax``; coordinates are truncated to
            integers with ``int``.
        w -- image width stored in ``<size>``.
        h -- image height stored in ``<size>``.
    """
    annotation = ET.Element('Annotation')
    ET.SubElement(annotation, 'filename').text = str(imgname)

    # <depth> is always written as 3, regardless of the actual image.
    size_node = ET.SubElement(annotation, 'size')
    for tag, value in (('width', str(w)), ('height', str(h)), ('depth', '3')):
        ET.SubElement(size_node, tag).text = value

    for entry in labeldicts:
        obj_node = ET.SubElement(annotation, 'object')
        ET.SubElement(obj_node, 'name').text = entry['name']
        # Fixed values for the fields that the labels do not provide.
        for tag, value in (('pose', 'Unspecified'), ('truncated', '0'), ('difficult', '0')):
            ET.SubElement(obj_node, tag).text = value
        box_node = ET.SubElement(obj_node, 'bndbox')
        for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
            ET.SubElement(box_node, corner).text = str(int(entry[corner]))

    ET.ElementTree(annotation).write(filepath, encoding='utf-8')


# Convert every YOLO label file into one annotation XML file.
for label in labels:
    # splitext removes exactly the extension. str.strip('.txt') removes any
    # leading/trailing '.', 't' or 'x' characters and mangles names such as
    # "test.txt".
    img_id, ext = os.path.splitext(label)
    if ext.lower() != '.txt':
        continue  # not a label file

    # Only the image dimensions are needed; PIL reports size as (width, height).
    with Image.open(img_path + img_id + '.jpg') as img:
        sw, sh = img.size

    labeldicts = []
    with open(labels_path + label, 'r') as f:
        for content in f:
            content = content.split()
            if not content:
                continue  # skip blank lines
            # Each line: class index, then centre x, centre y, width and
            # height as fractions of the image size.
            x = float(content[1]) * sw
            y = float(content[2]) * sh
            w = float(content[3]) * sw
            h = float(content[4]) * sh
            labeldicts.append({
                'name': classes[int(content[0])],
                'difficult': '0',
                # centre/size -> corner coordinates
                'xmin': x - w / 2,
                'ymin': y - h / 2,
                'xmax': x + w / 2,
                'ymax': y + h / 2,
            })
    write_xml(img_id, annotations_path + img_id + '.xml', labeldicts, sw, sh)

二值图转YOLO

import cv2
from skimage.measure import label, regionprops
import os
import glob
import matplotlib.pyplot as plt
import numpy as np


def mkdir(path):
    """Create directory ``path``, including missing parents, unless it exists.

    Nothing is done when ``path`` already exists.
    """
    if not os.path.exists(path):
        # makedirs also creates intermediate directories; exist_ok=True
        # tolerates the directory appearing between the check and the call.
        os.makedirs(path, exist_ok=True)

def convert(size, box):
    """Convert a pixel box into normalized YOLO ``(x, y, w, h)`` values.

    Arguments:
        size -- ``(image_width, image_height)`` in pixels.
        box -- ``(x_min, x_max, y_min, y_max)`` in pixels; note the order.

    Returns:
        tuple -- ``(x_center, y_center, width, height)``, each divided by
        the matching image dimension.
    """
    img_w, img_h = size[0], size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    # The centre is the plain midpoint. Subtracting 1 (as the earlier code
    # did) shifts every box by one pixel when coordinates are 0-based.
    x_center = (x_min + x_max) / 2.0
    y_center = (y_min + y_max) / 2.0
    return (
        x_center / img_w,
        y_center / img_h,
        (x_max - x_min) / img_w,
        (y_max - y_min) / img_h,
    )

if __name__ == "__main__":
    # Turn every binary mask under ./data into a YOLO label file in txts/,
    # one box per connected foreground region, all with class index 0.
    tifList = glob.glob("./data/*.tif")
    outRoot = "txts"
    mkdir(outRoot)
    for tifPath in tifList:
        img = cv2.imread(tifPath, cv2.IMREAD_LOAD_GDAL)
        if img is None:
            # imread signals an unreadable file by returning None.
            print(f"skip unreadable image: {tifPath}")
            continue
        if img.ndim == 3:
            img = img[..., 0]  # multi-channel mask: use the first channel
        img = np.where(img > 0, 1, 0).astype(np.uint8)

        # Array shape is (rows, cols), i.e. (height, width).
        h, w = img.shape[:2]
        # splitext removes only the final extension ("a.b.tif" -> "a.b").
        baseName = os.path.splitext(os.path.basename(tifPath))[0]
        outPath = os.path.join(outRoot, f"{baseName}.txt")
        labels = label(img)
        props = regionprops(labels)
        lines = []
        for prop in props:
            # regionprops bbox order: (min_row, min_col, max_row, max_col).
            yMin, xMin, yMax, xMax = prop.bbox[0], prop.bbox[1], prop.bbox[2], prop.bbox[3]
            print(f"w: {w}, xmin: {xMin}, yMin: {yMin}")
            xNorm, yNorm, wNorm, hNorm = convert((w, h), (xMin, xMax, yMin, yMax))
            lines.append(f"0 {xNorm} {yNorm} {wNorm} {hNorm} \n")

        # The context manager closes the file so the labels reach the disk.
        with open(outPath, 'w') as out:
            out.write("".join(lines))