YOLO 数据集预处理 & 增强全流程脚本详解（清洗、可视化、增强、划分）

www.02

已于 2025-07-13 12:44:01 修改

阅读量427

点赞数 5

CC 4.0 BY-SA版权

文章标签： YOLO 人工智能 机器学习 深度学习 python

于 2025-06-16 20:40:04 首次发布

本文链接：https://blog.csdn.net/m0_73177436/article/details/148619377

在实际目标检测项目中，原始数据往往不完美，常伴随着以下问题：

标签格式混乱、坐标越界或未归一化
标注框不合理、图像缺失或标签对不上
数据分布不均，训练效果差
缺乏可视化验证和增强样本

为此，本文总结了一套 完整的 YOLO 数据预处理脚本工具链（共5个），涵盖从清洗、可视化、增强到数据划分的全部过程，每个脚本职责单一、配合高效。

✅ 1. clean_labels.py —— 清洗标签，修复坐标归一化问题

主要功能：

自动识别坐标是否未归一化（像素值），并进行归一化处理
检测并裁剪越界框，超出范围会自动裁剪到 0~1
坐标错误、无效框（宽/高 ≤ 0）自动删除
打印所有被修复或删除的框及所在文件/行号

全部代码为：

import os, cv2, glob, math, sys

# Dataset folders: every image in IMG_DIR is paired with LBL_DIR/<image stem>.txt.
IMG_DIR = "data/ggcxin-heibai/images"
LBL_DIR = "data/ggcxin-heibai/labels"

def clamp_open(v, lo=1e-6, hi=1 - 1e-6):
    """Clamp *v* into [lo, hi]; the defaults keep it strictly inside (0, 1)."""
    capped = v if v < hi else hi          # upper bound first
    return capped if capped > lo else lo  # then lower bound

def is_pixel_coord(xc, yc, bw, bh, thresh=1.5):
    """Return True when the largest of the four values exceeds *thresh*.

    A value well above 1 is taken as a sign that the box is expressed in
    pixels rather than in normalized coordinates.
    """
    largest = max((xc, yc, bw, bh))
    return largest > thresh

# Running totals reported in the final summary line.
modified_files = 0
total_fixed = 0
total_dropped = 0


def _fmt6(v):
    """Format *v* with six decimals, truncating (floor) instead of rounding.

    Truncation guarantees the written value never exceeds the computed one.
    """
    return f"{math.floor(v * 1e6) / 1e6:.6f}"


for img_path in glob.glob(os.path.join(IMG_DIR, "*.*")):
    img = cv2.imread(img_path)
    if img is None:
        # Not a readable image: nothing to normalize against.
        continue
    ih, iw = img.shape[:2]
    stem = os.path.splitext(os.path.basename(img_path))[0]
    lbl_path = os.path.join(LBL_DIR, stem + ".txt")
    if not os.path.exists(lbl_path):
        continue

    new_lines, file_changed = [], False
    with open(lbl_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    for ln_idx, ln in enumerate(lines, 1):
        ln = ln.strip()
        if not ln:
            continue
        try:
            # Too few fields or non-numeric tokens both raise ValueError.
            cls, xc, yc, bw, bh = ln.split()[:5]
            cls = int(cls)
            xc, yc, bw, bh = map(float, (xc, yc, bw, bh))
        except ValueError:
            print(f"[DROP] 解析失败 {lbl_path}:{ln_idx} -> {ln}")
            file_changed = True
            total_dropped += 1
            continue

        orig = (xc, yc, bw, bh)

        # ---------- 1. pixel coordinates -> normalized ----------
        normalized = is_pixel_coord(xc, yc, bw, bh)
        if normalized:
            xc /= iw
            yc /= ih
            bw /= iw
            bh /= ih

        # ---------- 2. convert to corner form x1y1x2y2 ----------
        x1, y1 = xc - bw / 2, yc - bh / 2
        x2, y2 = xc + bw / 2, yc + bh / 2

        # ---------- 3. clip the corners ----------
        x1_n, y1_n, x2_n, y2_n = map(clamp_open, (x1, y1, x2, y2))
        bw_n, bh_n = x2_n - x1_n, y2_n - y1_n
        if bw_n <= 0 or bh_n <= 0:
            print(f"[DROP] 无效框   {lbl_path}:{ln_idx} -> {orig}")
            file_changed = True
            total_dropped += 1
            continue

        xc_n, yc_n = (x1_n + x2_n) / 2, (y1_n + y2_n) / 2
        if not (0 < xc_n < 1 and 0 < yc_n < 1):
            print(f"[DROP] 越界中心 {lbl_path}:{ln_idx} -> {orig}")
            file_changed = True
            total_dropped += 1
            continue

        # ---------- 4. record any modification ----------
        clipped = any(
            abs(a - b) > 1e-6
            for a, b in [(x1, x1_n), (y1, y1_n), (x2, x2_n), (y2, y2_n)]
        )
        if clipped:
            print(f"[FIX ] 裁剪框   {lbl_path}:{ln_idx}  {orig} -> {(xc_n, yc_n, bw_n, bh_n)}")
        elif normalized:
            # The clip check above compares values taken after normalization,
            # so a pixel-coordinate box lying fully inside the image would
            # otherwise go unnoticed and the file would never be rewritten.
            print(f"[FIX ] 归一化   {lbl_path}:{ln_idx}  {orig} -> {(xc_n, yc_n, bw_n, bh_n)}")
        if clipped or normalized:
            file_changed = True
            total_fixed += 1

        # ---------- 5. queue the cleaned line ----------
        new_lines.append(
            f"{cls} {_fmt6(xc_n)} {_fmt6(yc_n)} {_fmt6(bw_n)} {_fmt6(bh_n)}\n"
        )

    if file_changed:
        # An empty new_lines list (every box dropped) leaves an empty label file.
        with open(lbl_path, "w", encoding="utf-8") as f:
            f.writelines(new_lines)
        modified_files += 1

print(f"\n✅ 清洗完成：修改 {modified_files} 份标签；修复 {total_fixed} 行，删除 {total_dropped} 行。")

典型输出（数值为近似示意）：

[FIX ] 裁剪框   data/xxx.txt:5  (0.95, 1.1, 0.2, 0.3) -> (0.925, 0.975, 0.15, 0.05)
[DROP] 无效框   data/xxx.txt:8 -> (1.5, 1.5, 0.1, 0.1)

✅ 清洗完成：修改 42 份标签；修复 85 行，删除 23 行。

👉 适用于 从 VOC 或 LabelMe 转换后初步整理的 YOLO 标签。

✅ 2. see_confirm.py —— 单张标签可视化验证

主要功能：

读取单张图片及对应 .txt 标签
在图上绘制每个 YOLO 标注框（默认红色）
（可选）启用“修正框”对比，查看修剪后的标签框

全部代码为：

import cv2
import os

# ==== Input / output paths ====
IMG_PATH = "data/ggc/images/00001504.png"
LBL_PATH = "data/ggc/labels/00001504.txt"
OUT_PATH = "visualized_bbox_fix.png"

# ==== Drawing colors (OpenCV tuples are in BGR order) ====
BOX_COLOR_ORIG = (0, 0, 255)    # red   - original box
BOX_COLOR_FIXED = (0, 255, 0)   # green - corrected box


def yolo_to_xyxy(xc, yc, w, h, img_w, img_h):
    """Convert a normalized YOLO box (center + size) to integer pixel corners.

    Returns (x1, y1, x2, y2); each value is truncated toward zero by int().
    """
    half_w = w / 2
    half_h = h / 2
    left, right = xc - half_w, xc + half_w
    top, bottom = yc - half_h, yc + half_h
    return int(left * img_w), int(top * img_h), int(right * img_w), int(bottom * img_h)


def fix_yolo_bbox(xc, yc, w, h):
    """Clip a normalized YOLO box to the unit square and return it in YOLO form.

    Both edges on each axis are clamped to [0, 1]. A box lying entirely
    outside the image therefore collapses to a zero-size box on the nearest
    border instead of producing a negative width/height.

    Args:
        xc, yc: Normalized box center.
        w, h: Normalized box width and height.

    Returns:
        Tuple (xc_new, yc_new, w_new, h_new) of the clipped box.
    """
    def _unit(v):
        # Clamp a single coordinate into [0, 1].
        return min(1.0, max(0.0, v))

    x_min, x_max = _unit(xc - w / 2), _unit(xc + w / 2)
    y_min, y_max = _unit(yc - h / 2), _unit(yc + h / 2)

    w_new = x_max - x_min
    h_new = y_max - y_min
    xc_new = (x_min + x_max) / 2
    yc_new = (y_min + y_max) / 2
    return xc_new, yc_new, w_new, h_new


def main(show_fixed=False):
    """Draw the YOLO boxes from LBL_PATH onto IMG_PATH and save to OUT_PATH.

    Args:
        show_fixed: When True, additionally draw every box after clipping it
            with fix_yolo_bbox (in BOX_COLOR_FIXED) and print the values
            before and after. The default draws the original boxes only.

    Returns:
        None. Problems (unreadable image, missing label file, failed save)
        are reported on stdout and end the run early.
    """
    img = cv2.imread(IMG_PATH)
    if img is None:
        print(f"[ERR] 图像读取失败: {IMG_PATH}")
        return
    h, w = img.shape[:2]

    if not os.path.exists(LBL_PATH):
        print(f"[ERR] 标签不存在: {LBL_PATH}")
        return

    with open(LBL_PATH, "r", encoding="utf-8") as f:
        lines = [ln.strip() for ln in f if ln.strip()]

    for idx, line in enumerate(lines):
        parts = line.split()
        try:
            cls = int(parts[0])
            # Fewer than four coordinates fails the unpacking with ValueError.
            xc, yc, bw, bh = map(float, parts[1:5])
        except ValueError:
            # Skip the bad entry instead of aborting the whole visualization.
            print(f"[WARN] 第 {idx+1} 个框解析失败，已跳过: {line}")
            continue

        # Original box.
        x1, y1, x2, y2 = yolo_to_xyxy(xc, yc, bw, bh, w, h)
        cv2.rectangle(img, (x1, y1), (x2, y2), BOX_COLOR_ORIG, 2)
        cv2.putText(img, f"Orig {cls}", (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, BOX_COLOR_ORIG, 1)

        print(f"Box {idx+1} (cls={cls})")

        if show_fixed:
            # Clipped box, drawn on top for comparison.
            xc2, yc2, bw2, bh2 = fix_yolo_bbox(xc, yc, bw, bh)
            x1f, y1f, x2f, y2f = yolo_to_xyxy(xc2, yc2, bw2, bh2, w, h)
            cv2.rectangle(img, (x1f, y1f), (x2f, y2f), BOX_COLOR_FIXED, 2)
            cv2.putText(img, f"Fix {cls}", (x1f, y1f - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, BOX_COLOR_FIXED, 1)
            print(f"  原始:  xc={xc:.6f}, yc={yc:.6f}, w={bw:.6f}, h={bh:.6f}")
            print(f"  修正:  xc={xc2:.6f}, yc={yc2:.6f}, w={bw2:.6f}, h={bh2:.6f}")

        print("")

    # cv2.imwrite signals failure through its return value, so check it
    # before announcing success.
    if not cv2.imwrite(OUT_PATH, img):
        print(f"[ERR] 图像保存失败: {OUT_PATH}")
        return
    print(f"✅ 可视化结果保存至: {OUT_PATH}")


if __name__ == "__main__":
    main()

使用方式：
配置 IMG_PATH 和 LBL_PATH，运行后自动输出一张可视化图片。

✅ 可视化结果保存至: visualized_bbox_fix.png

👉 适用于 清洗后 spot check，逐张检查标签效果是否正常。

✅ 3. see_labels.py —— 对比原始框 vs 修正框

主要功能：

传入原始 bbox 和修正 bbox（YOLO 格式）
可视化展示两者在图像中的位置差异
红色框为原始，绿色框为修正后

original_bbox = (0.91, 0.46, 0.41, 0.07)
fixed_bbox    = (0.85, 0.46, 0.29, 0.07)

完整代码为：

import cv2
import os

def yolo_to_xyxy(xc, yc, w, h, img_w, img_h):
    """Map a normalized YOLO box to pixel corners (left, top, right, bottom)."""
    corners = (
        (xc - w / 2) * img_w,   # left
        (yc - h / 2) * img_h,   # top
        (xc + w / 2) * img_w,   # right
        (yc + h / 2) * img_h,   # bottom
    )
    # int() truncates toward zero, exactly one conversion per corner.
    return tuple(int(c) for c in corners)

def visualize_bbox_fix(img_path, original_bbox, fixed_bbox, save_path="vis_output.jpg"):
    """Draw an original and a corrected YOLO box on one image and save it.

    Args:
        img_path: Path of the image to annotate.
        original_bbox: Normalized YOLO tuple (xc, yc, w, h), drawn in red.
        fixed_bbox: Normalized YOLO tuple (xc, yc, w, h), drawn in green.
        save_path: Where the annotated image is written.

    Returns:
        None. Read and write failures are reported on stdout.
    """
    image = cv2.imread(img_path)
    if image is None:
        print(f"图像读取失败: {img_path}")
        return

    ih, iw = image.shape[:2]

    # Original box (red).
    x1o, y1o, x2o, y2o = yolo_to_xyxy(*original_bbox, iw, ih)
    cv2.rectangle(image, (x1o, y1o), (x2o, y2o), (0, 0, 255), 2)

    # Corrected box (green).
    x1f, y1f, x2f, y2f = yolo_to_xyxy(*fixed_bbox, iw, ih)
    cv2.rectangle(image, (x1f, y1f), (x2f, y2f), (0, 255, 0), 2)

    cv2.putText(image, "original", (x1o, y1o - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
    cv2.putText(image, "fixed", (x1f, y1f - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1)

    # cv2.imwrite reports failure via its return value; do not claim success
    # unless the file was actually written.
    if not cv2.imwrite(save_path, image):
        print(f"图像保存失败: {save_path}")
        return
    print(f"✅ 保存对比图至：{save_path}")

# Example invocation (point img_path at one of your own images).
if __name__ == "__main__":
    # Both boxes are normalized YOLO tuples: (x_center, y_center, w, h).
    visualize_bbox_fix(
        img_path="data/ggc/images/00001085.png",
        original_bbox=(0.910156, 0.466667, 0.410938, 0.070833),
        fixed_bbox=(0.8523435, 0.466667, 0.295313, 0.070833),
        save_path="bbox_fix_vis.jpg",
    )

👉 适用于 单独分析异常样本、比较框修正前后的效果。

✅ 4. zengqiang.py —— 自动数据增强（含标签同步）

主要功能：

图像增强（旋转 ±10° 或 180°±10°，亮度扰动，运动模糊）(具体增强效果可以自行修改)
自动同步变换后的标签框，保持正确位置
每张图生成多个增强版本（配置 n_aug_per_img 控制）
输出新的 images 和 labels，格式为 YOLO txt

完整代码为：

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
YOLO dataset augmentation script.

- Rotation: the rotation block in build_transform is configured with p=0.9
  (so it is not unconditional); the angle comes from either [-10°, 10°] or
  [170°, 190°].
- Brightness: shifted by -10% to +25%, i.e. biased toward brightening.
- Motion blur: randomly applied to imitate camera-motion smear.
Output stays in YOLO txt format (class x y w h) with coordinates normalized to [0,1].
"""

import os
import random
from glob import glob
import traceback

import cv2
import albumentations as A
from tqdm import tqdm  # ✅ 新增

# Source dataset and destination folders for the augmented copies.
IMG_DIR      = "data/ggc/images"
LBL_DIR      = "data/ggc/labels"
OUT_IMG_DIR  = "data/ggc2/images"
OUT_LBL_DIR  = "data/ggc2/labels"

# Make sure both output folders exist before anything is written.
for _out_dir in (OUT_IMG_DIR, OUT_LBL_DIR):
    os.makedirs(_out_dir, exist_ok=True)


def build_transform():
    """Assemble and return the albumentations pipeline used for every sample.

    Stages, in order:
      1. One of two rotations (angle in [-10, 10] or [170, 190], constant
         border filled with 0); the OneOf block itself has p=0.9.
      2. Random brightness/contrast (brightness -0.10..0.25,
         contrast -0.10..0.10, p=0.9).
      3. Motion blur with blur_limit (3, 7), p=0.4.

    Boxes are declared in "yolo" format with labels carried in the
    "class_labels" field; min_area=1 and min_visibility=0.3 are passed to
    BboxParams.
    """
    # NOTE(review): the inner p values (0.7 / 0.5) presumably act as relative
    # selection weights inside OneOf — confirm against the installed version.
    small_tilt = A.Rotate(limit=(-10, 10), border_mode=cv2.BORDER_CONSTANT, value=0, p=0.7)
    near_flip = A.Rotate(limit=(170, 190), border_mode=cv2.BORDER_CONSTANT, value=0, p=0.5)
    rotation = A.OneOf([small_tilt, near_flip], p=0.9)

    photometric = A.RandomBrightnessContrast(
        brightness_limit=(-0.10, 0.25),
        contrast_limit=(-0.10, 0.10),
        p=0.9,
    )
    blur = A.MotionBlur(blur_limit=(3, 7), p=0.4)

    bbox_cfg = A.BboxParams(
        format="yolo",
        label_fields=["class_labels"],
        min_area=1,
        min_visibility=0.3,
    )
    return A.Compose([rotation, photometric, blur], bbox_params=bbox_cfg)


def load_yolo_labels(label_path):
    """Read a YOLO label file and return (boxes, classes).

    Blank lines are skipped. For every other line the first token is parsed
    as the integer class id and up to four following tokens as floats.
    """
    boxes, classes = [], []
    with open(label_path, "r", encoding="utf-8") as f:
        rows = (raw.split() for raw in f if raw.strip())
        for cls_token, *coord_tokens in rows:
            classes.append(int(cls_token))
            boxes.append([float(tok) for tok in coord_tokens[:4]])
    return boxes, classes


def save_yolo_labels(label_path, boxes, classes):
    """Write one "<class> <v1> <v2> ..." line per box, values with 6 decimals.

    Classes and boxes are paired with zip(), so extra items in the longer
    sequence are ignored.
    """
    rows = (
        f"{cls} " + " ".join(f"{v:.6f}" for v in bbox) + "\n"
        for cls, bbox in zip(classes, boxes)
    )
    with open(label_path, "w", encoding="utf-8") as f:
        f.writelines(rows)


def main(n_aug_per_img: int = 1):
    """Augment every labelled image in IMG_DIR and write the results.

    For each supported image with a non-empty label file, the transform from
    build_transform() is applied n_aug_per_img times. Each result is saved as
    "<stem>_aug<i>.jpg" in OUT_IMG_DIR with a matching "<stem>_aug<i>.txt" in
    OUT_LBL_DIR. Passes that leave no boxes are skipped.

    Args:
        n_aug_per_img: Number of augmented variants to attempt per image.
    """
    transform = build_transform()

    supported = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}
    # Filter up front so the announced count and the progress bar only cover
    # files that will actually be processed.
    img_paths = [
        p
        for p in sorted(glob(os.path.join(IMG_DIR, "*.*")))
        if os.path.splitext(p)[1].lower() in supported
    ]

    print(f"开始增强，共 {len(img_paths)} 张图像，每张增强 {n_aug_per_img} 次。")

    for img_path in tqdm(img_paths, desc="增强中"):
        file_name = os.path.basename(img_path)
        name_no_ext, _ = os.path.splitext(file_name)
        label_path = os.path.join(LBL_DIR, f"{name_no_ext}.txt")

        if not os.path.exists(label_path):
            print(f"[WARN] 标签缺失，跳过：{label_path}")
            continue

        image = cv2.imread(img_path)
        if image is None:
            print(f"[WARN] 图像读取失败：{img_path}")
            continue

        try:
            # The labels do not change between passes: read them once.
            bboxes, class_labels = load_yolo_labels(label_path)
            if not bboxes:
                print(f"[WARN] 标签为空，跳过：{label_path}")
                continue

            for i in range(n_aug_per_img):
                # Hand the transform fresh copies so one pass can never
                # affect the inputs of the next.
                aug = transform(
                    image=image,
                    bboxes=[list(b) for b in bboxes],
                    class_labels=list(class_labels),
                )
                aug_img = aug["image"]
                aug_bboxes = aug["bboxes"]
                aug_labels = aug["class_labels"]

                if not aug_bboxes:
                    print(f"[INFO] 增强后所有框被裁剪，跳过：{label_path}")
                    continue

                out_img_path = os.path.join(OUT_IMG_DIR, f"{name_no_ext}_aug{i}.jpg")
                out_lbl_path = os.path.join(OUT_LBL_DIR, f"{name_no_ext}_aug{i}.txt")

                # cv2.imwrite reports failure via its return value; skip the
                # label so no label file is left without its image.
                if not cv2.imwrite(out_img_path, aug_img):
                    print(f"[WARN] 图像写入失败，跳过：{out_img_path}")
                    continue
                save_yolo_labels(out_lbl_path, aug_bboxes, aug_labels)

        except Exception:
            # Per-image boundary: report the failure and move on to the next
            # image instead of aborting the whole run.
            print(f"\n[ERROR] 增强失败：{img_path}")
            print(f"对应标签文件：{label_path}")
            traceback.print_exc()

    print("\n✅ 数据增强完成！")


if __name__ == "__main__":
    main(n_aug_per_img=5)

结果为：

开始增强，共 180 张图像，每张增强 5 次。
✅ 数据增强完成！

👉 适用于 数据量少、类别不平衡时，通过变换提升模型泛化能力。

✅ 5. huafen.py —— 划分训练集 / 验证集

主要功能：

按设定比例随机划分训练集 / 验证集
过滤无标签图片
自动创建标准 YOLO 目录结构
复制图像与标签到 train/ 与 val/ 下

完整代码为：

import os
import glob
import shutil
import random

# ---- Source dataset ----
IMAGES_DIR = "data/ggc2/images"
LABELS_DIR = "data/ggc2/labels"

# ---- Root folder of the split dataset ----
OUTPUT_DIR = "data/ggcxin"

# ---- Split parameters ----
train_ratio = 0.8
seed = 42
img_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

# ---- Create <OUTPUT_DIR>/{train,val}/{images,labels} ----
for subset in ("train", "val"):
    for kind in ("images", "labels"):
        os.makedirs(os.path.join(OUTPUT_DIR, subset, kind), exist_ok=True)

# ---- Collect every image whose name ends with a known extension ----
all_imgs = sorted(
    path
    for ext in img_exts
    for path in glob.glob(os.path.join(IMAGES_DIR, f"*{ext}"))
)

print(f"共找到 {len(all_imgs)} 张图像")

# ---- Keep only images that have a same-stem .txt label ----
valid_pairs = []
for img_path in all_imgs:
    base = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(LABELS_DIR, base + ".txt")
    if not os.path.isfile(label_path):
        print(f"[WARN] 缺少标签文件：{label_path}，跳过此图")
        continue
    valid_pairs.append((img_path, label_path))

print(f"有效样本数：{len(valid_pairs)}")

# ============ Random split, grouped by source image ============
def _source_key(img_path):
    """Return the stem shared by an image and its "<stem>_aug<N>" variants."""
    stem = os.path.splitext(os.path.basename(img_path))[0]
    head, sep, tail = stem.rpartition("_aug")
    # Only strip the suffix when it really is "_aug" followed by digits.
    return head if sep and head and tail.isdigit() else stem


# Augmented copies of one picture are near-duplicates. Shuffling individual
# files would scatter them over train and val and inflate validation scores,
# so whole groups are assigned to a subset instead. Files without an
# "_aug<N>" suffix form single-item groups, i.e. a plain per-file split.
groups = {}
for pair in valid_pairs:
    groups.setdefault(_source_key(pair[0]), []).append(pair)

group_keys = sorted(groups)   # fixed starting order -> reproducible shuffle
random.seed(seed)
random.shuffle(group_keys)

# train_ratio is applied to the number of groups (source images).
split_idx = int(len(group_keys) * train_ratio)
train_set = [pair for key in group_keys[:split_idx] for pair in groups[key]]
val_set   = [pair for key in group_keys[split_idx:] for pair in groups[key]]

print(f"→ 训练集：{len(train_set)} | 验证集：{len(val_set)}")

# ---- Copy each image/label pair into its subset ----
def copy_pair(pair_list, subset_name):
    """Copy every (image, label) pair into OUTPUT_DIR/<subset_name>/images|labels.

    File names are kept; shutil.copy2 is used for both files.
    """
    img_root = os.path.join(OUTPUT_DIR, subset_name, "images")
    lbl_root = os.path.join(OUTPUT_DIR, subset_name, "labels")
    for src_img, src_lbl in pair_list:
        shutil.copy2(src_img, os.path.join(img_root, os.path.basename(src_img)))
        shutil.copy2(src_lbl, os.path.join(lbl_root, os.path.basename(src_lbl)))


for _subset, _pairs in (("train", train_set), ("val", val_set)):
    copy_pair(_pairs, _subset)

print("✅ 数据集划分完成")
print(f"输出目录结构: {OUTPUT_DIR}/train|val/images+labels")

输出示例：

共找到 720 张图像
有效样本数：680
→ 训练集：544 | 验证集：136
✅ 数据集划分完成

输出结构如下：

data/
└── ggcxin/
    ├── train/
    │   ├── images/
    │   └── labels/
    └── val/
        ├── images/
        └── labels/

👉 适用于 构建可直接用于 YOLO 训练的数据结构。

🔁 推荐使用流程

以下是建议的执行顺序：

✅ 清洗标签：clean_labels.py
🔍 抽查可视化：see_confirm.py + see_labels.py
🔄 数据增强：zengqiang.py
🧩 划分数据集：huafen.py（注意：同一张原图的多个增强版本应划入同一子集，否则验证集指标会因数据泄漏而偏高）

🛠 你可以根据项目需要自定义增强策略、标签过滤规则或类名映射逻辑。