在实际目标检测项目中,原始数据往往不完美,常伴随着以下问题:
- 标签格式混乱、坐标越界或未归一化
- 标注框不合理、图像缺失或标签对不上
- 数据分布不均,训练效果差
- 缺乏可视化验证和增强样本

为此,本文总结了一套完整的 YOLO 数据预处理脚本工具链(共 5 个),涵盖从清洗、可视化、增强到数据划分的全部过程,每个脚本职责单一、配合高效。
✅ 1. clean_labels.py —— 清洗标签,修复坐标归一化问题
主要功能:
- 自动识别坐标是否未归一化(像素值),并进行归一化处理
- 检测并裁剪越界框,超出范围会自动裁剪到 0~1
- 坐标错误、无效框(宽/高 ≤ 0)自动删除
- 打印所有被修复或删除的框及所在文件/行号
全部代码为:
import os, cv2, glob, math, sys
IMG_DIR = "data/ggcxin-heibai/images"
LBL_DIR = "data/ggcxin-heibai/labels"
def clamp_open(v, lo=1e-6, hi=1 - 1e-6):
    """Clamp ``v`` into ``[lo, hi]``.

    With the default bounds the result always lies strictly inside the
    open interval (0, 1), which is why the bounds are offset by 1e-6
    instead of being exactly 0 and 1.

    Args:
        v: Value to clamp.
        lo: Smallest value that may be returned.
        hi: Largest value that may be returned.

    Returns:
        ``lo`` if ``v`` is below ``lo``, ``hi`` if ``v`` is above ``hi``,
        otherwise ``v`` unchanged.
    """
    return max(lo, min(hi, v))
def is_pixel_coord(xc, yc, bw, bh, thresh=1.5):
    """Guess whether a box is expressed in pixels rather than normalized units.

    The box is treated as pixel-based when the largest of its four values
    is strictly greater than ``thresh``. Values only slightly above 1
    (up to ``thresh``) are still treated as normalized-but-out-of-range.

    Args:
        xc: Box centre x.
        yc: Box centre y.
        bw: Box width.
        bh: Box height.
        thresh: Cut-off above which a value is considered a pixel value.

    Returns:
        ``True`` if the box looks like pixel coordinates, else ``False``.
    """
    return max(xc, yc, bw, bh) > thresh
# Running totals reported in the final summary line.
modified_files = 0
total_fixed = 0
total_dropped = 0


def _fmt6(v):
    """Render ``v`` with six decimals, flooring instead of rounding.

    Presumably flooring is used so a clamped value can never be rounded
    up past the upper clamp bound when written back.
    """
    return f"{math.floor(v * 1e6) / 1e6:.6f}"


# Walk every file in IMG_DIR; files OpenCV cannot decode are skipped.
for img_path in glob.glob(os.path.join(IMG_DIR, "*.*")):
    img = cv2.imread(img_path)
    if img is None:
        continue
    ih, iw = img.shape[:2]

    stem = os.path.splitext(os.path.basename(img_path))[0]
    lbl_path = os.path.join(LBL_DIR, stem + ".txt")
    if not os.path.exists(lbl_path):
        continue

    new_lines, file_changed = [], False
    with open(lbl_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    for ln_idx, ln in enumerate(lines, 1):
        ln = ln.strip()
        if not ln:
            continue

        # Too few fields or non-numeric fields both surface as ValueError.
        try:
            cls, xc, yc, bw, bh = ln.split()[:5]
            cls = int(cls)
            xc, yc, bw, bh = map(float, (xc, yc, bw, bh))
        except ValueError:
            print(f"[DROP] 解析失败 {lbl_path}:{ln_idx} -> {ln}")
            file_changed = True
            total_dropped += 1
            continue

        orig = (xc, yc, bw, bh)

        # ---------- 1. pixel coordinates -> normalized ----------
        normalized = is_pixel_coord(xc, yc, bw, bh)
        if normalized:
            xc /= iw
            yc /= ih
            bw /= iw
            bh /= ih

        # ---------- 2. convert to corner form (x1, y1, x2, y2) ----------
        x1, y1 = xc - bw / 2, yc - bh / 2
        x2, y2 = xc + bw / 2, yc + bh / 2

        # ---------- 3. clip each corner into the open interval (0, 1) ----------
        x1_n, y1_n, x2_n, y2_n = map(clamp_open, (x1, y1, x2, y2))
        bw_n, bh_n = x2_n - x1_n, y2_n - y1_n
        if bw_n <= 0 or bh_n <= 0:
            print(f"[DROP] 无效框 {lbl_path}:{ln_idx} -> {orig}")
            file_changed = True
            total_dropped += 1
            continue

        xc_n, yc_n = (x1_n + x2_n) / 2, (y1_n + y2_n) / 2
        if not (0 < xc_n < 1 and 0 < yc_n < 1):
            print(f"[DROP] 越界中心 {lbl_path}:{ln_idx} -> {orig}")
            file_changed = True
            total_dropped += 1
            continue

        # ---------- 4. record any modification ----------
        clipped = any(
            abs(a - b) > 1e-6
            for a, b in ((x1, x1_n), (y1, y1_n), (x2, x2_n), (y2, y2_n))
        )
        if clipped:
            print(f"[FIX ] 裁剪框 {lbl_path}:{ln_idx} {orig} -> {(xc_n, yc_n, bw_n, bh_n)}")
        elif normalized:
            print(f"[FIX ] 归一化 {lbl_path}:{ln_idx} {orig} -> {(xc_n, yc_n, bw_n, bh_n)}")
        # A pixel -> normalized conversion must mark the file as changed even
        # when nothing was clipped; otherwise the normalized values computed
        # here would never be written back to disk.
        if clipped or normalized:
            file_changed = True
            total_fixed += 1

        # ---------- 5. queue the cleaned line ----------
        new_lines.append(
            f"{cls} {_fmt6(xc_n)} {_fmt6(yc_n)} {_fmt6(bw_n)} {_fmt6(bh_n)}\n"
        )

    if file_changed:
        # An empty ``new_lines`` (every box dropped) leaves an empty label file.
        with open(lbl_path, "w", encoding="utf-8") as f:
            f.writelines(new_lines)
        modified_files += 1

print(f"\n✅ 清洗完成:修改 {modified_files} 份标签;修复 {total_fixed} 行,删除 {total_dropped} 行。")
典型输出:
[FIX ] 裁剪框 data/xxx.txt:5 (0.95, 1.1, 0.2, 0.3) -> (0.9249995, 0.9749995, 0.149999, 0.049999)
[DROP] 无效框 data/xxx.txt:8 -> (1.5, 1.5, 0.1, 0.1)

✅ 清洗完成:修改 42 份标签;修复 80 行,删除 28 行。
(以上数值仅为示意,实际打印的浮点数位数可能更多。)
👉 适用于 从 VOC 或 LabelMe 转换后初步整理的 YOLO 标签。
✅ 2. see_confirm.py —— 单张标签可视化验证
主要功能:
- 读取单张图片及对应 `.txt` 标签
- 在图上绘制每个 YOLO 标注框(默认红色)
- (可选)启用“修正框”对比,查看修剪后的标签框(取消 `main()` 中对应注释即可)
全部代码为:
import cv2
import os
# ==== 输入路径 ====
IMG_PATH = "data/ggc/images/00001504.png"
LBL_PATH = "data/ggc/labels/00001504.txt"
OUT_PATH = "visualized_bbox_fix.png"
# ==== 可视化参数 ====
BOX_COLOR_ORIG = (0, 0, 255) # 红 - 原始框
BOX_COLOR_FIXED = (0, 255, 0) # 绿 - 修正后框
def yolo_to_xyxy(xc, yc, w, h, img_w, img_h):
    """Convert a normalized YOLO box to pixel corner coordinates.

    Args:
        xc: Normalized box centre x.
        yc: Normalized box centre y.
        w: Normalized box width.
        h: Normalized box height.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` as ints (top-left and bottom-right corners).
        Each value is truncated toward zero by ``int()``; no clipping to
        the image bounds is performed.
    """
    x1 = int((xc - w / 2) * img_w)
    y1 = int((yc - h / 2) * img_h)
    x2 = int((xc + w / 2) * img_w)
    y2 = int((yc + h / 2) * img_h)
    return x1, y1, x2, y2
def fix_yolo_bbox(xc, yc, w, h):
    """Clip a normalized YOLO box to the unit square and re-derive its centre.

    The box is converted to edge form, each edge is clipped to [0, 1],
    and the centre/size are recomputed from the clipped edges.

    Args:
        xc: Normalized box centre x.
        yc: Normalized box centre y.
        w: Normalized box width.
        h: Normalized box height.

    Returns:
        ``(xc_new, yc_new, w_new, h_new)`` of the clipped box. If the input
        lies entirely outside the unit square the returned width and/or
        height is zero or negative; callers must check for that themselves.
    """
    # Clip the left/right and top/bottom edges into [0, 1].
    x_min = max(0.0, xc - w / 2)
    x_max = min(1.0, xc + w / 2)
    y_min = max(0.0, yc - h / 2)
    y_max = min(1.0, yc + h / 2)

    w_new = x_max - x_min
    h_new = y_max - y_min
    xc_new = (x_min + x_max) / 2
    yc_new = (y_min + y_max) / 2
    return xc_new, yc_new, w_new, h_new
def main():
    """Draw every box from ``LBL_PATH`` onto ``IMG_PATH`` and save to ``OUT_PATH``.

    Prints an error and returns early when the image cannot be read or the
    label file does not exist. Label lines that cannot be parsed are
    reported and skipped instead of aborting the whole run.
    """
    img = cv2.imread(IMG_PATH)
    if img is None:
        print(f"[ERR] 图像读取失败: {IMG_PATH}")
        return
    h, w = img.shape[:2]

    if not os.path.exists(LBL_PATH):
        print(f"[ERR] 标签不存在: {LBL_PATH}")
        return

    with open(LBL_PATH, "r", encoding="utf-8") as f:
        lines = [ln.strip() for ln in f if ln.strip()]

    for idx, line in enumerate(lines):
        parts = line.split()
        # Non-numeric fields and lines with fewer than five fields both
        # raise ValueError (the latter from the tuple unpacking).
        try:
            cls = int(parts[0])
            xc, yc, bw, bh = map(float, parts[1:5])
        except ValueError:
            print(f"[WARN] 标签解析失败,跳过: {LBL_PATH}:{idx + 1} -> {line}")
            continue

        # Original box (red).
        x1, y1, x2, y2 = yolo_to_xyxy(xc, yc, bw, bh, w, h)
        cv2.rectangle(img, (x1, y1), (x2, y2), BOX_COLOR_ORIG, 2)
        cv2.putText(img, f"Orig {cls}", (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, BOX_COLOR_ORIG, 1)

        # Optional: uncomment to overlay the clipped ("fixed") box in green.
        # xc2, yc2, bw2, bh2 = fix_yolo_bbox(xc, yc, bw, bh)
        # x1f, y1f, x2f, y2f = yolo_to_xyxy(xc2, yc2, bw2, bh2, w, h)
        # cv2.rectangle(img, (x1f, y1f), (x2f, y2f), BOX_COLOR_FIXED, 2)
        # cv2.putText(img, f"Fix {cls}", (x1f, y1f - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, BOX_COLOR_FIXED, 1)

        # Per-box report; the before/after lines belong to the optional overlay.
        print(f"Box {idx+1} (cls={cls})")
        # print(f" 原始: xc={xc:.6f}, yc={yc:.6f}, w={bw:.6f}, h={bh:.6f}")
        # print(f" 修正: xc={xc2:.6f}, yc={yc2:.6f}, w={bw2:.6f}, h={bh2:.6f}")
        print("")

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(OUT_PATH, img):
        print(f"[ERR] 结果保存失败: {OUT_PATH}")
        return
    print(f"✅ 可视化结果保存至: {OUT_PATH}")


if __name__ == "__main__":
    main()
使用方式:
配置 `IMG_PATH` 和 `LBL_PATH`,运行后自动输出一张可视化图片。
✅ 可视化结果保存至: visualized_bbox_fix.png
👉 适用于 清洗后 spot check,逐张检查标签效果是否正常。
✅ 3. see_labels.py —— 对比原始框 vs 修正框
主要功能:
- 传入原始 bbox 和修正 bbox(YOLO 格式)
- 可视化展示两者在图像中的位置差异
- 红色框为原始,绿色框为修正后

示例输入:
original_bbox = (0.91, 0.46, 0.41, 0.07)
fixed_bbox = (0.85, 0.46, 0.29, 0.07)
完整代码为:
import cv2
import os
def yolo_to_xyxy(xc, yc, w, h, img_w, img_h):
    """Convert a normalized YOLO box to pixel (left, top, right, bottom).

    Args:
        xc: Normalized box centre x.
        yc: Normalized box centre y.
        w: Normalized box width.
        h: Normalized box height.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` as ints, truncated toward zero and not clipped
        to the image bounds.
    """
    x1 = (xc - w / 2) * img_w
    y1 = (yc - h / 2) * img_h
    x2 = (xc + w / 2) * img_w
    y2 = (yc + h / 2) * img_h
    return int(x1), int(y1), int(x2), int(y2)
def visualize_bbox_fix(img_path, original_bbox, fixed_bbox, save_path="vis_output.jpg"):
    """Draw an original and a corrected YOLO box on one image and save it.

    Args:
        img_path: Path of the image to annotate.
        original_bbox: ``(xc, yc, w, h)`` in normalized YOLO form; drawn in red.
        fixed_bbox: ``(xc, yc, w, h)`` in normalized YOLO form; drawn in green.
        save_path: Where the annotated image is written.

    Returns:
        ``None``. Prints a message and returns early if the image cannot
        be read.
    """
    image = cv2.imread(img_path)
    if image is None:
        print(f"图像读取失败: {img_path}")
        return
    ih, iw = image.shape[:2]

    # Original box (red in BGR).
    x1o, y1o, x2o, y2o = yolo_to_xyxy(*original_bbox, iw, ih)
    cv2.rectangle(image, (x1o, y1o), (x2o, y2o), (0, 0, 255), 2)

    # Corrected box (green in BGR).
    x1f, y1f, x2f, y2f = yolo_to_xyxy(*fixed_bbox, iw, ih)
    cv2.rectangle(image, (x1f, y1f), (x2f, y2f), (0, 255, 0), 2)

    # Captions sit 5 px above each box's top-left corner.
    cv2.putText(image, "original", (x1o, y1o - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
    cv2.putText(image, "fixed", (x1f, y1f - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1)

    cv2.imwrite(save_path, image)
    print(f"✅ 保存对比图至:{save_path}")
# Example invocation (replace with one of your own samples).
if __name__ == "__main__":
    image_path = "data/ggc/images/00001085.png"
    # NOTE: both boxes are normalized YOLO boxes: x_center, y_center, w, h.
    original_bbox = (0.910156, 0.466667, 0.410938, 0.070833)
    fixed_bbox = (0.8523435, 0.466667, 0.295313, 0.070833)
    visualize_bbox_fix(image_path, original_bbox, fixed_bbox, save_path="bbox_fix_vis.jpg")
👉 适用于 单独分析异常样本、比较框修正前后的效果。
✅ 4. zengqiang.py —— 自动数据增强(含标签同步)
主要功能:
- 图像增强(旋转 ±10° 以内或 180°±10°,亮度扰动,运动模糊)(具体增强效果可以自行修改)
- 自动同步变换后的标签框,保持正确位置
- 每张图生成多个增强版本(配置 `n_aug_per_img` 控制)
- 输出新的 `images` 和 `labels`,格式为 YOLO txt
完整代码为:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
YOLO dataset augmentation script.

- Rotation: applied with probability 0.9 (not always); the angle is drawn
  from either [-10°, 10°] or [170°, 190°].
- Brightness: shifted by -10% to +25% (biased toward brightening), with a
  ±10% contrast shift, applied with probability 0.9.
- Motion blur: added with probability 0.4 to mimic camera smear.

Output stays in YOLO txt format (class x y w h) with coordinates
normalized to [0, 1].
"""
import os
import random
from glob import glob
import traceback
import cv2
import albumentations as A
from tqdm import tqdm  # newly added: progress bar for the per-image loop
# Source and output paths.
IMG_DIR = "data/ggc/images"
LBL_DIR = "data/ggc/labels"
OUT_IMG_DIR = "data/ggc2/images"
OUT_LBL_DIR = "data/ggc2/labels"
# Output directories are created at import time.
os.makedirs(OUT_IMG_DIR, exist_ok=True)
os.makedirs(OUT_LBL_DIR, exist_ok=True)
def build_transform():
    """Build the Albumentations pipeline used for every augmented sample.

    The pipeline consists of:

    * a rotation block applied with probability 0.9 that picks one of two
      ranges, [-10°, 10°] or [170°, 190°]; the inner ``p`` values
      (0.7 / 0.5) act as relative selection weights inside ``A.OneOf``;
    * brightness (-10% .. +25%) and contrast (±10%) jitter, p=0.9;
    * motion blur with a kernel size between 3 and 7, p=0.4.

    Boxes are handled in YOLO format; boxes whose area falls below 1 or
    whose visible fraction falls below 0.3 after the transform are dropped
    together with their entry in ``class_labels``.

    Returns:
        The composed ``A.Compose`` transform.
    """
    rotate_block = A.OneOf(
        [
            A.Rotate(limit=(-10, 10), border_mode=cv2.BORDER_CONSTANT, value=0, p=0.7),
            A.Rotate(limit=(170, 190), border_mode=cv2.BORDER_CONSTANT, value=0, p=0.5),
        ],
        p=0.9,
    )
    tfm = A.Compose(
        [
            rotate_block,
            A.RandomBrightnessContrast(
                brightness_limit=(-0.10, 0.25),
                contrast_limit=(-0.10, 0.10),
                p=0.9,
            ),
            A.MotionBlur(blur_limit=(3, 7), p=0.4),
        ],
        bbox_params=A.BboxParams(
            format="yolo",
            label_fields=["class_labels"],
            min_area=1,
            min_visibility=0.3,
        ),
    )
    return tfm
def load_yolo_labels(label_path):
    """Read a YOLO txt label file.

    Blank lines are ignored. Only the first five fields of each line
    (``class x y w h``) are used; any extra fields are discarded.

    Args:
        label_path: Path of the ``.txt`` label file.

    Returns:
        ``(boxes, classes)`` where ``boxes`` is a list of
        ``[xc, yc, w, h]`` float lists and ``classes`` the matching list
        of int class ids.

    Raises:
        ValueError: If a non-blank line has fewer than five fields or a
            field is not numeric.
    """
    boxes, classes = [], []
    with open(label_path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            parts = line.split()
            if not parts:
                continue
            # Fail early with context; a short bbox would otherwise only
            # blow up later inside the augmentation call.
            if len(parts) < 5:
                raise ValueError(
                    f"{label_path}:{line_no}: expected 'class x y w h', got {line.strip()!r}"
                )
            classes.append(int(parts[0]))
            boxes.append([float(v) for v in parts[1:5]])
    return boxes, classes
def save_yolo_labels(label_path, boxes, classes):
    """Write boxes and class ids to a YOLO txt label file.

    Each output line is ``class xc yc w h`` with the coordinates formatted
    to six decimals. An existing file at ``label_path`` is overwritten.

    Args:
        label_path: Destination ``.txt`` path.
        boxes: Iterable of ``(xc, yc, w, h)`` sequences.
        classes: Iterable of class ids, paired with ``boxes`` by position.
    """
    with open(label_path, "w", encoding="utf-8") as f:
        f.writelines(
            # int() keeps the class column integral even if the id comes
            # back from the augmentation library as a float or numpy scalar.
            f"{int(cls)} " + " ".join(f"{v:.6f}" for v in bbox) + "\n"
            for cls, bbox in zip(classes, boxes)
        )
def main(n_aug_per_img: int = 1):
    """Augment every labelled image in ``IMG_DIR`` and write the results.

    For each supported image with a non-empty label file,
    ``n_aug_per_img`` augmented copies are produced. Each copy is saved as
    ``<stem>_aug<i>.jpg`` in ``OUT_IMG_DIR`` with a matching
    ``<stem>_aug<i>.txt`` in ``OUT_LBL_DIR``. A copy whose boxes were all
    removed by the transform is not written.

    Args:
        n_aug_per_img: Number of augmented variants to generate per image.
    """
    transform = build_transform()
    img_paths = sorted(glob(os.path.join(IMG_DIR, "*.*")))
    supported = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}
    print(f"开始增强,共 {len(img_paths)} 张图像,每张增强 {n_aug_per_img} 次。")

    for img_path in tqdm(img_paths, desc="增强中"):
        name_no_ext, ext = os.path.splitext(os.path.basename(img_path))
        if ext.lower() not in supported:
            continue

        label_path = os.path.join(LBL_DIR, f"{name_no_ext}.txt")
        if not os.path.exists(label_path):
            print(f"[WARN] 标签缺失,跳过:{label_path}")
            continue

        image = cv2.imread(img_path)
        if image is None:
            print(f"[WARN] 图像读取失败:{img_path}")
            continue

        # Any failure for one image is reported and the run moves on to the
        # next image; remaining variants of the failing image are skipped.
        try:
            # The labels do not change between variants, so read them once.
            bboxes, class_labels = load_yolo_labels(label_path)
            if not bboxes:
                print(f"[WARN] 标签为空,跳过:{label_path}")
                continue

            for i in range(n_aug_per_img):
                aug = transform(image=image, bboxes=bboxes, class_labels=class_labels)
                aug_bboxes = aug["bboxes"]
                if not aug_bboxes:
                    print(f"[INFO] 增强后所有框被裁剪,跳过:{label_path}")
                    continue

                out_img_name = f"{name_no_ext}_aug{i}.jpg"
                out_lbl_name = f"{name_no_ext}_aug{i}.txt"
                cv2.imwrite(os.path.join(OUT_IMG_DIR, out_img_name), aug["image"])
                save_yolo_labels(
                    os.path.join(OUT_LBL_DIR, out_lbl_name),
                    aug_bboxes,
                    aug["class_labels"],
                )
        except Exception:
            print(f"\n[ERROR] 增强失败:{img_path}")
            print(f"对应标签文件:{label_path}")
            traceback.print_exc()

    print("\n✅ 数据增强完成!")


if __name__ == "__main__":
    main(n_aug_per_img=5)
结果为:
开始增强,共 180 张图像,每张增强 5 次。
✅ 数据增强完成!
👉 适用于 数据量少、类别不平衡时,通过变换提升模型泛化能力。
✅ 5. huafen.py —— 划分训练集 / 验证集
主要功能:
- 按设定比例随机划分训练集 / 验证集
- 过滤无标签图片
- 自动创建标准 YOLO 目录结构
- 复制图像与标签到 `train/` 与 `val/` 下
完整代码为:
import os
import glob
import shutil
import random
# ============ 源路径配置 ============
IMAGES_DIR = "data/ggc2/images"
LABELS_DIR = "data/ggc2/labels"
# ============ 新数据集输出路径 ============
OUTPUT_DIR = "data/ggcxin"
# ============ 参数 ============
train_ratio = 0.8
seed = 42
img_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
# ============ Create the output directory layout ============
for subset in ("train", "val"):
    os.makedirs(os.path.join(OUTPUT_DIR, subset, "images"), exist_ok=True)
    os.makedirs(os.path.join(OUTPUT_DIR, subset, "labels"), exist_ok=True)

# ============ Collect all images ============
all_imgs = []
for ext in img_exts:
    all_imgs.extend(glob.glob(os.path.join(IMAGES_DIR, f"*{ext}")))
# Sorting makes the seeded shuffle below start from a stable order.
all_imgs = sorted(all_imgs)
print(f"共找到 {len(all_imgs)} 张图像")

# ============ Drop images without a label file ============
valid_pairs = []
for img_path in all_imgs:
    base = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(LABELS_DIR, base + ".txt")
    if os.path.isfile(label_path):
        valid_pairs.append((img_path, label_path))
    else:
        print(f"[WARN] 缺少标签文件:{label_path},跳过此图")
print(f"有效样本数:{len(valid_pairs)}")

# ============ Random split ============
random.seed(seed)
random.shuffle(valid_pairs)
# The first ``train_ratio`` share (rounded down) goes to train, the rest to val.
split_idx = int(len(valid_pairs) * train_ratio)
train_set = valid_pairs[:split_idx]
val_set = valid_pairs[split_idx:]
print(f"→ 训练集:{len(train_set)} | 验证集:{len(val_set)}")
# ============ 拷贝图像/标签 ============
def copy_pair(pair_list, subset_name):
    """Copy image/label pairs into one subset of the output dataset.

    Args:
        pair_list: Iterable of ``(img_path, lbl_path)`` tuples.
        subset_name: Subset directory under ``OUTPUT_DIR`` (the script uses
            ``"train"`` and ``"val"``); files land in its ``images`` and
            ``labels`` sub-directories under their original base names.
    """
    for img_path, lbl_path in pair_list:
        img_dst = os.path.join(OUTPUT_DIR, subset_name, "images", os.path.basename(img_path))
        lbl_dst = os.path.join(OUTPUT_DIR, subset_name, "labels", os.path.basename(lbl_path))
        shutil.copy2(img_path, img_dst)
        shutil.copy2(lbl_path, lbl_dst)


copy_pair(train_set, "train")
copy_pair(val_set, "val")
print("✅ 数据集划分完成")
print(f"输出目录结构: {OUTPUT_DIR}/train|val/images+labels")
输出示例:
共找到 720 张图像
有效样本数:680
→ 训练集:544 | 验证集:136
✅ 数据集划分完成
输出结构如下:
data/
└── ggcxin/
├── train/
│ ├── images/
│ └── labels/
└── val/
├── images/
└── labels/
👉 适用于 构建可直接用于 YOLO 训练的数据结构。
🔁 推荐使用流程
以下是建议的执行顺序:
- ✅ 清洗标签:`clean_labels.py`
- 🔍 抽查可视化:`see_confirm.py` + `see_labels.py`
- 🔄 数据增强:`zengqiang.py`
- 🧩 划分数据集:`huafen.py`
🛠 你可以根据项目需要自定义增强策略、标签过滤规则或类名映射逻辑。