人工智能学习66-Yolo标签数据生成-CSDN博客

本文链接：https://blog.csdn.net/qixiang_chen/article/details/149141748

人工智能学习66-Yolo标签数据生成—快手视频
 人工智能学习67-Yolo标签数据生成—快手视频
 人工智能学习68-Yolo标签数据生成—快手视频
 人工智能学习69-Yolo标签数据生成—快手视频

Yolo算法的标签数据

根据样本标签数据生成Yolo网络训练时使用image_data，y_true数据。
image_data是图片二进制编码数据，数据格式(n,w,h,c)=(16, 416, 416, 3)。其中16是图片数量，每次取16幅图片用于训练网络模型，416*416分别是图片宽度和高度，3代表图片的RGB三个颜色通道。

y_true是标签数据，数据格式(n,w,h,boxes,classes)=(16,13,13,3,85)、(16,26,26,3,85)、(16,52,52,3,85)。其中16是图片数量，训练样本每幅图片都有标签值，13*13、26*26、52*52代表三个不同大小的特征图，boxes=3代表每个尺寸的特征图设定3个先验框box，先验框配置文件在/model_data/yolo_anchors.txt中设置。Classes=85代表80类物体种类和坐标信息，前五个元素分别是物体x、y、w、h、confidence，后80个元素是80类物体其中之一的one-hot编码，具体数据格式如下：
在这里插入图片描述

样本提供类

dataloader.py

import math
from random import shuffle

import cv2
import keras
import numpy as np
from PIL import Image

from utils import cvtColor, preprocess_input


class YoloDatasets(keras.utils.Sequence):
    def __init__(self, annotation_lines, input_shape, anchors, batch_size, num_classes, anchors_mask, train):
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)

        self.input_shape = input_shape
        self.anchors = anchors
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.anchors_mask = anchors_mask
        self.train = train

    def __len__(self):
        return math.ceil(len(self.annotation_lines) / float(self.batch_size))

    def __getitem__(self, index):
        """Assemble batch number ``index``.

        Sample positions wrap around modulo ``self.length`` so the final batch
        is always full. Each sample is loaded through ``get_random_data``
        (augmented only when ``self.train`` is truthy) and the image is passed
        through ``preprocess_input``.

        Returns:
            A pair ``([image_data, *y_true], zeros)`` where ``image_data`` stacks
            the batch images, ``y_true`` holds one label array per feature
            layer, and ``zeros`` is a ``batch_size``-long placeholder target.
        """
        first = index * self.batch_size
        images = []
        boxes = []
        for position in range(first, first + self.batch_size):
            # Wrap the running counter so it never exceeds the dataset size.
            i = position % self.length
            picture, picture_boxes = self.get_random_data(
                self.annotation_lines[i], self.input_shape, random=self.train)
            images.append(preprocess_input(np.array(picture, np.float32)))
            boxes.append(picture_boxes)

        # Lists become arrays whose first axis is the batch dimension.
        image_data = np.array(images)
        box_data = np.array(boxes)
        # The annotation line of the last sample is passed along only as
        # context for preprocess_true_boxes.
        y_true = self.preprocess_true_boxes(
            box_data, self.input_shape, self.anchors, self.num_classes, self.annotation_lines[i])
        return [image_data, *y_true], np.zeros(self.batch_size)

    def on_epoch_end(self):
        shuffle(self.annotation_lines)

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    # 返回图片数据和预测框信息绝对坐标值
    def get_random_data(self, annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=0.7, val=0.4,
                        random=True):
        """Load one annotated image, fit it onto a grey canvas and adjust its boxes.

        Args:
            annotation_line: whitespace-separated string; the first token is the
                image path, every further token is a comma-separated list of
                integers describing one box. Columns 0-3 are used as
                x_min, y_min, x_max, y_max; column 4 is carried through
                unchanged.
            input_shape: target size as (height, width).
            max_boxes: number of rows in the returned box array.
            jitter: strength of the random aspect-ratio distortion.
            hue, sat, val: maximum relative gain applied to the H, S and V
                channels during colour augmentation.
            random: when falsy, only a centred aspect-preserving resize is
                done; when truthy, random scale, placement, flip and colour
                changes are applied.

        Returns:
            ``(image_data, box_data)``. ``box_data`` has shape ``(max_boxes, 5)``
            and is zero-padded after the boxes that survived clipping.
            Because the parsed boxes are an integer array, rescaled coordinates
            are truncated to integers when written back.
        """
        line = annotation_line.split()  # tokens of one annotation line
        # ------------------------------#
        #   Read the image and convert it to RGB
        # ------------------------------#
        image = Image.open(line[0])  # first token is the image path
        image = cvtColor(image)
        # ------------------------------#
        #   Source size and target size
        # ------------------------------#
        iw, ih = image.size  # size of the image on disk
        h, w = input_shape  # e.g. [416, 416]
        # ------------------------------#
        #   Parse the ground-truth boxes
        #   box is a 2-D integer array, one row per box
        # ------------------------------#
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])  # one row per box token
        if not random:  # no augmentation
            scale = min(w / iw, h / ih)  # largest scale that still fits inside the target
            nw = int(iw * scale)  # resized width
            nh = int(ih * scale)  # resized height
            dx = (w - nw) // 2  # horizontal offset that centres the image
            dy = (h - nh) // 2  # vertical offset that centres the image

            # ---------------------------------#
            #   Pad the unused area with grey
            # ---------------------------------#
            image = image.resize((nw, nh), Image.BICUBIC)  # resize the image
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))  # paste with its top-left corner at (dx, dy)
            image_data = np.array(new_image, np.float32)  # pixel data of the padded image

            # ---------------------------------#
            #   Adjust the ground-truth boxes
            # ---------------------------------#
            box_data = np.zeros((max_boxes, 5))  # zero-filled (max_boxes, 5) array
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx  # rescale and shift x_min / x_max
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy  # rescale and shift y_min / y_max
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                if len(box) > max_boxes:
                    box = box[:max_boxes]
                box_data[:len(box)] = box  # copy the adjusted boxes into the padded array
            return image_data, box_data

        # ------------------------------------------#
        #   Training samples are transformed and augmented:
        #   rescale the image and distort its aspect ratio
        # ------------------------------------------#
        new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)  # random scale factor
        if new_ar < 1:  # distorted aspect ratio is taller than wide
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:  # distorted aspect ratio is at least as wide as tall
            nw = int(scale * w)
            nh = int(nw / new_ar)
        # resize to the distorted size
        image = image.resize((nw, nh), Image.BICUBIC)

        # ------------------------------------------#
        #   Paste at a random offset on a grey canvas
        # ------------------------------------------#
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # ------------------------------------------#
        #   Random horizontal flip
        # ------------------------------------------#
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        image_data = np.array(image, np.uint8)
        # ---------------------------------#
        #   Colour augmentation:
        #   draw one random gain per HSV channel
        # ---------------------------------#
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        # ---------------------------------#
        #   Convert to HSV
        #   (hue/sat/val are rebound from gains to channel planes here)
        # ---------------------------------#
        hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        # ---------------------------------#
        #   Apply the gains through lookup tables
        # ---------------------------------#
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

        # ---------------------------------#
        #   Adjust the ground-truth boxes
        # ---------------------------------#
        box_data = np.zeros((max_boxes, 5))  # zero-filled (max_boxes, 5) array
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx  # rescale and shift x_min / x_max
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy  # rescale and shift y_min / y_max
            if flip:  # mirror: new x_min = w - old x_max, new x_max = w - old x_min
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            # box[:, 2] is a 1-D view of the x_max column;
            # the boolean mask selects the entries larger than w
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box
            if len(box) > max_boxes:
                box = box[:max_boxes]
            box_data[:len(box)] = box  # copy the boxes into the leading rows of box_data
            # columns 0 and 1 hold the top-left corner; all values are adjusted absolute coordinates
        return image_data, box_data

    # 返回深度学习Label导师数据y_true
    # true_boxes是真实框绝对坐标信息,每张图片中100个真实框,但存在xywh的数据的真实框数量不多
    def preprocess_true_boxes(self, true_boxes, input_shape, anchors, num_classes, line):
        assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
        # -----------------------------------------------------------#
        #   获得框的坐标和图片的大小
        #   true_boxes= (8, 100, 5) 第一维为batch_size,第二维为box的数量,最大为100,第三维为xywhc,c为物体分类序号
        # -----------------------------------------------------------#
        true_boxes = np.array(true_boxes, dtype='float32')
        input_shape = np.array(input_shape, dtype='int32')  # 转换为np.array int32类型数组
        # -----------------------------------------------------------#
        #   一共有三个特征层数
        # -----------------------------------------------------------#
        num_layers = len(self.anchors_mask)
        # -----------------------------------------------------------#
        #   m为图片数量，true_boxes.shape[0]集合中元素的个数
        #   分别获取三个特征层大小,grid_shapes为网格的shape
        #   13,13 26,26, 52,52
        #   416/32=13;416/16=26;416/8=52
        # -----------------------------------------------------------#
        m = true_boxes.shape[0]  # 第一维是图片数量,第一维为batch_size
        grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
        # grid_shapes是集合类型 H*W,包含3个元素[array([13, 13], dtype=int32), array([26, 26], dtype=int32), array([52, 52], dtype=int32)]
        # -----------------------------------------------------------#
        #   循环三个特征层,返回各个特征层的y_true的格式为,都赋值0
        #   (m,13,13,3,85)
        #   (m,26,26,3,85)
        #   (m,52,52,3,85)
        #   13*13 26*26 52*52为特征图宽与高,3为三个真实框,85为xywhc + 80类物体one-hot编码,其中c为真实框中是否存在物体
        #   y_true[1]=(16, 26, 26, 3, 25),y_true[2]=(16, 52, 52, 3, 25)
        # -----------------------------------------------------------#
        y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(self.anchors_mask[l]), 5 + num_classes),
                           dtype='float32') for l in range(num_layers)]
        # -----------------------------------------------------------#
        #   文件2007_train.txt给出真实框的坐标是左上角和右下角两个坐标点
        #   通过计算获得真实框的中心和宽高
        #   中心点(m,n,2) 宽高(m,n,2)
        # -----------------------------------------------------------#
        boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2  # 计算真实框中心坐标
        boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]  # 计算真实框宽高
        # -----------------------------------------------------------#
        #   将真实框xywh归一化到小数形式
        #   x,y,w,h都是通过除以input_shape归一化数据
        # -----------------------------------------------------------#
        true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
        true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]

        # -----------------------------------------------------------#
        #   [9,2] -> [?,9,2]
        #   [0,0] 获得anchor_maxes右下角，anchor_mins左上角
        # -----------------------------------------------------------#
        anchors = np.expand_dims(anchors, 0)  # 将先验框anchors扩展第一维为batch_size
        # anchors=[ [[ 10.  13.] [ 16.  30.] [ 33.  23.] [ 30.  61.] [ 62.  45.] [ 59. 119.] [116.  90.] [156. 198.] [373. 326.]] ]
        anchor_maxes = anchors / 2.  # 获取先验框宽高的一半
        # anchor_maxes=[ [[  5.    6.5][  8.   15. ][ 16.5  11.5][ 15.   30.5][ 31.   22.5][ 29.5  59.5][ 58.   45. ][ 78.   99. ][186.5 163. ]] ]
        anchor_mins = - anchor_maxes  # 获取负值作为最小宽高
        # anchor_mins=[ [[  -5.    -6.5][  -8.   -15. ][ -16.5  -11.5][ -15.   -30.5][ -31.   -22.5][ -29.5  -59.5][ -58.   -45. ][ -78.   -99. ][-186.5 -163. ]] ]

        # -----------------------------------------------------------#
        #   长宽要大于0才有效 type(valid_mask)=valid_mask=<class 'numpy.ndarray'>
        #   len(valid_mask)=16
        # -----------------------------------------------------------#
        valid_mask = boxes_wh[..., 0] > 0 #返回一维的布尔矩阵,w>0返回True,否则返回False

        for b in range(m):  # 遍历每一张图片 m = batch_size = 16
            # -----------------------------------------------------------#
            #   对每一张图进行处理
            # -----------------------------------------------------------#
            wh = boxes_wh[b, valid_mask[b]]  # 使用布尔矩阵获取数据,过滤w,h>0的真实框
            if len(wh) == 0: #如果此图片没有标注真实框数据,过滤掉
                continue
            # -----------------------------------------------------------#
            #   [n,2] -> [n,1,2]
            #   [0,0] 获得box_maxes右下角，box_mins左上角
            # -----------------------------------------------------------#
            wh = np.expand_dims(wh, -2)  # wh第一维为batch_size,第二维为wh,在倒数第二维添加一维,为了方便与anchor_mins,anchor_maxes矩阵运算
            box_maxes = wh / 2.  # 获取原宽高的一半 box_maxes形如[[[115. 68.5]]]
            box_mins = - box_maxes  # 获取负值作为最小宽高 box_mins形如[[[-115. -68.5]]]

            # -----------------------------------------------------------#
            #   计算所有真实框和先验框的交并比
            #   intersect_area  [n,9]
            #   box_area        [n,1]
            #   anchor_area     [1,9]
            #   iou             [n,9]
            #   n个真实框，和9个先验框，他们的重合程度。
            # -----------------------------------------------------------#
            intersect_mins = np.maximum(box_mins, anchor_mins)  # 真实框与先验框左上角最小坐标 intersect_mins.shape= (1, 9, 2)
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)  # 真实框与先验框右下角最大坐标 intersect_maxes.shape= (1, 9, 2)
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)  # 交集最大区域的宽高 intersect_wh.shape= (1, 9, 2)
            intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]  # 真实框与先验框交集面积 intersect_area.shape= (1, 9)
            box_area = wh[..., 0] * wh[..., 1]  # 真实框面积,wh一维长度取决于配置文件2007_train.txt中标注几个物体 box_area.shape= (n, 1)
            anchor_area = anchors[..., 0] * anchors[..., 1]  # 先验框面积,anchors一维长度为9 anchor_area.shape= (1, 9)
            # box_area为(n,1),anchor_area和intersect_area为(1,9),它们之间可以进行算术运算
            iou = intersect_area / (box_area + anchor_area - intersect_area)  # iou 计算三个特征层中交集与并集的面积比率
            #输出示例一：box_area.shape = (1, 1) intersect_area.shape = (1, 9) iou.shape = (1, 9)
            #输出示例二：box_area.shape = (2, 1) intersect_area.shape = (2, 9) iou.shape = (2, 9)
            # -----------------------------------------------------------#
            #   维度是[n,]
            #   获得每一个真实框最对应的先验框。
            #   np.argmax 返回向量的最大值的索引,iou(3,9) axis=-1最后一维得到结果best_anchor {ndarray:(3,)} [5 7 8]
            # -----------------------------------------------------------#
            best_anchor = np.argmax(iou, axis=-1)  # 将best_anchor转化为向量,记录每个真实框交并比最大的序号
            for t, n in enumerate(best_anchor):  # 遍历best_anchor为向量,t为数据序号,n为anchor序号
                # -----------------------------------------------------------#
                #   t是计数,从0开始递增,n代表best_anchor中的元素(anchor索引值)
                #   找到每个真实框所属的特征层
                # -----------------------------------------------------------#
                for l in range(num_layers):  # 每个特征层遍历一次
                    if n in self.anchors_mask[l]:  # 如果索引值在anchors_mask=[[6, 7, 8], [3, 4, 5], [0, 1, 2]]
                        # -----------------------------------------------------------#
                        #   floor用于向下取整，找到真实框所属的特征层对应的x、y轴坐标
                        #   b为图片数量维度,t是真实框序号,
                        # -----------------------------------------------------------#
                        i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')  # i=x*W代表特征图行数
                        j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')  # j=y*H代表特征图列数
                        # -----------------------------------------------------------#
                        #   k指的的当前这个特征点的第k个先验框,n是anchors中的序号,就是anchors_mask集合里面的元素
                        #   0<= k <=3
                        # -----------------------------------------------------------#
                        k = self.anchors_mask[l].index(n)
                        # -----------------------------------------------------------#
                        #   c指的是当前这个真实框的种类,c是文件2007_train.txt中配置的数值,代表物体种类编号
                        # -----------------------------------------------------------#
                        c = true_boxes[b, t, 4].astype('int32')
                        # -----------------------------------------------------------#
                        #   y_true的shape为
                        #   (m,13,13,3,85)
                        #   (m,26,26,3,85)
                        #   (m,52,52,3,85)
                        #   最后的85可以拆分成4+1+80，4代表的是框的中心与宽高、
                        #   1代表的是置信度、80代表的是种类
                        #   y_true[l] l是特征层 y_true[l][b, j, i, k, 4] k是第几个先验框(每层一共3个先验框)
                        #   j是横坐标,i是纵坐标 它们是特征图grid上的坐标点
                        #   y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]写法含义是
                        #   使用b, j, i, k定位y_true[l],使用b, t定位true_boxes,将y_true[l]的0-4列数据赋值到y_true[l]的0-4列数据
                        #   t与true_boxes的第二维度匹配
                        # -----------------------------------------------------------#
                        y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]  # 矩形框坐标信息[x,y,w,h]
                        y_true[l][b, j, i, k, 4] = 1  # 矩形框中存在物体
                        y_true[l][b, j, i, k, 5 + c] = 1  # one-hot码,不是c种类的默认为0
                        #print('y_true=',np.argmax(y_true[l][b, j, i, k,6:])) 输出80类物体序号
        #y_true尺寸是(n,x,y,w,h,是否存在物体,80分类one-hot),x,y是grid中离散数据,x,y,w,h都是归一化数据
        return y_true

代码解释部分

方法get_random_data第78行
box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
循环box字符串，以逗号分隔生成list对象，再封装np.array数组
例程：
str = ["/VOC2007/JPEGImages/000001.jpg", "48,240,195,371,16", "8,12,352,498,0"]
line = np.array(str)
box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
print(box)
[[ 48 240 195 371 16]
[ 8 12 352 498 0]]

方法get_random_data第97行
box_data = np.zeros((max_boxes, 5))
max_boxes=100生成100*5二维数组,填充0
例程：
max_boxes = 100
box_data = np.zeros((max_boxes, 5))
print(box_data)

方法get_random_data第100,101行
box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
根据转化图片宽度比例,调整预测框起始横坐标x和宽度w
box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
根据转化图片高度比例,调整预测框起始纵坐标y和高度h
np.array数组切片用法，box[:, [0, 2]]代表获取box每一行的第1、3列元素（即x_min和x_max）。
例程：
b = np.array([1,2,3,4,5])
print(b[...,[0,2]])
b = np.array([[1,2,3,4,5],[6,7,8,9],[10,11,12]], dtype=object)
print(b[...,[0,2]])
[1 3]
[list([1, 2, 3, 4, 5]) list([10, 11, 12])]

方法get_random_data第103,104行
box[:, 2][box[:, 2] > w] = w
box[:, 3][box[:, 3] > h] = h
取box数组第2维中第3个元素，如果此元素大于w，则赋值为w。
例程：
w = 4
b = np.array([[1,2,3,4,5],[6,7,8,9,10]])
b[:, 2][b[:, 2] > w] = w
print(b)
[[ 1 2 3 4 5]
[ 6 7 4 9 10]]

w = 4
b = np.array([[1,2,3,4,5],[6,7,8,9,10]])
b[:, :][b[:, 2] > w] = w
print(b)
[[1 2 3 4 5]
[4 4 4 4 4]]

方法get_random_data第107行
box = box[np.logical_and(box_w > 1, box_h > 1)]
只保留box中宽度box_w和高度box_h同时大于1的行，其余无效框被丢弃。
例程：
box = np.array([[10,10,20,30], [15,20,30,50]])
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w > 10, box_h > 2)]
print(box)
[[15 20 30 50]]

方法preprocess_true_boxes第211行
grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
例程
input_shape = [416, 416]
input_shape = np.array(input_shape, dtype='int32') # 转换为np.array int32类型数组
num_layers = 3
grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
print(grid_shapes)
[array([13, 13], dtype=int32), array([26, 26], dtype=int32), array([52, 52], dtype=int32)]

方法preprocess_true_boxes第252行
valid_mask = boxes_wh[..., 0] > 0
wh = boxes_wh[b, valid_mask[b]]
例程
b = 1
boxes_wh = np.array([[10,10],[20,20],[0,30]])
valid_mask = boxes_wh[..., 0] > 0
print('valid_mask=',valid_mask)
wh = boxes_wh[b, valid_mask[b]]
print('wh (b=1)=', wh)
b = 2
wh = boxes_wh[b, valid_mask[b]]
print('wh (b=2)=', wh)
valid_mask= [ True True False]
wh (b=1)= [[20 20]]
wh (b=2)= []

方法preprocess_true_boxes第282行
anchor_area = anchors[..., 0] * anchors[..., 1]
例程
anchors = [[10,13], [16,30], [33,23], [30,61], [62,45], [59,119], [116,90], [156,198], [373,326]]
anchors = np.array(anchors)
anchor_area = anchors[..., 0] * anchors[..., 1]
print('anchor_area=',anchor_area)
anchor_area= [ 130 480 759 1830 2790 7021 10440 30888 121598]

方法preprocess_true_boxes第292行
best_anchor = np.argmax(iou, axis=-1)
例程
iou = np.random.randint(low=0, high=10, size=27).reshape(3, 9)
print('iou=', iou)
best_anchor = np.argmax(iou, axis=-1)
print('best_anchor=', best_anchor)
for t, n in enumerate(best_anchor):
    print('t=',t, 'n=', n)
iou= [[6 2 3 2 5 0 7 9 6]
[1 9 0 5 1 6 2 3 6]
[8 2 3 4 8 0 2 9 8]]
best_anchor= [7 1 7]
t= 0 n= 7
t= 1 n= 1
t= 2 n= 7

方法preprocess_true_boxes第330行
y_true[l][b, j, i, k, 5 + c] = 1
例程
m = 16
num_classes = 80
y_true = [np.zeros((m, 13, 13, 3, 5 + num_classes), dtype='float32')]
y_true = np.array(y_true)
print(y_true.shape)
b = 15
j = 12
i = 12
k = 2
c = 79
y_true[0][b, j, i, k, 5 + c] = 1
print(y_true)
[[0. 0. 0. … 0. 0. 0.]
[0. 0. 0. … 0. 0. 0.]
[0. 0. 0. … 0. 0. 1.]]]]]]