目录
步骤五、数据读取与处理
数据集对于训练非常重要,好的数据集可以有效提高训练精度和效率。在加载数据集前,我们通常会对数据集进行一些处理。
定义数据集及数据操作
我们定义一个函数read_data来创建数据集。在这个函数中,我们定义好需要进行的数据增强和处理操作:
- 读取数据集。
- 定义进行数据增强和处理所需要的一些参数。
- 根据参数,生成对应的数据增强操作。
- 使用map映射函数,将数据操作应用到数据集。
- 对生成的数据集进行处理。
- 对处理好的数据进行样例展示。
# 数据处理
def read_data(path, config, usage="train"):
    """Build the batched image pipeline for the five-class flowers dataset.

    Args:
        path: Root directory passed to ``ds.ImageFolderDataset``.
        config: Settings object; this function reads ``_R_MEAN``/``_G_MEAN``/
            ``_B_MEAN``, ``_R_STD``/``_G_STD``/``_B_STD``, ``_RESIZE_SIDE_MIN``,
            ``HEIGHT``, ``WIDTH`` and (for training) ``batch_size`` from it.
        usage: ``"train"`` selects random crop + horizontal flip, shuffling and
            ``config.batch_size`` batches; any other value selects decode +
            resize + centre crop with a batch size of 1.

    Returns:
        The mapped and batched dataset object.
    """
    # Read the raw images from the directory with a fixed folder -> label mapping.
    dataset = ds.ImageFolderDataset(
        path,
        class_indexing={'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4})

    is_train = usage == 'train'

    # All settings come from the `config` argument rather than a module-level
    # global, so the caller decides which configuration is applied.
    if is_train:
        # Training only: fused random-crop/decode/resize, then random flip.
        image_ops = [
            vision.RandomCropDecodeResize((config.HEIGHT, config.WIDTH), (0.5, 1.0), (1.0, 1.0),
                                          max_attempts=100),
            vision.RandomHorizontalFlip(),
        ]
    else:
        # Evaluation only: deterministic decode, resize and centre crop.
        image_ops = [
            vision.Decode(),
            vision.Resize(config._RESIZE_SIDE_MIN),
            vision.CenterCrop((config.HEIGHT, config.WIDTH)),
        ]

    # Shared by both splits: per-channel normalisation, then HWC -> CHW.
    image_ops.append(
        vision.Normalize(mean=[config._R_MEAN, config._G_MEAN, config._B_MEAN],
                         std=[config._R_STD, config._G_STD, config._B_STD]))
    image_ops.append(vision.HWC2CHW())

    # One map() per operation, in the same order as listed above.
    for image_op in image_ops:
        dataset = dataset.map(input_columns="image", operations=image_op)

    if is_train:
        dataset = dataset.shuffle(buffer_size=10000)  # 10000 as in imageNet train script
        dataset = dataset.batch(config.batch_size, drop_remainder=True)
    else:
        dataset = dataset.batch(1, drop_remainder=True)

    # repeat(1) leaves the dataset at a single pass; it is not an augmentation step.
    dataset = dataset.repeat(1)
    # NOTE(review): nothing visible in this file reads `map_model`; kept only so
    # the returned object carries the same attribute as before — confirm and drop.
    dataset.map_model = 4
    return dataset
# Build both splits and report how many samples each one provides.
de_train = read_data(cfg.data_path, cfg, usage="train")
de_test = read_data(cfg.test_path, cfg, usage="test")
# get_dataset_size() counts batches, so the training count is scaled by the batch size.
print('训练数据集数量:', cfg.batch_size * de_train.get_dataset_size())
print('测试数据集数量:', de_test.get_dataset_size())

# Pull the first training batch as NumPy arrays and inspect one sample.
data_next = next(de_train.create_dict_iterator(output_numpy=True))
print('通道数/图像长/宽:', data_next['image'][0].shape)
# Labels are the integers 0-4, one per flower class.
print('一张图像的标签样式:', data_next['label'][0])

# Show the first channel of the first image in the batch.
plt.figure()
plt.imshow(data_next['image'][0, 0])
plt.colorbar()
plt.grid(False)
plt.show()
步骤六、模型构建训练
- 定义模型
残差块(Residual Block)
将前面若干层的输出跳过中间层作为后几层的输入部分,也就是说后面特征层会有前几层的部分线性贡献。此种设计是为了克服随着网络层数加深而产生的学习效率变低和准确率无法有效提升的问题。
瓶颈(BottleNeck)模块:
瓶颈(BottleNeck)模块,思路和Inception模块一样,通过1x1 conv来巧妙地缩减或扩张feature map维度从而使3x3 conv的filters数目不受外界即上一层输入的影响,自然它的输出也不会影响到下一层module。
ResNet50模型构建
ResNet50有两个基本的块,分别名为Conv Block和Identity Block,其中Conv Block输入和输出的维度是不一样的,所以不能连续串联,它的作用是改变网络的维度;Identity Block输入维度和输出维度相同,可以串联,用于加深网络。
"""ResNet."""
# 定义权重初始化函数
def _weight_variable(shape, factor=0.01):
    """Return a float32 Tensor of `shape` holding standard-normal samples scaled by `factor`."""
    samples = np.random.randn(*shape)
    scaled = samples.astype(np.float32) * factor
    return Tensor(scaled)
# 定义3X3卷积函数
def _conv3x3(in_channel, out_channel, stride=1):
    """Create a 3x3 `nn.Conv2d` ('same' padding) with randomly initialised weights."""
    kernel = _weight_variable((out_channel, in_channel, 3, 3))
    return nn.Conv2d(
        in_channel,
        out_channel,
        kernel_size=3,
        stride=stride,
        padding=0,
        pad_mode='same',
        weight_init=kernel,
    )
# 定义1X1卷积层函数
def _conv1x1(in_channel, out_channel, stride=1):
    """Create a 1x1 `nn.Conv2d` ('same' padding) with randomly initialised weights."""
    kernel = _weight_variable((out_channel, in_channel, 1, 1))
    return nn.Conv2d(
        in_channel,
        out_channel,
        kernel_size=1,
        stride=stride,
        padding=0,
        pad_mode='same',
        weight_init=kernel,
    )
# 定义7X7卷积层函数
def _conv7x7(in_channel, out_channel, stride=1):
    """Create a 7x7 `nn.Conv2d` ('same' padding) with randomly initialised weights."""
    kernel = _weight_variable((out_channel, in_channel, 7, 7))
    return nn.Conv2d(
        in_channel,
        out_channel,
        kernel_size=7,
        stride=stride,
        padding=0,
        pad_mode='same',
        weight_init=kernel,
    )
# 定义Batch Norm层函数
def _bn(channel):
    """Create a `nn.BatchNorm2d` over `channel` features with gamma initialised to 1."""
    settings = {
        'eps': 1e-4,
        'momentum': 0.9,
        'gamma_init': 1,
        'beta_init': 0,
        'moving_mean_init': 0,
        'moving_var_init': 1,
    }
    return nn.BatchNorm2d(channel, **settings)
# 定义最后一层的Batch Norm函数
def _bn_last(channel):
    """Create a `nn.BatchNorm2d` like `_bn`, but with gamma initialised to 0.

    Used for the final normalisation inside a residual block.
    """
    settings = {
        'eps': 1e-4,
        'momentum': 0.9,
        'gamma_init': 0,
        'beta_init': 0,
        'moving_mean_init': 0,
        'moving_var_init': 1,
    }
    return nn.BatchNorm2d(channel, **settings)
# 定义全连接层函数
def _fc(in_channel, out_channel):
    """Create a biased `nn.Dense` layer with random weights and a zero-initialised bias."""
    dense_weight = _weight_variable((out_channel, in_channel))
    return nn.Dense(
        in_channel,
        out_channel,
        has_bias=True,
        weight_init=dense_weight,
        bias_init=0,
    )
# 构建残差模块
class ResidualBlock(nn.Cell):
    """
    ResNet V1 bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        stride (int): Stride of the 3x3 convolution and of the shortcut
            projection. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(3, 256, stride=2)
    """
    # The first two convolutions use out_channel // expansion filters.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1):
        super().__init__()
        # Width of the two inner convolutions: a quarter of the output channels.
        mid_channel = out_channel // self.expansion

        # 1x1 convolution that reduces the channel count.
        self.conv1 = _conv1x1(in_channel, mid_channel, stride=1)
        self.bn1 = _bn(mid_channel)
        # 3x3 convolution; this is where the block's stride is applied.
        self.conv2 = _conv3x3(mid_channel, mid_channel, stride=stride)
        self.bn2 = _bn(mid_channel)
        # 1x1 convolution that expands back to the output channel count.
        self.conv3 = _conv1x1(mid_channel, out_channel, stride=1)
        self.bn3 = _bn_last(out_channel)

        self.relu = nn.ReLU()

        # The shortcut needs a projection whenever the main path changes the
        # stride or the channel count; otherwise the input is added unchanged.
        self.down_sample = stride != 1 or in_channel != out_channel
        self.down_sample_layer = None
        if self.down_sample:
            projection = [_conv1x1(in_channel, out_channel, stride), _bn(out_channel)]
            self.down_sample_layer = nn.SequentialCell(projection)

        self.add = ops.Add()

    def construct(self, x):
        """Run the three convolutions and add the (possibly projected) input."""
        shortcut = x

        # 1x1 convolution
        out = self.relu(self.bn1(self.conv1(x)))
        # 3x3 convolution
        out = self.relu(self.bn2(self.conv2(out)))
        # 1x1 convolution; the activation is applied after the addition
        out = self.bn3(self.conv3(out))

        if self.down_sample:
            shortcut = self.down_sample_layer(shortcut)

        return self.relu(self.add(out, shortcut))
# 构建残差网络
class ResNet(nn.Cell):
    """
    ResNet backbone followed by a classification head.

    Args:
        block (Cell): Block for network.
        layer_nums (list): Numbers of block in different layers.
        in_channels (list): Input channel in each layer.
        out_channels (list): Output channel in each layer.
        strides (list): Stride size in each layer.
        num_classes (int): The number of classes that the training images are belonging to.

    Returns:
        Tensor, output tensor.

    Raises:
        ValueError: If `layer_nums`, `in_channels` and `out_channels` are not
            all of length 4.

    Examples:
        >>> ResNet(ResidualBlock,
        >>>        [3, 4, 6, 3],
        >>>        [64, 256, 512, 1024],
        >>>        [256, 512, 1024, 2048],
        >>>        [1, 2, 2, 2],
        >>>        10)
    """

    def __init__(self, block, layer_nums, in_channels, out_channels, strides, num_classes):
        super().__init__()
        if not (len(layer_nums) == len(in_channels) == len(out_channels) == 4):
            raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")

        # Stem: 7x7 convolution (3 -> 64 channels, stride 2), batch norm, ReLU.
        self.conv1 = _conv7x7(3, 64, stride=2)
        self.bn1 = _bn(64)
        self.relu = ops.ReLU()
        # 3x3 max pooling with stride 2.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")

        # Four residual stages (conv2_x .. conv5_x).
        self.layer1 = self._make_layer(
            block, layer_nums[0],
            in_channel=in_channels[0], out_channel=out_channels[0], stride=strides[0])
        self.layer2 = self._make_layer(
            block, layer_nums[1],
            in_channel=in_channels[1], out_channel=out_channels[1], stride=strides[1])
        self.layer3 = self._make_layer(
            block, layer_nums[2],
            in_channel=in_channels[2], out_channel=out_channels[2], stride=strides[2])
        self.layer4 = self._make_layer(
            block, layer_nums[3],
            in_channel=in_channels[3], out_channel=out_channels[3], stride=strides[3])

        # Head: mean over axes (2, 3), flatten, then the dense output layer.
        self.mean = ops.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.end_point = _fc(out_channels[3], num_classes)

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        """
        Make stage network of ResNet.

        Args:
            block (Cell): Resnet block.
            layer_num (int): Layer number.
            in_channel (int): Input channel.
            out_channel (int): Output channel.
            stride (int): Stride size for the first block of the stage.

        Returns:
            SequentialCell, the output layer.

        Examples:
            >>> _make_layer(ResidualBlock, 3, 128, 256, 2)
        """
        # Only the first block changes channels/stride; the rest keep the shape.
        stage = [block(in_channel, out_channel, stride=stride)]
        stage.extend(block(out_channel, out_channel, stride=1) for _ in range(1, layer_num))
        return nn.SequentialCell(stage)

    def construct(self, x):
        """Apply the stem, the four residual stages and the classification head."""
        x = self.relu(self.bn1(self.conv1(x)))  # 7x7 conv, batch norm, ReLU
        x = self.maxpool(x)                     # 3x3 max pooling, stride 2
        x = self.layer1(x)                      # conv2_x
        x = self.layer2(x)                      # conv3_x
        x = self.layer3(x)                      # conv4_x
        x = self.layer4(x)                      # conv5_x
        x = self.mean(x, (2, 3))                # average over axes 2 and 3
        x = self.flatten(x)
        return self.end_point(x)                # dense output layer
# 构建ResNet50 网络
def resnet50(class_num=5):
    """
    Get ResNet50 neural network.

    Args:
        class_num (int): Class number.

    Returns:
        Cell, cell instance of ResNet50 neural network.

    Examples:
        >>> net = resnet50(10)
    """
    return ResNet(
        block=ResidualBlock,
        layer_nums=[3, 4, 6, 3],               # blocks per stage
        in_channels=[64, 256, 512, 1024],      # input channels per stage
        out_channels=[256, 512, 1024, 2048],   # output channels per stage
        strides=[1, 2, 2, 2],                  # stride per stage
        num_classes=class_num,
    )
- 开始训练
完成数据预处理、网络定义、损失函数和优化器定义之后,开始模型训练。模型训练包含2层迭代:外层是数据集的多轮迭代(epoch),内层是每一轮中按批次(batch)从数据集中抽取数据,输入网络计算得到损失函数,再通过反向传播计算梯度,并由优化器更新训练参数。
1.下载预训练模型
新建目录model_resnet,通过https://download.mindspore.cn/models/r1.7/resnet50_ascend_v170_imagenet2012_official_cv_top1acc76.97_top5acc93.44.ckpt下载已经在ImageNet数据集上训练好的预训练模型文件,并存放在指定目录model_resnet下。
2.加载预训练模型
通过load_checkpoint()接口读取预训练模型文件获取字典格式的参数文件。
3.修改预训练模型参数
预训练模型是在ImageNet数据集上训练的1001分类任务,而我们当前的实验任务是flowers数据集的5分类任务,因此需要修改预训练模型最后一层全连接层的参数(end_point.weight和end_point.bias),使其与5分类的输出维度一致。
# Build ResNet50 with one output per flower class.
net = resnet50(class_num=cfg.num_class)
# Read the pre-trained parameters as a name -> Parameter dictionary.
param_dict = load_checkpoint("model_resnet/resnet50_ascend_v170_imagenet2012_official_cv_top1acc76.97_top5acc93.44.ckpt")
# Show the parameters that were read.
print(param_dict)
# The checkpoint's classifier has more outputs than this task needs, so keep
# only the first cfg.num_class rows; the shapes then match net.end_point.
param_dict["end_point.weight"] = mindspore.Parameter(Tensor(param_dict["end_point.weight"][:cfg.num_class, :], mindspore.float32), name="variable")
param_dict["end_point.bias"] = mindspore.Parameter(Tensor(param_dict["end_point.bias"][:cfg.num_class, ], mindspore.float32), name="variable")
# Copy the pre-trained values into the network. Without this call the
# checkpoint is read but never used and training starts from random weights.
mindspore.load_param_into_net(net, param_dict)
# Softmax cross-entropy loss on integer (sparse) labels.
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
# Cosine-decay learning-rate schedule covering every training step.
train_step_size = de_train.get_dataset_size()
lr = nn.cosine_decay_lr(min_lr=0.0001, max_lr=0.001, total_step=train_step_size * cfg.epoch_size, step_per_epoch=train_step_size, decay_epoch=cfg.epoch_size)
# Momentum optimizer; its loss_scale matches the loss-scale manager below.
opt = Momentum(net.trainable_params(), lr, momentum=0.9, weight_decay=1e-4, loss_scale=cfg.loss_scale_num)
# Fixed loss scaling, used to keep small gradients from vanishing.
loss_scale = FixedLossScaleManager(cfg.loss_scale_num, False)
# Bundle network, loss, optimizer, loss scaling and the accuracy metric.
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
# Print the loss once per epoch.
loss_cb = LossMonitor(per_print_times=train_step_size)
# Checkpoint policy: how often to save and how many files to keep.
ckpt_config = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, keep_checkpoint_max=1)
# Checkpoint callback: file prefix, output directory and the policy above.
ckpoint_cb = ModelCheckpoint(prefix=cfg.prefix, directory=cfg.directory, config=ckpt_config)
print("============== Starting Training ==============")
# Train for cfg.epoch_size epochs. Dataset sink mode is only available on
# Ascend and GPU, where it speeds up training.
model.train(cfg.epoch_size, de_train, callbacks=[loss_cb, ckpoint_cb], dataset_sink_mode=True)
# Training takes roughly 15-20 minutes.
# Evaluate on the test split and print the accuracy.
metric = model.eval(de_test)
print(metric)
输出
============== Starting Training ==============
epoch: 1 step: 113, loss is 0.23505911231040955
epoch: 2 step: 113, loss is 0.11479882150888443
epoch: 3 step: 113, loss is 0.13273288309574127
epoch: 4 step: 113, loss is 0.42304447293281555
epoch: 5 step: 113, loss is 0.14625898003578186
{'acc': 0.8087912087912088}
步骤七、模型预测
通过训练好的权重文件,调用model.predict()对测试数据进行测试,输出预测结果和真实结果。
# Predict 10 test samples and print the predicted and true class of each.
class_names = {0: 'daisy', 1: 'dandelion', 2: 'roses', 3: 'sunflowers', 4: 'tulips'}
# Create the iterator once and take ten items from it. Calling
# create_dict_iterator() inside the loop would build a fresh iterator for
# every sample. zip() with range(10) first stops without fetching an 11th item.
for i, test_ in zip(range(10), de_test.create_dict_iterator()):
    test = Tensor(test_['image'], mindspore.float32)
    # Run the model on this batch (the test split uses a batch size of 1).
    predictions = model.predict(test)
    predictions = predictions.asnumpy()
    true_label = test_['label'].asnumpy()
    # The index of the largest score is the predicted class.
    p_np = predictions[0, :]
    pre_label = np.argmax(p_np)
    print('第' + str(i) + '个sample预测结果:', class_names[pre_label], ' 真实结果:', class_names[true_label[0]])
小结:
本次实验使用MindSpore框架成功实现了基于ResNet50的花卉图像分类,模型经过训练后在仅有5个epochs下可达到80%的准确率,验证了深度学习在图像识别任务中的有效性,为后续优化和应用奠定了基础。