- **🍨 This post is a learning-record entry for the [🔗365天深度学习训练营](https://mp.weixin.qq.com/s/rnFa-IeY93EpjVu0yzzjkw)**
- **🍖 Original author: [K同学啊](https://mtyjkh.blog.csdn.net/)**
Preface
This post uses PyTorch to recognize license plates. The focus is on building a custom dataset: learning to define your own Dataset class and then using it to train the model and perform recognition.
I. Environment Setup
The environment used in this post:
Python == 3.8
Editor: Jupyter Lab
torch == 2.1.0+cu118
torchvision == 0.16.0+cu118
The following code checks whether a GPU is available; if the hardware does not support CUDA, the CPU is used instead:
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.models as models
import torch.nn.functional as F
import torch.nn as nn
import torch,torchvision
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
II. Building the Dataset
(1) Preprocessing
1. To recognize license plates we first need to know which plate each image corresponds to. In this dataset the file name of each image is its plate number, so the first step is to extract every plate string, i.e. the file names.
import os,PIL,random,pathlib
import matplotlib.pyplot as plt
# Chinese character support in matplotlib
plt.rcParams['font.sans-serif'] = ['SimHei']   # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False     # display minus signs correctly
data_dir='./data_10/'
data_dir=pathlib.Path(data_dir)
data_dir
data_path=list(data_dir.glob('*'))
data_path
# file names look like '<id>_<plate>.jpg'; drop the folder ('\\' is the Windows path
# separator), then keep the part between '_' and the extension -- the plate string
classNames=[str(path).split('\\')[1].split('_')[1].split('.')[0] for path in data_path]
classNames
2. Display some of the plates. Since imread needs a file path, we first store the paths of all image files.
data_paths=list(data_dir.glob('*'))
data_paths_str=[str(path) for path in data_paths]
data_paths_str
import matplotlib.pyplot as plt
import os,PIL,random,pathlib
plt.rcParams['font.sans-serif'] = ['SimHei']   # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False     # display minus signs correctly
plt.figure(figsize=(14,5))
plt.suptitle('数据展示',fontsize=15)
for i in range(18):
    plt.subplot(3,6,i+1)
    images=plt.imread(data_paths_str[i])   # imread loads the image from its path
    plt.imshow(images)
plt.show()
3. Store the labels. Each plate is encoded as a one-hot matrix: the rows correspond to the positions in the plate (the plate length), and the columns correspond to every character that can appear (province abbreviations, digits and letters). For example, for the plate '川W9BR26' the entry in the first row, at the column belonging to the character '川', is set to 1, meaning that '川' appears in the first position.
import numpy as np
# turn the labels into numbers
char_enum = ["京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖","闽","赣","鲁",\
             "豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新","军","使"]
number=[str(i) for i in range(0,10)]
alphabet=[chr(i) for i in range(65,91)]    # letters A to Z
char_set=char_enum+number+alphabet
char_set_len=len(char_set)                 # number of possible characters (33+10+26=69)
label_name_len=len(classNames[0])          # plate length (7 characters here)
def text2vec(text):
    # encode a plate string as a one-hot matrix
    vector=np.zeros([label_name_len,char_set_len])   # rows = positions, columns = characters
    for i,c in enumerate(text):            # i is the position, c is the character
        idx=char_set.index(c)              # column index of character c
        vector[i][idx]=1.0
    return vector
all_labels=[text2vec(i) for i in classNames]
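As a quick sanity check (not part of the original post), a small hypothetical helper can decode a label matrix back into its plate string and confirm the encoding round-trips:
def vec2text(vector):
    # for every row (character position) take the column whose value is 1
    # and map it back to the corresponding character in char_set
    return ''.join(char_set[int(idx)] for idx in vector.argmax(axis=1))
print(classNames[0], '->', vec2text(all_labels[0]))   # should print the same plate string twice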
(2) Building the dataset
A custom dataset must inherit from the abstract base class data.Dataset; only subclasses of this class can work together with components such as DataLoader. The original data.Dataset class looks like this:
from abc import ABC, abstractmethod
class Dataset(ABC):
    """An abstract class representing a Dataset.

    All datasets that represent a map from keys to data samples should subclass
    this class. All subclasses should overwrite :meth:`__getitem__`, supporting
    fetching a data sample for a given key. Subclasses could also optionally
    overwrite :meth:`__len__`, which is expected to return the size of the dataset
    by many :class:`~torch.utils.data.Sampler` implementations and the default
    options of :class:`~torch.utils.data.DataLoader`.

    .. note::
      :class:`~torch.utils.data.DataLoader` by default constructs a index
      sampler that yields integral indices. To make it work with a map-style
      dataset with non-integral indices/keys, a custom sampler must be provided.
    """

    @abstractmethod
    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            sample: data sample at given index
        """
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __add__(self, other):
        return ConcatDataset([self, other])
In the subclass, __getitem__ is where the data is loaded and preprocessed; it connects the raw data to the model's input. For an image task the basic information is the image itself and its label, and __getitem__ is also the place where the data format is converted. So the dataset is constructed from three things: the label information (already stored as one-hot matrices, so the matrices are passed in directly), the image information (an image can be loaded from its path and converted into pixel data, so the list of image paths is passed in), and the transform (the usual image transform mentioned several times above).
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset
import torch.utils.data as data
from PIL import Image

class MyDataset(data.Dataset):
    def __init__(self,all_labels,data_paths_str,transform):
        self.img_labels=all_labels        # labels (one-hot matrices)
        self.img_dir=data_paths_str       # image paths (images are loaded lazily in __getitem__)
        self.transform=transform          # transform applied to every image
    def __len__(self):
        return len(self.img_labels)       # the dataset size equals the number of labels
    def __getitem__(self,index):
        image=Image.open(self.img_dir[index]).convert('RGB')   # load the image and convert it to RGB
        label=self.img_labels[index]      # the matching one-hot label
        if self.transform:
            image=self.transform(image)
        return image,label                # return the image and its label

train_transforms=transforms.Compose([
    transforms.Resize([224,224]),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225]
    )
])
total_data=MyDataset(all_labels,data_paths_str,train_transforms)
total_data
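To confirm that the dataset returns what we expect (this check is not in the original post), index a single sample:
img, label = total_data[0]
print(img.shape, label.shape)   # expected: torch.Size([3, 224, 224]) and (7, 69)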
(3) Splitting the dataset
train_size=int(0.8*len(total_data))
test_size=len(total_data)-train_size
train_dataset,test_dataset=torch.utils.data.random_split(total_data,[train_size,test_size])
train_size,test_size
train_loader=torch.utils.data.DataLoader(train_dataset,
batch_size=16,
shuffle=True)
test_loader=torch.utils.data.DataLoader(test_dataset,
batch_size=16,
shuffle=True)
print("The number of images in a training set is: ", len(train_loader)*16)
print("The number of images in a test set is: ", len(test_loader)*16)
print("The number of batches per epoch is: ", len(train_loader))
Checking the shapes
for x,y in test_loader:
    print('Shape of x:',x.shape)
    print('Shape of y:',y.shape)
    break
III. Building the Model
class Network_bn(nn.Module):
    def __init__(self):
        super(Network_bn,self).__init__()
        self.conv1=nn.Conv2d(3,12,kernel_size=5,stride=1,padding=0)
        self.bn1=nn.BatchNorm2d(12)
        self.conv2=nn.Conv2d(12,12,kernel_size=5,stride=1,padding=0)
        self.bn2=nn.BatchNorm2d(12)
        self.pool=nn.MaxPool2d(2,2)
        self.conv4=nn.Conv2d(12,24,5,1,0)
        self.bn4=nn.BatchNorm2d(24)
        self.conv5=nn.Conv2d(24,24,5,1,0)
        self.bn5=nn.BatchNorm2d(24)
        self.fc1=nn.Linear(24*50*50,label_name_len*char_set_len)
        # map the flattened features to plate_length * charset_size outputs,
        # then reshape them into a (plate_length, charset_size) matrix
        self.reshape=Reshape([label_name_len,char_set_len])
    def forward(self,x):
        # (-1,3,224,224) -> (-1,12,220,220)
        x=F.relu(self.bn1(self.conv1(x)))
        # (-1,12,220,220) -> (-1,12,216,216)
        x=F.relu(self.bn2(self.conv2(x)))
        # (-1,12,216,216) -> (-1,12,108,108)
        x=self.pool(x)
        # (-1,12,108,108) -> (-1,24,104,104)
        x=F.relu(self.bn4(self.conv4(x)))
        # (-1,24,104,104) -> (-1,24,100,100)
        x=F.relu(self.bn5(self.conv5(x)))
        # (-1,24,100,100) -> (-1,24,50,50)
        x=self.pool(x)
        x=x.view(-1,24*50*50)
        x=self.fc1(x)
        x=self.reshape(x)
        return x

class Reshape(nn.Module):
    def __init__(self,shape):
        super(Reshape,self).__init__()
        self.shape=shape
    def forward(self,x):
        return x.view(x.size(0),*self.shape)
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))
model = Network_bn().to(device)
model
Using cuda device
Network_bn(
(conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
(bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(12, 12, kernel_size=(5, 5), stride=(1, 1))
(bn2): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv4): Conv2d(12, 24, kernel_size=(5, 5), stride=(1, 1))
(bn4): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv5): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1))
(bn5): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(fc1): Linear(in_features=60000, out_features=483, bias=True)
(reshape): Reshape()
)
Checking the parameters
import torchsummary
torchsummary.summary(model,(3,224,224))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 12, 220, 220] 912
BatchNorm2d-2 [-1, 12, 220, 220] 24
Conv2d-3 [-1, 12, 216, 216] 3,612
BatchNorm2d-4 [-1, 12, 216, 216] 24
MaxPool2d-5 [-1, 12, 108, 108] 0
Conv2d-6 [-1, 24, 104, 104] 7,224
BatchNorm2d-7 [-1, 24, 104, 104] 48
Conv2d-8 [-1, 24, 100, 100] 14,424
BatchNorm2d-9 [-1, 24, 100, 100] 48
MaxPool2d-10 [-1, 24, 50, 50] 0
Linear-11 [-1, 483] 28,980,483
Reshape-12 [-1, 7, 69] 0
================================================================
Total params: 29,006,799
Trainable params: 29,006,799
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 26.56
Params size (MB): 110.65
Estimated Total Size (MB): 137.79
----------------------------------------------------------------
Loss function and optimizer
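The definitions of loss_model and optimizer never appear in the post, although the training loop below uses both names. A minimal placeholder, assuming a cross-entropy loss and an Adam optimizer (the original choices are not shown and may well differ), would be:
loss_model = nn.CrossEntropyLoss()                            # assumption: the original loss is not shown
optimizer  = torch.optim.Adam(model.parameters(), lr=1e-4)    # assumption: optimizer and learning rate are not shown
# note: text2vec produces float64 labels; if the loss complains about dtypes, cast them with labels.float()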
IV. Training the Model
(1) Training code
The test metric here is a per-character score averaged over the whole plate. y.argmax(dim=2) takes the argmax along the last dimension (the character-class dimension): for every character position it finds the column with the largest value, i.e. the index of the true character at that position. torch.gather then reads the model's output at exactly those indices, the values are accumulated, and their average over all characters is reported as prob_accuracy, a soft per-character accuracy rather than an exact character-match rate.
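To see what the gather step does, here is a tiny standalone illustration (the values are made up):
import torch
p = torch.tensor([[[0.1, 0.7, 0.2],
                   [0.6, 0.3, 0.1]]])                  # predictions: batch=1, 2 positions, 3 classes
idx = torch.tensor([[[1], [0]]])                       # index of the true class at each position
print(torch.gather(p, dim=2, index=idx).squeeze(2))    # tensor([[0.7000, 0.6000]])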
from torch.autograd import Variable   # Variable is a no-op wrapper in modern PyTorch
def test(model, test_loader, loss_model):
    size = len(test_loader.dataset)      # total number of test samples
    num_batches = len(test_loader)
    model.eval()
    test_loss = 0.0
    total_prob_accuracy = 0.0            # accumulated score of the true characters
    total_chars = 0                      # accumulated number of characters
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            pred = model(x)              # shape: [-1, 7, 69]
            # 1. compute the loss
            test_loss += loss_model(pred, y).item()
            # 2. indices of the true characters
            y_indices = y.argmax(dim=2)  # shape: [-1, 7]
            # 3. the model's output score for each true character
            batch_size, seq_len = y_indices.shape      # seq_len = 7
            indices = y_indices.unsqueeze(2)           # shape: [batch_size, 7, 1]
            # gather the score at each true character's index
            true_char_probs = torch.gather(pred, dim=2, index=indices).squeeze(2)  # shape: [-1, 7]
            # 4. accumulate the scores and the character count
            total_prob_accuracy += true_char_probs.sum().item()
            total_chars += batch_size * seq_len
    # average score over all characters, reported as the accuracy
    prob_accuracy = total_prob_accuracy / total_chars
    test_loss /= num_batches
    print(f'test_loss: {test_loss:.3f}, prob_accuracy: {prob_accuracy:.3f}')
    return prob_accuracy, test_loss
def train(model,train_loader,loss_model,optimizer):
    model=model.to(device)
    model.train()
    for i, (images, labels) in enumerate(train_loader, 0):   # 0 is the starting index of enumerate
        images = Variable(images.to(device))
        labels = Variable(labels.to(device))
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_model(outputs, labels)
        loss.backward()
        optimizer.step()
        if i % 1000 == 0:
            print('[%5d] loss: %.3f' % (i, loss))
(2) Running the training
test_acc_list=[]
test_loss_list=[]
epochs=30
for t in range(epochs):
    print(f'Epoch {t+1}\n-----------------')
    train(model,train_loader,loss_model,optimizer)
    test_acc,test_loss=test(model,test_loader,loss_model)
    test_acc_list.append(test_acc)
    test_loss_list.append(test_loss)
print('done')
(3) Visualization
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
current_time = datetime.now()   # current time
x = [i for i in range(1,31)]    # epochs 1 to 30
plt.plot(x, test_loss_list, label="Loss", alpha=0.8)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title(current_time)         # include a timestamp in the title (required for the camp check-in; screenshots without it are not accepted)
plt.legend()
plt.show()
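The accuracy values collected in test_acc_list are never plotted above; an optional matching curve, in the same style as the loss plot, could be drawn like this:
plt.plot(x, test_acc_list, label="Accuracy", alpha=0.8)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title(current_time)
plt.legend()
plt.show()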