一开始使用普通 CNN,最终 accuracy 只有约 0.8;后来改用 Swin-T,准确率达到约 99%(验证集 99.4%,测试集 98.5%)。
完整代码实现:
先引入相关的包
import os,shutil
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from torchvision import models
from torch.optim.lr_scheduler import MultiStepLR
from glob import glob
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.applications import VGG16
import torch
from torch import nn
import tensorflow as tf
print(tf.__version__)
划分数据集:
数据集下载地址:Dogs vs. Cats | Kaggle(https://www.kaggle.com/c/dogs-vs-cats)
# Build a small dataset from the Kaggle "Dogs vs. Cats" training images.
# Per class (cat / dog): 1000 training, 500 validation and 500 test images,
# i.e. 4000 images in total, laid out as <split>/<class>/ for ImageFolder.
base_dir = './small_dataset'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# (destination split directory, first source index, one-past-last source index)
_splits = [
    (train_dir, 0, 1000),
    (validation_dir, 1000, 1500),
    (test_dir, 1500, 2000),
]
for _split_dir, _start, _stop in _splits:
    for _animal in ('cat', 'dog'):
        _class_dir = os.path.join(_split_dir, _animal + 's')
        # exist_ok=True keeps the script re-runnable and lets it repair a
        # partially created directory tree instead of raising FileExistsError.
        os.makedirs(_class_dir, exist_ok=True)
        for _i in range(_start, _stop):
            _name = '{}.{}.jpg'.format(_animal, _i)
            # Skip files already copied by a previous run.
            if not os.path.exists(os.path.join(_class_dir, _name)):
                shutil.copy(os.path.join('train', _name), _class_dir)
划分后的结构:
设置种子
# Seed every random number generator in use so that runs are reproducible.
myseed = 6666
# Ask cuDNN for deterministic kernels and turn off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Python's own RNG is imported by this script, so seed it together with
# NumPy and PyTorch.
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)
    print("Using GPU")
进行Train
训练一轮就够了
# ---- Training configuration ----
num_classes = 2   # cat vs. dog
n_epochs = 1      # number of training epochs
patience = 5      # early stopping: tolerated epochs without improvement

device = 'cuda' if torch.cuda.is_available() else 'cpu'
criterion = nn.CrossEntropyLoss()

# Swin-T backbone with its default pretrained weights; the classification
# head is replaced by a fresh linear layer with one output per class.
model = models.swin_t(weights=models.Swin_T_Weights.DEFAULT)
model.head = nn.Linear(model.head.in_features, num_classes)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Halve the learning rate at 50% and 80% of training.  For small n_epochs
# int(n_epochs * 0.5) evaluates to 0, and a milestone at epoch 0 makes
# MultiStepLR decay the rate at construction time, before any training step
# (n_epochs=1 gave milestones [0, 0], i.e. 2.5e-5 instead of 1e-4).
# Clamp the milestones to >= 1 and drop duplicates to avoid that.
_milestones = sorted({max(1, int(n_epochs * 0.5)), max(1, int(n_epochs * 0.8))})
scheduler = MultiStepLR(optimizer, milestones=_milestones, gamma=0.5)
# 数据缩放
# 替换ImageDataGenerator为PyTorch数据加载方式
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
# 数据预处理
# Per-channel normalisation constants (the commonly used ImageNet statistics),
# shared by the training and evaluation pipelines.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

# Training pipeline: augmentations applied to the loaded image first, then
# conversion to a tensor, then the tensor-level steps.
_train_steps = [
    transforms.RandomResizedCrop(224, scale=(0.9, 1)),  # random crop, resized to 224x224
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=45, translate=(0.05, 0.05), shear=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomGrayscale(p=0.1),  # convert to grayscale with probability 0.1
    transforms.ToTensor(),
    # RandomErasing and Normalize are placed after ToTensor() here because
    # they work on tensors.
    transforms.RandomErasing(scale=(0.02, 0.33)),
    transforms.Normalize(_channel_mean, _channel_std),
]
train_transforms = transforms.Compose(_train_steps)

# Validation and test data get no augmentation: resize, convert, normalise.
_eval_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
val_transforms = transforms.Compose(_eval_steps)
# 数据集
# Datasets read the <split>/<class>/ folder layout; labels come from the
# sub-directory names.
_batch_size = 20
train_dataset = ImageFolder(root=train_dir, transform=train_transforms)
val_dataset = ImageFolder(root=validation_dir, transform=val_transforms)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=_batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=_batch_size, shuffle=False)
# Initialize trackers, these are not parameters and should not be changed
# Trackers for early stopping and checkpointing (state, not hyper-parameters).
stale = 0        # consecutive epochs without a new best validation accuracy
best_acc = 0
_exp_name = 2025  # prefix of the checkpoint file name
# Mean training loss of every finished epoch, kept so the loss curve can be
# plotted after training.
train_loss = []

for epoch in range(n_epochs):
    # ---------- Training ----------
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for imgs, labels in tqdm(train_loader):
        # Model and data must live on the same device.
        imgs = imgs.to(device)
        labels = labels.to(device)

        # CrossEntropyLoss takes raw logits; no explicit softmax is needed.
        logits = model(imgs)
        loss = criterion(logits, labels)

        # Clear gradients left over from the previous step, back-propagate,
        # clip the gradient norm for stability, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()

        # Accumulate per-sample totals as Python numbers so the epoch
        # averages stay exact even when the last batch is smaller, and no
        # tensors are kept alive on the device.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        n_correct += (logits.argmax(dim=-1) == labels).sum().item()
        n_seen += batch_size

    epoch_train_loss = loss_sum / n_seen
    train_acc = n_correct / n_seen
    train_loss.append(epoch_train_loss)
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {epoch_train_loss:.5f}, acc = {train_acc:.5f}")

    # Report the rate used during this epoch, then advance the schedule.
    current_lr = scheduler.get_last_lr()[0]
    print(f"\nCurrent learning rate: {current_lr}")
    scheduler.step()

    # ---------- Validation ----------
    # eval() switches modules such as dropout to inference behaviour.
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    # No gradients are needed for validation.
    with torch.no_grad():
        for imgs, labels in tqdm(val_loader):
            imgs = imgs.to(device)
            labels = labels.to(device)
            logits = model(imgs)
            loss = criterion(logits, labels)

            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            n_correct += (logits.argmax(dim=-1) == labels).sum().item()
            n_seen += batch_size

    valid_loss = loss_sum / n_seen
    valid_acc = n_correct / n_seen
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    # Keep only the best checkpoint to limit disk usage.
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt")
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        # Stop once `patience` consecutive epochs brought no improvement,
        # matching the message below.
        if stale >= patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break
100%|██████████| 100/100 [14:02<00:00, 8.42s/it]
[ Train | 001/001 ] loss = 0.26344, acc = 0.90650 Current learning rate: 2.5e-05
100%|██████████| 50/50 [02:10<00:00, 2.61s/it]
[ Valid | 001/001 ] loss = 0.04304, acc = 0.99400 Best model found at epoch 0, saving model
进行图像增强后的img
画图
# 画出训练时损失下降趋势
# n_epochs = 3 # 训练轮数
# initial_loss = 2.0 # 初始损失值
# final_loss = 0.04304 # 最终损失值
# # 构造模拟的损失数据 - 指数下降趋势
# train_loss = initial_loss * np.exp(-np.linspace(0, 5, n_epochs)) + np.random.normal(0, 0.05, n_epochs)
# train_loss = np.clip(train_loss, final_loss, None)
# Plot the mean training loss per epoch.
# `train_loss` may be a single number (loss of the last epoch only) or a
# sequence with one value per finished epoch; normalise it to a 1-D array so
# the x and y data always have the same length.
_epoch_losses = np.atleast_1d(np.asarray(train_loss, dtype=float))
train_counter = list(range(1, len(_epoch_losses) + 1))

fig = plt.figure()
# The marker keeps a single-epoch run visible; a one-point line draws nothing.
plt.plot(train_counter, _epoch_losses, color='blue', marker='o')
plt.legend(['Train Loss'], loc='upper right')
plt.xticks(train_counter)
plt.yticks(np.arange(0, 2.1, 0.1))
# The x axis counts epochs, not individual training examples.
plt.xlabel('epoch')
plt.ylabel('negative log likelihood loss')
plt.show()
测试模型
# test模型
# Test data uses the same deterministic preprocessing as validation.
test_dataset = ImageFolder(test_dir, transform=val_transforms)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False, num_workers=4)

# Rebuild the architecture and restore the best checkpoint saved by training.
# weights=None: every parameter is overwritten by load_state_dict below, so
# fetching the pretrained weights again would be wasted work.
model = models.swin_t(weights=None)
model.head = nn.Linear(model.head.in_features, num_classes)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(f"./{_exp_name}_best.ckpt", map_location=device))
model.to(device)
# Average test loss recorded by each call to test().
test_losses = []


def test():
    """Evaluate the module-level ``model`` on ``test_loader``.

    Computes the mean per-sample loss and the number of correct predictions
    over the whole test set, appends the mean loss to ``test_losses`` and
    prints a one-line summary.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # criterion returns the mean loss of the batch; weight it by the
            # batch size so that dividing by the dataset size below yields a
            # true per-sample average.
            loss_sum += criterion(output, target).item() * target.size(0)
            # Predicted class = index of the largest logit.
            correct += (output.argmax(dim=1) == target).sum().item()
    n_samples = len(test_loader.dataset)
    test_loss = loss_sum / n_samples
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        100. * correct / n_samples))


test()
Test set: Avg. loss: 0.0028, Accuracy: 985/1000 (99%)
END