Swin Transformer实现catvsdog猫狗分类99.5%准确度

一开始使用普通 CNN,最终 accuracy 只有 0.8 左右;后来改用 Swin-T(Swin Transformer Tiny),准确度达到了 99.5%。

完整代码实现:

先引入相关的包

import os,shutil
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from torchvision import models
from torch.optim.lr_scheduler import MultiStepLR
from glob import glob
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.applications import VGG16
import torch
from torch import nn
import tensorflow as tf
print(tf.__version__)

划分数据集:

数据集下载地址:Dogs vs. Cats | Kaggle(https://www.kaggle.com/c/dogs-vs-cats)

# Build a small subset of the Kaggle "Dogs vs. Cats" data under ./small_dataset.
# Per class: images 0-999 go to train, 1000-1499 to validation, 1500-1999 to test
# (2000 train / 1000 validation / 1000 test images in total).
# The originals are read from ./train as cat.<i>.jpg and dog.<i>.jpg.
base_dir = './small_dataset'
source_dir = 'train'
split_ranges = {
    'train': range(0, 1000),
    'validation': range(1000, 1500),
    'test': range(1500, 2000),
}
for split_name, indices in split_ranges.items():
    for animal in ('cat', 'dog'):
        target_dir = os.path.join(base_dir, split_name, animal + 's')
        # exist_ok keeps the script re-runnable: an interrupted first run no
        # longer leaves a half-built directory tree that is skipped forever.
        os.makedirs(target_dir, exist_ok=True)
        for i in indices:
            file_name = '{}.{}.jpg'.format(animal, i)
            target_path = os.path.join(target_dir, file_name)
            # Only copy what is missing, so a re-run just fills the gaps.
            if not os.path.exists(target_path):
                shutil.copy(os.path.join(source_dir, file_name), target_path)
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

划分后的结构:

设置种子

def seed_everything(seed):
    """Seed every random number generator this script relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU generator, plus
    all CUDA devices when CUDA is available).

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


myseed = 6666  # set a random seed for reproducibility
# Prefer deterministic cuDNN kernels and disable the cuDNN auto-tuner
# (see the PyTorch reproducibility notes).
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
seed_everything(myseed)
if torch.cuda.is_available():
    print("Using GPU")

进行Train

训练一轮就够了


# ---- Hyper-parameters ----
num_classes = 2  # two target classes: cats and dogs
n_epochs = 1     # number of training epochs
patience = 5     # early-stopping patience, in epochs without improvement

criterion = nn.CrossEntropyLoss()
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build Swin-T with its default pretrained weights, then replace the
# classification head with a fresh num_classes-way linear layer.
model = models.swin_t(weights=models.Swin_T_Weights.DEFAULT)
model.head = nn.Linear(model.head.in_features, num_classes)
model.to(device)

# Adam optimizer over all parameters (backbone and new head).
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Halve the learning rate at 50% and 80% of training.
# For small n_epochs the two int(...) values collapse to 0 (n_epochs=1 gives
# [0, 0]); MultiStepLR would then apply gamma twice while being constructed
# and training would silently start at lr * 0.25. Dropping non-positive and
# duplicate milestones keeps the configured lr until a real milestone epoch.
milestones = sorted(
    {m for m in (int(n_epochs * 0.5), int(n_epochs * 0.8)) if m > 0}
)
scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.5)

# Input pipeline: torchvision transforms + ImageFolder datasets + DataLoaders.
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Shared settings for both pipelines.
_input_size = 224
_batch_size = 20
# Per-channel mean / std used for normalisation (the usual ImageNet statistics).
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

# Training pipeline: augmentations on the PIL image first, then the
# tensor-level steps (RandomErasing and Normalize run after ToTensor).
_train_steps = [
    # Random crop covering 90-100% of the image, resized to 224x224.
    transforms.RandomResizedCrop(size=_input_size, scale=(0.9, 1)),
    # Random vertical and horizontal flips.
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    # Random rotation / translation / shear.
    transforms.RandomAffine(degrees=45, translate=(0.05, 0.05), shear=10),
    # Colour jitter.
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    # Convert to grayscale with probability 0.1.
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),
    transforms.RandomErasing(scale=(0.02, 0.33)),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
train_transforms = transforms.Compose(_train_steps)

# Validation / test pipeline: no augmentation, only resize + normalise.
val_transforms = transforms.Compose([
    transforms.Resize(size=(_input_size, _input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
])

# Datasets: one sub-folder per class under each split directory.
train_dataset = ImageFolder(root=train_dir, transform=train_transforms)
val_dataset = ImageFolder(root=validation_dir, transform=val_transforms)

# Loaders: shuffle only the training data.
train_loader = DataLoader(train_dataset, batch_size=_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=_batch_size, shuffle=False)

# Initialize trackers, these are not parameters and should not be changed
stale = 0         # consecutive epochs without a new best validation accuracy
best_acc = 0      # best validation accuracy seen so far
_exp_name = 2025  # prefix of the checkpoint file name

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # Per-batch records for this epoch.
    train_loss = []
    train_accs = []

    for imgs, labels in tqdm(train_loader):
        # Move the batch to the model's device once.
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Forward pass. CrossEntropyLoss takes raw logits, no softmax needed.
        logits = model(imgs)
        loss = criterion(logits, labels)

        # Clear old gradients, back-propagate, clip, then update.
        optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norms for stable training.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()

        # Batch accuracy, stored as a Python float so that no device tensors
        # are kept alive in the record lists.
        acc = (logits.argmax(dim=-1) == labels).float().mean()
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    # Epoch averages over batches; train_loss becomes a single float here.
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    current_lr = scheduler.get_last_lr()[0]
    print(f"\nCurrent learning rate: {current_lr}")
    scheduler.step()

    # ---------- Validation ----------
    # Eval mode disables training-only behaviour such as dropout.
    model.eval()

    valid_loss = []
    valid_accs = []

    # No gradients are needed anywhere in validation.
    with torch.no_grad():
        for imgs, labels in tqdm(val_loader):
            imgs = imgs.to(device)
            labels = labels.to(device)

            logits = model(imgs)
            loss = criterion(logits, labels)
            acc = (logits.argmax(dim=-1) == labels).float().mean()

            valid_loss.append(loss.item())
            valid_accs.append(acc.item())

    # Average loss and accuracy over the validation batches.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    # Keep only the best checkpoint to limit disk usage.
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt")
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        # Stop after exactly `patience` stale epochs, as the message states
        # (the previous `>` test waited for patience + 1).
        if stale >= patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break
            
            

100%|██████████| 100/100 [14:02<00:00, 8.42s/it]

[ Train | 001/001 ] loss = 0.26344, acc = 0.90650 Current learning rate: 2.5e-05

100%|██████████| 50/50 [02:10<00:00, 2.61s/it]

[ Valid | 001/001 ] loss = 0.04304, acc = 0.99400 Best model found at epoch 0, saving model

进行图像增强后的img

画图

# Plot the training-loss curve, one point per epoch.
# train_loss may be a single number (the training loop keeps only the last
# epoch's mean loss) or a sequence of per-epoch losses; both are accepted.
losses = np.atleast_1d(train_loss)
# Align the recorded values so that the last one sits at epoch n_epochs
# (assumes training ran for all n_epochs, i.e. no early stop).
train_counter = list(range(n_epochs - len(losses) + 1, n_epochs + 1))
fig = plt.figure()
# marker='o' keeps a one-epoch run visible: a line through a single point
# draws nothing.
plt.plot(train_counter, losses, color='blue', marker='o')
plt.legend(['Train Loss'], loc='upper right')
plt.yticks(np.arange(0, 2.1, 0.1))
# The x axis counts epochs, not training examples.
plt.xlabel('epoch')
plt.ylabel('negative log likelihood loss')
plt.show()

测试模型

# Evaluate the best checkpoint on the held-out test split.
test_dataset = ImageFolder(test_dir, transform=val_transforms)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False, num_workers=4)

# Rebuild the same architecture that was trained. weights=None skips loading
# the pretrained weights, which load_state_dict would overwrite anyway.
model = models.swin_t(weights=None)
model.head = nn.Linear(model.head.in_features, num_classes)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('./2025_best.ckpt', map_location=device))
model.to(device)

# Average test loss of every test() call, in call order.
test_losses = []
# Run the model over the test set and report loss / accuracy.
def test():
    """Evaluate the global ``model`` on ``test_loader`` and print the result.

    Reads the module-level ``model``, ``test_loader``, ``criterion`` and
    ``device``. Appends the per-sample average loss to the module-level
    ``test_losses`` list and prints average loss and accuracy.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # criterion (nn.CrossEntropyLoss with its default reduction)
            # returns the batch mean; weight it by the batch size so that the
            # division below gives a true per-sample average. Summing batch
            # means and dividing by the sample count under-reports the loss
            # by a factor of the batch size.
            total_loss += criterion(output, target).item() * target.size(0)
            correct += (output.argmax(dim=1) == target).sum().item()
    num_samples = len(test_loader.dataset)
    test_loss = total_loss / num_samples
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))


test()

Test set: Avg. loss: 0.0028, Accuracy: 985/1000 (99%)
END

以下是使用 Swin Transformer 实现猫狗图像分类的代码示例:

```
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from swin_transformer import SwinTransformer

# 定义数据预处理
transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# 加载数据集
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# 定义模型
model = SwinTransformer(img_size=224, patch_size=4, in_chans=3, num_classes=2)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# 训练模型
for epoch in range(10):  # 训练 10 轮
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 200 == 199:  # 每 200 个 batch 输出一次损失值
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
print('Finished Training')

# 测试模型
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
```

注意:这只是一个示例代码,实际使用时需要根据具体情况进行修改。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值