PyTorch Deep Learning Examples: From Basics to Advanced
Below are deep learning task examples covering PyTorch from basic to advanced, grouped by category for systematic study:
Basic Operations and Tensor Handling
- Creating tensors: use torch.tensor() or random initialization (e.g. torch.rand()).
- Tensor operations: matrix multiplication (torch.mm) or element-wise operations (e.g. torch.add).
- Automatic differentiation: define a tensor with requires_grad=True and call .backward() to compute gradients.
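A minimal sketch of the first two bullets; every call here is standard PyTorch API:
import torch

# Create tensors: explicit values and random initialization
a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
b = torch.rand(2, 2)

# Matrix multiplication and element-wise addition
print(torch.mm(a, b))
print(torch.add(a, b))
And the autograd bullet in action: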
x = torch.tensor([1.0], requires_grad=True)
y = x ** 2
y.backward()
print(x.grad)  # prints the gradient value, tensor([2.]) since dy/dx = 2x at x = 1
Complete code for the examples above can be found in the official PyTorch documentation, open-source GitHub projects such as pytorch/examples, and deep learning courses such as Fast.ai. Start with simple tasks and work up to more complex architectures.
Linear Regression with PyTorch's nn.Linear
Linear regression is one of the most fundamental models in machine learning, and PyTorch's nn.Linear module makes it straightforward to implement. The 5 examples below show how to use nn.Linear to fit simple datasets.
Example 1: Fitting Linear Data
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
# Generate linear data
X = torch.linspace(0, 10, 100).reshape(-1, 1)
y = 2 * X + 3 + torch.randn(X.shape)  # y = 2x + 3 + noise

# Define the model
model = nn.Linear(1, 1)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train
for epoch in range(1000):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

# Visualize
plt.scatter(X, y, label='Data')
plt.plot(X, model(X).detach(), 'r-', label='Fitted Line')
plt.legend()
plt.show()
Example 2: Fitting Quadratic Data
# Generate quadratic data
X = torch.linspace(-2, 2, 100).reshape(-1, 1)
y = 3 * X**2 + 2 * X + 1 + torch.randn(X.shape)  # y = 3x² + 2x + 1 + noise

# Feature engineering: use X and X² as inputs
X_poly = torch.cat([X, X**2], dim=1)

# Define the model (input dimension 2)
model = nn.Linear(2, 1)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

# Train
for epoch in range(1000):
    optimizer.zero_grad()
    outputs = model(X_poly)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

# Visualize
plt.scatter(X, y, label='Data')
plt.plot(X, model(X_poly).detach(), 'r-', label='Fitted Curve')
plt.legend()
plt.show()
Example 3: Multivariate Linear Regression
# Generate multivariate data
X1 = torch.randn(100, 1)
X2 = torch.randn(100, 1)
X = torch.cat([X1, X2], dim=1)  # input dimension 2
y = 3 * X1 + 2 * X2 + 1 + torch.randn(X1.shape)  # y = 3x1 + 2x2 + 1 + noise

# Define the model (input dimension 2)
model = nn.Linear(2, 1)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train
for epoch in range(1000):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

# Print the learned parameters (weights should approach 3 and 2, bias 1)
print(f"Weights: {model.weight.data}, Bias: {model.bias.data}")
Example 4: Training with Mini-Batches
from torch.utils.data import DataLoader, TensorDataset

# Generate data
X = torch.linspace(0, 10, 1000).reshape(-1, 1)
y = 5 * X + 2 + torch.randn(X.shape)  # y = 5x + 2 + noise

# Create a DataLoader
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Define the model
model = nn.Linear(1, 1)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train (mini-batch)
for epoch in range(100):
    for batch_X, batch_y in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

# Print the result
print(f"Final weights: {model.weight.data}, bias: {model.bias.data}")
Example 5: Using a Learning Rate Scheduler
from torch.optim.lr_scheduler import StepLR

# Generate data
X = torch.linspace(0, 5, 100).reshape(-1, 1)
y = 4 * X + 1 + torch.randn(X.shape)  # y = 4x + 1 + noise

# Define the model
model = nn.Linear(1, 1)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = StepLR(optimizer, step_size=50, gamma=0.1)  # multiply the lr by 0.1 every 50 epochs

# Train
for epoch in range(200):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    scheduler.step()

# Print the learned parameters
print(f"Learned weight: {model.weight.data}, bias: {model.bias.data}")
Each example demonstrates a different scenario: single-variable, polynomial, and multivariate regression, plus training optimizations with mini-batches and a learning rate scheduler.
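After any of these runs, the learned parameters can be saved and restored with the standard state_dict API; a minimal sketch (the filename linear.pt is an arbitrary choice):
# Save only the parameters (weight and bias)
torch.save(model.state_dict(), 'linear.pt')

# Restore into a fresh model of the same shape
restored = nn.Linear(1, 1)
restored.load_state_dict(torch.load('linear.pt'))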
Logistic Regression and BCELoss
Logistic regression is commonly used for binary classification: a sigmoid maps the model output into the [0, 1] interval, where it is interpreted as a probability. BCELoss (Binary Cross Entropy Loss) measures the cross entropy between predicted probabilities and true labels, making it a natural fit for binary classification.
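Concretely, for a predicted probability p and a label y in {0, 1}, the per-sample loss is BCE(p, y) = -[y·log(p) + (1 - y)·log(1 - p)]; nn.BCELoss averages this over the batch by default.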
Example 1: Basic Binary Classification
import torch
import torch.nn as nn

# Prepare data
X = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y = torch.tensor([[0.0], [0.0], [1.0], [1.0]])

# Define the model
model = nn.Linear(1, 1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Training loop
for epoch in range(100):
    outputs = torch.sigmoid(model(X))
    loss = criterion(outputs, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Example 2: Multi-dimensional Feature Classification
# Multi-dimensional features
X = torch.randn(100, 5)  # 100 samples, 5 features
y = torch.randint(0, 2, (100, 1)).float()

model = nn.Linear(5, 1)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters())

for epoch in range(1000):
    outputs = torch.sigmoid(model(X))
    loss = criterion(outputs, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Example 3: Adding an Explicit Bias Feature
# Explicitly prepend a column of ones as a bias feature
X = torch.cat([torch.ones(100, 1), torch.randn(100, 3)], dim=1)
y = torch.randint(0, 2, (100, 1)).float()
model = nn.Linear(4, 1)  # input includes the explicit bias column
criterion = nn.BCELoss()
Example 4: Weight Initialization
# Custom weight initialization
model = nn.Linear(2, 1)
nn.init.xavier_uniform_(model.weight)
nn.init.zeros_(model.bias)
Example 5: Learning Rate Scheduling
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
for epoch in range(100):
    outputs = torch.sigmoid(model(X))
    loss = criterion(outputs, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()
Example 6: Adding Regularization
# L2 regularization via weight decay
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)
Example 7: Custom Dataset
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

dataset = CustomDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
Example 8: Early Stopping
best_loss = float('inf')
patience = 5
counter = 0
for epoch in range(1000):
    outputs = torch.sigmoid(model(X))
    loss = criterion(outputs, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Stop once the loss has not improved for `patience` consecutive epochs
    if loss.item() < best_loss:
        best_loss = loss.item()
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            break
Example 9: Model Evaluation
with torch.no_grad():
    outputs = torch.sigmoid(model(X))
    predicted = (outputs > 0.5).float()
    accuracy = (predicted == y).float().mean()
Example 10: GPU Acceleration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
X, y = X.to(device), y.to(device)
Key Points
- BCELoss expects predictions in the (0, 1) interval, so pair it with a sigmoid
- Inputs and labels must be floating point (float32)
- Binary labels are usually 0/1, but BCELoss also accepts soft labels (e.g. 0.2, 0.8)
- When training in batches, standardize the data to improve convergence speed
The examples above cover the main scenarios for binary classification with logistic regression; adjust the network structure and hyperparameters to match your task.
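Following up on the first key point: in practice nn.BCEWithLogitsLoss is often preferred over BCELoss, since it fuses the sigmoid into the loss for better numerical stability. A minimal sketch, reusing the data and shapes from Example 2:
model = nn.Linear(5, 1)
criterion = nn.BCEWithLogitsLoss()  # sigmoid is applied inside the loss
optimizer = torch.optim.Adam(model.parameters())
for epoch in range(1000):
    logits = model(X)  # raw logits: no sigmoid in the forward pass
    loss = criterion(logits, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# Apply sigmoid only when actual probabilities are needed
probs = torch.sigmoid(model(X))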
Reproducing LeNet-5: Code Example
Below is a complete example reproducing LeNet-5 in Python with TensorFlow/Keras. LeNet-5 is a classic convolutional neural network, originally proposed by Yann LeCun et al. for handwritten digit recognition.
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

# Preprocess: reshape to NHWC and scale pixel values to [0, 1]
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255
# Build the LeNet-5 model
model = models.Sequential([
    layers.Conv2D(6, (5, 5), activation='tanh', input_shape=(28, 28, 1), padding='same'),
    layers.AveragePooling2D((2, 2), strides=2),
    layers.Conv2D(16, (5, 5), activation='tanh'),
    layers.AveragePooling2D((2, 2), strides=2),
    layers.Conv2D(120, (5, 5), activation='tanh'),
    layers.Flatten(),
    layers.Dense(84, activation='tanh'),
    layers.Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=10,
          validation_data=(test_images, test_labels))

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_acc}')
Key Points
The LeNet-5 architecture consists of the following layers:
- Input layer: 28x28 single-channel images (MNIST; the original paper used 32x32 inputs, which the 'same' padding on C1 compensates for)
- C1: 6 convolution kernels of size 5x5, tanh activation
- S2: 2x2 average pooling, stride 2
- C3: 16 convolution kernels of size 5x5, tanh activation
- S4: 2x2 average pooling, stride 2
- C5: 120 convolution kernels of size 5x5, tanh activation
- Fully connected layer: 84 neurons, tanh activation
- Output layer: 10 neurons (digits 0-9), softmax activation
Variants and Improvements
Below are some common LeNet-5 variants:
# Variant 1: replace tanh with ReLU and average pooling with max pooling
model = models.Sequential([
    layers.Conv2D(6, (5, 5), activation='relu', input_shape=(28, 28, 1), padding='same'),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Conv2D(16, (5, 5), activation='relu'),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Conv2D(120, (5, 5), activation='relu'),
    layers.Flatten(),
    layers.Dense(84, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Variant 2: add BatchNormalization after each layer
model = models.Sequential([
    layers.Conv2D(6, (5, 5), activation='relu', input_shape=(28, 28, 1), padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Conv2D(16, (5, 5), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Conv2D(120, (5, 5), activation='relu'),
    layers.BatchNormalization(),
    layers.Flatten(),
    layers.Dense(84, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(10, activation='softmax')
])
Implementations in Other Frameworks
PyTorch implementation:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5, padding=2)
        self.pool1 = nn.AvgPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.AvgPool2d(2, stride=2)
        self.conv3 = nn.Conv2d(16, 120, 5)
        self.fc1 = nn.Linear(120, 84)
        self.fc2 = nn.Linear(84, 10)

    def forward(self, x):
        x = torch.tanh(self.conv1(x))
        x = self.pool1(x)
        x = torch.tanh(self.conv2(x))
        x = self.pool2(x)
        x = torch.tanh(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = torch.tanh(self.fc1(x))
        x = self.fc2(x)
        return x

# Training code: see the sketch below
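A minimal training sketch for this PyTorch version, assuming the standard torchvision MNIST pipeline (the Normalize constants are the conventional MNIST mean/std; epoch count and learning rate are illustrative):
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # conventional MNIST mean/std
])
train_set = datasets.MNIST('data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

model = LeNet5()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(5):
    for images, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()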
Extended Applications
LeNet-5 can be applied to a variety of small image classification tasks. Some directions:
- Character recognition: change the input size and number of output classes for other character datasets
- Simple object classification: adjust the architecture to handle larger images and more classes
- Teaching: demonstrate how convolution, pooling, and other basic operations work
- Benchmarking: serve as a performance baseline for other models
Although modern architectures are far more complex, LeNet-5 remains an excellent starting point for understanding the fundamentals of CNNs; adjusting its parameters and structure adapts it to different tasks.
Implementing AlexNet in Python and PyTorch
Below is a complete example of implementing AlexNet in Python and PyTorch for ImageNet classification, with key code snippets and implementation logic. The example is split into three parts: model definition, data preprocessing, and training/evaluation.
Model Definition
The PyTorch implementation of AlexNet:
import torch
import torch.nn as nn
class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
Data Preprocessing
Load ImageNet with torchvision and apply normalization and augmentation:
from torchvision import transforms, datasets

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = datasets.ImageFolder('path/to/imagenet/train', train_transform)
val_dataset = datasets.ImageFolder('path/to/imagenet/val', val_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)
Training and Evaluation
Example training loop:
model = AlexNet(num_classes=1000)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(30):
    model.train()
    for inputs, labels in train_loader:  # device transfers omitted for brevity
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation phase
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Epoch {epoch+1}, Accuracy: {100 * correct / total:.2f}%')
Key Optimization Techniques
- Learning rate scheduling: add torch.optim.lr_scheduler.StepLR to adjust the learning rate dynamically
- Mixed precision training: use torch.cuda.amp to speed up training (see the sketch after this list)
- Distributed training: scale to multiple GPUs with torch.nn.DataParallel or DistributedDataParallel
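A minimal mixed-precision sketch of the inner training loop above, assuming a CUDA device and that model (already moved to the GPU), criterion, optimizer, and train_loader are set up; GradScaler and autocast are the standard torch.cuda.amp entry points:
scaler = torch.cuda.amp.GradScaler()
for inputs, labels in train_loader:
    inputs, labels = inputs.cuda(), labels.cuda()
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():  # forward pass runs in mixed precision
        outputs = model(inputs)
        loss = criterion(outputs, labels)
    scaler.scale(loss).backward()  # scale the loss to avoid fp16 underflow
    scaler.step(optimizer)
    scaler.update()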
Extended Application Scenarios
- Medical image classification (CheXpert)
- Remote sensing scene classification (UC Merced Land Use)
- Artwork style classification (WikiArt)
- Food recognition (Food-101)
- Car model recognition (Stanford Cars)
- Dog breed classification (Stanford Dogs)
- Clothing classification (extended Fashion-MNIST)
- Plant disease recognition (PlantVillage)
- Facial expression recognition (FER-2013)
- Handwritten math symbol recognition (HASYv2)
- Satellite image cloud detection (Cloud Cover Detection)
- Industrial defect detection (MVTec AD)
- Animal behavior recognition (Kinetics-700 subset)
- Microscopy cell classification (ImageCLEF)
- Street-view house number recognition (SVHN)
- Geological facies classification (DeepRock-SCAN)
- Road object classification for autonomous driving (BDD100K)
- Astronomical object classification (Galaxy Zoo)
- Underwater species recognition (Fish4Knowledge)
- Military target recognition (xView)
- Retail product recognition (iMaterialist)
- Calligraphy font classification (Chinese Calligraphy)
- Drone-view object classification (VisDrone)
- Fossil classification (PaleoDeepDive)
- Skin lesion classification (ISIC 2019)
- Crop growth stage recognition (CropHarvest)
- Industrial part classification (GRABCAD)
- Social media content moderation (NSFW detection)
Notes
- ImageNet data must be requested through the official channel (roughly 120GB)
- Full training requires high-end GPUs (e.g. V100/A100); using pretrained weights is recommended
- In practice, prefer transfer learning: freeze some layers and fine-tune the classification head (see the sketch below)
- torchvision.models.alexnet(pretrained=True) loads a pretrained model directly
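A minimal transfer-learning sketch along those lines; the 10-class target task is a placeholder assumption, and in torchvision's AlexNet the final layer sits at classifier[6]:
from torchvision import models

model = models.alexnet(pretrained=True)

# Freeze the convolutional feature extractor
for param in model.features.parameters():
    param.requires_grad = False

# Replace the final classifier layer for the new task (hypothetical 10 classes)
model.classifier[6] = nn.Linear(4096, 10)

# Optimize only the parameters that still require gradients
optimizer = torch.optim.SGD(
    (p for p in model.parameters() if p.requires_grad), lr=0.001, momentum=0.9
)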
Flower Classification on the Oxford 102 Flowers Dataset
The Oxford 102 Flowers dataset contains 102 categories of flowers commonly found in the UK, with 40 to 258 images per class. It can be downloaded from the official site or academic mirrors; after extraction it typically contains a jpg image folder and annotation files (e.g. imagelabels.mat).
import os
import scipy.io
from PIL import Image
import numpy as np

# Load the annotation file
mat = scipy.io.loadmat('imagelabels.mat')
labels = mat['labels'][0] - 1  # convert to 0-based indices

# Build the list of image paths
image_dir = 'jpg'
image_files = sorted([f for f in os.listdir(image_dir) if f.endswith('.jpg')])
image_paths = [os.path.join(image_dir, f) for f in image_files]
Data Preprocessing
Standardize and augment with PyTorch transforms:
import torch
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Example: load and transform a single image
image = Image.open(os.path.join(image_dir, image_files[0]))
image_tensor = transform(image)
Model Definition
Transfer learning with a pretrained ResNet18:
import torch.nn as nn
from torchvision import models

model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 102)  # 102 classes

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
Training
Define a simple training loop:
from torch.utils.data import Dataset, DataLoader

class FlowerDataset(Dataset):
    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __getitem__(self, idx):
        # convert('RGB') guards against the occasional non-RGB file
        image = Image.open(self.image_paths[idx]).convert('RGB')
        return self.transform(image), self.labels[idx]

    def __len__(self):
        return len(self.image_paths)

# Create the DataLoader
dataset = FlowerDataset(image_paths, labels, transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
for epoch in range(10):
    for inputs, targets in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')
Evaluation and Prediction
Compute accuracy on the test set:
# test_loader: a DataLoader over a held-out test split, built like `dataloader` above
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
print(f'Accuracy: {100 * correct / total:.2f}%')
Visualizing Results
Show predictions with Matplotlib:
import matplotlib.pyplot as plt

def imshow(inp, title=None):
    # Undo the Normalize transform, clip to [0, 1], then display
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = np.clip(std * inp + mean, 0, 1)
    plt.imshow(inp)
    if title:
        plt.title(title)
    plt.pause(0.001)

# Show predictions for one batch
images, labels = next(iter(dataloader))
with torch.no_grad():
    outputs = model(images)
_, preds = torch.max(outputs, 1)

fig = plt.figure(figsize=(10, 10))
for idx in range(6):
    ax = fig.add_subplot(2, 3, idx+1)
    ax.axis('off')
    ax.set_title(f'Pred: {preds[idx]}, True: {labels[idx]}')
    imshow(images[idx].cpu())
Key Considerations
- Data splits: divide into training/validation/test sets by a fixed ratio (e.g. 70%/15%/15%)
- Class balance: check whether each class has a similar number of samples; use oversampling or a weighted loss if not
- Learning rate scheduling: add torch.optim.lr_scheduler to adjust the learning rate dynamically
- Fine-tuning: freeze some layers (e.g. the convolutional backbone) and train only the classification head, as sketched below
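A minimal sketch of that fine-tuning strategy for the ResNet18 model above; freezing the whole backbone and training only a new fc head is one common choice, not the only one:
# Freeze the pretrained backbone
for param in model.parameters():
    param.requires_grad = False

# Replace the head; parameters of a freshly created layer require gradients by default
model.fc = nn.Linear(num_ftrs, 102)

# Optimize only the classification head
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)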