深度学习框架实践
课程介绍
本模块介绍PyTorch深度学习框架的使用,通过实际案例学习如何构建、训练和部署深度学习模型。
学习目标
完成本模块学习后,你将能够:
- 使用PyTorch构建神经网络
- 实现模型训练和评估
- 掌握数据加载和预处理
- 进行模型部署和优化
1. PyTorch基础
1.1 张量操作
# 🔥 实战案例:PyTorch张量操作
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
def tensor_operations():
    """Demonstrate basic PyTorch tensor usage: arithmetic, GPU transfer, autograd."""
    # A fixed float32 tensor and a random 2x2 companion.
    a = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
    b = torch.randn(2, 2)

    # Elementwise and matrix arithmetic.
    print("加法:", a + b)
    print("矩阵乘法:", torch.mm(a, b))
    print("元素乘法:", a * b)

    # Move tensors to the GPU only when one is actually present.
    if torch.cuda.is_available():
        a_gpu = a.cuda()
        b_gpu = b.cuda()
        print("GPU张量:", a_gpu)

    # Autograd demo: track gradients on `a`, then backprop through sum(a**2).
    a.requires_grad_(True)
    total = (a ** 2).sum()
    total.backward()
    print("梯度:", a.grad)
# 自定义数据集
class CustomDataset(Dataset):
    """Minimal map-style dataset wrapping feature/target data as float32 tensors."""

    def __init__(self, X, y):
        # Convert to tensors once up front so __getitem__ is a cheap index.
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)

    def __len__(self):
        # One sample per row of the feature tensor.
        return self.X.shape[0]

    def __getitem__(self, idx):
        # Yield a single (features, target) pair.
        return self.X[idx], self.y[idx]
1.2 神经网络模块
# 🧠 实战案例:PyTorch神经网络
class SimpleNet(nn.Module):
    """Two-layer MLP: Linear -> ReLU -> Dropout(0.2) -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Hidden projection, output projection, and the shared activations.
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        # hidden -> activation -> regularization, then the output head.
        hidden = self.dropout(self.relu(self.layer1(x)))
        return self.layer2(hidden)
# 训练函数
def train_model(model, train_loader, criterion, optimizer, device):
    """Run one training epoch over `train_loader`; return the mean batch loss."""
    model.train()
    accumulated = 0.0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Clear stale gradients, then forward/backward/step.
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        accumulated += loss.item()
    # Average over the number of batches, not samples.
    return accumulated / len(train_loader)
# 评估函数
def evaluate_model(model, val_loader, criterion, device):
    """Evaluate a classifier; return (mean batch loss, accuracy percent)."""
    model.eval()
    total_loss = 0.0
    hits = 0
    seen = 0
    # No autograd bookkeeping needed during evaluation.
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            total_loss += criterion(outputs, targets).item()
            # Predicted class is the argmax over the class dimension.
            predictions = outputs.argmax(dim=1)
            seen += targets.size(0)
            hits += (predictions == targets).sum().item()
    return total_loss / len(val_loader), 100. * hits / seen
2. 高级特性
2.1 自定义层和损失函数
# 🛠️ 实战案例:自定义组件
class FocalLoss(nn.Module):
    """Focal loss: mean of alpha * (1 - p_t)**gamma * cross_entropy."""

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        # alpha scales the loss; gamma down-weights well-classified samples.
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        # Per-sample CE; p_t = exp(-CE) recovers the true-class probability.
        per_sample_ce = nn.functional.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-per_sample_ce)
        weight = self.alpha * (1 - pt) ** self.gamma
        return (weight * per_sample_ce).mean()
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv+BN stages plus an identity skip, then ReLU."""

    def __init__(self, in_channels):
        super().__init__()
        # Kernel 3 with padding 1 preserves spatial size, and the channel
        # count is unchanged, so the skip addition is shape-safe.
        self.conv1 = nn.Conv2d(in_channels, in_channels, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, in_channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(in_channels)

    def forward(self, x):
        skip = x
        branch = nn.functional.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))
        # Add the identity path before the final activation.
        return nn.functional.relu(branch + skip)
2.2 模型保存和加载
# 💾 实战案例:模型保存和加载
def save_checkpoint(model, optimizer, epoch, filename):
    """Persist model/optimizer state plus the epoch index to `filename`."""
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        },
        filename,
    )
def load_checkpoint(model, optimizer, filename, map_location=None):
    """Restore model/optimizer state from `filename`; return the saved epoch.

    Args:
        model: module whose parameters are restored in place.
        optimizer: optimizer whose state is restored in place.
        filename: path to a checkpoint produced by `save_checkpoint`.
        map_location: optional device remapping passed to `torch.load`
            (e.g. 'cpu'), so checkpoints saved on a GPU can be loaded on
            CPU-only machines. Default `None` keeps the original behavior.

    Returns:
        The epoch number stored in the checkpoint.
    """
    checkpoint = torch.load(filename, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch']
3. 模型训练与优化
3.1 训练流程
# 📈 实战案例:完整训练流程
class Trainer:
    """Bundles a model, loss, and optimizer and drives training/validation.

    Per-epoch metrics accumulate in `self.history`, and the best model (by
    validation loss) is checkpointed to disk during `train()`.
    """

    def __init__(self, model, criterion, optimizer, device):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.device = device
        # Metric curves; `train()` appends one entry per epoch.
        self.history = {'train_loss': [], 'val_loss': [], 'val_acc': []}

    def train_epoch(self, train_loader):
        """Run one pass over `train_loader`; return the mean batch loss."""
        self.model.train()
        accumulated = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.to(self.device)
            target = target.to(self.device)
            # Forward, backward, and parameter update for one batch.
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            accumulated += loss.item()
            # Periodic progress report every 100 batches.
            if batch_idx % 100 == 0:
                print(f'Train Batch: {batch_idx}/{len(train_loader)} '
                      f'Loss: {loss.item():.6f}')
        return accumulated / len(train_loader)

    def validate(self, val_loader):
        """Evaluate on `val_loader`; return (mean batch loss, accuracy percent)."""
        self.model.eval()
        total_loss = 0
        hits = 0
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                total_loss += self.criterion(output, target).item()
                # Class prediction keeps a trailing dim so eq() can broadcast.
                pred = output.argmax(dim=1, keepdim=True)
                hits += pred.eq(target.view_as(pred)).sum().item()
        # Loss averaged per batch; accuracy computed over the whole dataset.
        return total_loss / len(val_loader), 100. * hits / len(val_loader.dataset)

    def train(self, train_loader, val_loader, epochs,
              checkpoint_path='checkpoint.pt'):
        """Train for `epochs` epochs, validating and checkpointing each epoch."""
        best_val_loss = float('inf')
        for epoch in range(epochs):
            print(f'\nEpoch: {epoch+1}/{epochs}')
            train_loss = self.train_epoch(train_loader)
            val_loss, val_acc = self.validate(val_loader)
            # Record the metric curves for later plotting.
            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['val_acc'].append(val_acc)
            # Keep only the best-so-far weights on disk.
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                save_checkpoint(self.model, self.optimizer,
                                epoch, checkpoint_path)
            print(f'Train Loss: {train_loss:.4f}')
            print(f'Val Loss: {val_loss:.4f}')
            print(f'Val Accuracy: {val_acc:.2f}%')

    def plot_history(self):
        """Plot loss and accuracy curves; return the matplotlib figure.

        NOTE(review): relies on a module-level `plt` (matplotlib.pyplot)
        that is not imported in this chunk — confirm the file imports it.
        """
        plt.figure(figsize=(12, 4))
        # Left panel: train/val loss per epoch.
        plt.subplot(1, 2, 1)
        plt.plot(self.history['train_loss'], label='train')
        plt.plot(self.history['val_loss'], label='val')
        plt.title('Loss History')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        # Right panel: validation accuracy per epoch.
        plt.subplot(1, 2, 2)
        plt.plot(self.history['val_acc'])
        plt.title('Validation Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy (%)')
        return plt.gcf()
实战项目:图像分类模型
项目描述
使用PyTorch构建和训练一个图像分类模型,包含完整的训练流程和模型优化。
项目代码框架
class ImageClassifier:
    """ResNet-18-based image classifier with train / predict / visualize helpers.

    NOTE(review): this class depends on names that are not imported in this
    chunk (`models`, `transforms`, `ImageFolder` from torchvision, `Image`
    from PIL, `plt` from matplotlib, and the sibling `Trainer` class) —
    confirm the file's import section provides them.
    """

    def __init__(self, num_classes):
        # ImageNet-pretrained backbone with the classification head
        # replaced to output `num_classes` logits.
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Linear(512, num_classes)
        # Standard ImageNet preprocessing pipeline.
        self.transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def train(self, train_dir, val_dir, epochs=10, batch_size=32,
              learning_rate=0.001):
        """Train on folder-per-class datasets; return the Trainer history."""
        # Build datasets and loaders from the directory layout.
        train_set = ImageFolder(train_dir, transform=self.transform)
        val_set = ImageFolder(val_dir, transform=self.transform)
        train_loader = DataLoader(train_set, batch_size=batch_size,
                                  shuffle=True, num_workers=4)
        val_loader = DataLoader(val_set, batch_size=batch_size,
                                shuffle=False, num_workers=4)
        # Prefer the GPU when one is available.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self.model.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        # Delegate the epoch loop to the shared Trainer.
        trainer = Trainer(self.model, criterion, optimizer, device)
        trainer.train(train_loader, val_loader, epochs)
        return trainer.history

    def predict(self, image_path):
        """Return the predicted class index for a single image file.

        NOTE(review): the input tensor is not moved to the model's device —
        this presumably assumes a CPU model; verify before GPU use.
        """
        batch = self.transform(Image.open(image_path)).unsqueeze(0)
        self.model.eval()
        with torch.no_grad():
            _, predicted = self.model(batch).max(1)
        return predicted.item()

    def visualize_predictions(self, image_paths, class_names):
        """Show each image with its predicted label; return the figure."""
        plt.figure(figsize=(15, 3))
        for i, path in enumerate(image_paths):
            pred = self.predict(path)
            plt.subplot(1, len(image_paths), i+1)
            plt.imshow(Image.open(path))
            plt.title(f'Pred: {class_names[pred]}')
            plt.axis('off')
        return plt.gcf()
练习与作业
- 实现不同的优化策略(学习率调度、早停等)
- 添加数据增强方法
- 尝试不同的预训练模型
扩展阅读
小测验
- PyTorch中张量和NumPy数组的区别是什么?
- 如何处理GPU内存不足的问题?
- 什么情况下应该使用自定义数据集?
下一步学习
- 高级模型架构
- 模型部署和服务
- 分布式训练
常见问题解答
Q: 如何选择批量大小? A: 批量大小需要平衡训练速度和内存使用。通常从16或32开始,根据GPU内存和模型性能调整。
Q: 如何处理过拟合? A: 可以使用正则化、Dropout、数据增强等方法。同时确保验证集的使用正确,适时使用早停。