模块/主题 | 核心类/函数/操作 | 主要用途/说明 |
---|---|---
Tensor操作 | torch.tensor() , torch.randn() , view() , reshape() , to(device) | 创建张量、改变形状、设备转换 |
自动微分 | requires_grad=True , backward() , grad , torch.no_grad() | 开启梯度追踪、反向传播计算梯度、访问梯度、禁用梯度计算 |
数据加载与处理 | Dataset , DataLoader | 自定义数据集、批量加载与打乱数据 |
神经网络构建 | torch.nn.Module , nn.Linear , nn.Conv2d , nn.ReLU , nn.Sequential | 模型基类、定义层、激活函数、顺序容器 |
损失函数 | nn.MSELoss , nn.CrossEntropyLoss , nn.NLLLoss | 回归任务(MSELoss)、分类任务(CrossEntropyLoss、NLLLoss) |
优化器 | torch.optim.SGD , torch.optim.Adam , zero_grad() , step() | 定义优化算法、清空梯度、执行参数更新 |
模型训练流程 | model.train() , 循环迭代, 前向传播, 损失计算, loss.backward() , optimizer.step() | 训练模式、计算损失、反向传播、更新参数 |
模型验证/测试流程 | model.eval() , with torch.no_grad(): | 评估模式、禁用梯度计算 |
模型保存与加载 | torch.save() , torch.load() , model.state_dict() | 保存模型、加载模型、获取/加载状态字典 |
设备管理 | torch.cuda.is_available() , torch.device() | 检查GPU可用性、设置设备 |
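上表中的 Dataset/DataLoader、损失函数、优化器、设备管理、模型保存与加载等条目在下文没有单独的代码小节,这里先给出一个极简的端到端示意把它们串起来(其中 SimpleDataset、各层维度等均为演示用的假设,并非固定写法):
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
class SimpleDataset(Dataset): # 演示用的自定义数据集
    def __init__(self, n=64):
        self.x = torch.randn(n, 10)
        self.y = torch.randint(0, 2, (n,))
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 设备管理
loader = DataLoader(SimpleDataset(), batch_size=16, shuffle=True)     # 批量加载与打乱
model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2)).to(device)
criterion = nn.CrossEntropyLoss()                                     # 分类损失
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)             # 优化器
model.train()
for data, target in loader:
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()                 # 清空梯度
    loss = criterion(model(data), target) # 前向传播 + 损失计算
    loss.backward()                       # 反向传播
    optimizer.step()                      # 更新参数
torch.save(model.state_dict(), "model.pth")                            # 保存状态字典
model.load_state_dict(torch.load("model.pth", map_location=device))    # 加载状态字典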
🛠️ 核心操作与概念
1. 🎯 Tensor 操作大全
1.1 Tensor 创建与初始化
import torch
import numpy as np
# 基础创建
x = torch.tensor([1, 2, 3]) # 从列表创建
y = torch.tensor(np.array([1, 2, 3])) # 从numpy创建
# 特殊矩阵
zeros = torch.zeros(2, 3) # 全零矩阵
ones = torch.ones(2, 3) # 全一矩阵
eye = torch.eye(3) # 单位矩阵
arange = torch.arange(0, 10, 2) # 范围张量 [0, 2, 4, 6, 8]
linspace = torch.linspace(0, 1, 5) # 线性间隔 [0.0000, 0.2500, 0.5000, 0.7500, 1.0000]
# 随机张量
rand_uniform = torch.rand(2, 3) # 均匀分布 [0,1]
rand_normal = torch.randn(2, 3) # 标准正态分布
rand_int = torch.randint(0, 10, (2, 3)) # 随机整数
# 类似现有张量
x_like = torch.zeros_like(rand_normal) # 类似形状的全零
same_shape = torch.randn_like(rand_normal) # 类似形状的随机
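补充一点(沿用上面已导入的 torch 与 numpy):创建张量时可以直接指定 dtype 与 device;torch.from_numpy() 与源数组共享内存,修改一方会影响另一方,而 torch.tensor() 总是拷贝数据。简短示意:
device = "cuda" if torch.cuda.is_available() else "cpu"
on_dev = torch.ones(2, 3, device=device)      # 创建时直接指定设备
f16 = torch.zeros(2, 3, dtype=torch.float16)  # 创建时指定数据类型
arr = np.array([1.0, 2.0, 3.0])
shared = torch.from_numpy(arr)                # 与 arr 共享内存
arr[0] = 100.0
print(shared)                                 # tensor([100., 2., 3.], dtype=torch.float64)
copied = torch.tensor(arr)                    # 总是拷贝,不随 arr 变化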
1.2 Tensor 索引与切片
# 创建示例张量
tensor = torch.randn(4, 5, 6)
# 基础索引
print(tensor[0]) # 第0维的第一个切片,形状 (5, 6)
print(tensor[:, 0]) # 第1维上取第一个切片,形状 (4, 6)
print(tensor[..., 0]) # 最后一维的第一个元素,形状 (4, 5)
# 高级索引
mask = tensor > 0.5
filtered = tensor[mask] # 布尔索引
indices = torch.tensor([0, 2, 3])
selected = tensor[indices] # 数组索引
# 切片操作
sliced = tensor[1:3, 2:4] # 行1-2, 列2-3
strided = tensor[::2, ::3] # 每隔2行, 每隔3列
reversed_t = torch.flip(tensor, dims=[0, 1]) # 反转(PyTorch 切片不支持负步长,需用 torch.flip)
# 索引赋值
tensor[0, :] = 1.0 # 第一行赋值为1
tensor[mask] = 0.0 # 符合条件的赋值为0
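除上面的布尔索引与数组索引外,torch.index_select 与 torch.gather 也是常用的按下标取数方式,下面是一个简短示意(变量名仅作演示):
x = torch.arange(12).reshape(3, 4)
rows = torch.index_select(x, dim=0, index=torch.tensor([0, 2])) # 取第0、2行 → (2, 4)
idx = torch.tensor([[0, 3], [1, 2], [2, 0]])
picked = torch.gather(x, dim=1, index=idx) # 每行按 idx 取元素:out[i][j] = x[i][idx[i][j]] → (3, 2)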
1.3 Tensor 形状操作
x = torch.randn(2, 3, 4)
# 重塑操作
reshaped = x.reshape(6, 4) # 重塑为(6,4)
viewed = x.view(3, 8) # 视图重塑(必须连续)
squeezed = x.squeeze() # 移除所有大小为1的维度(此处没有这样的维度,形状不变)
unsqueezed = x.unsqueeze(0) # 在指定维度添加维度1
# 转置与重排
transposed = x.transpose(0, 1) # 交换维度0和1(.T 仅推荐用于二维张量)
permuted = x.permute(2, 0, 1) # 维度重排
# 连接与分割
a, b = torch.randn(2, 3), torch.randn(2, 3)
cat = torch.cat([a, b], dim=0) # 沿维度0连接 → (4,3)
stack = torch.stack([a, b], dim=0) # 堆叠 → (2,2,3)
# 分割
chunks = torch.chunk(x, 2, dim=0) # 沿维度0分成2块
split = torch.split(x, 2, dim=1) # 沿维度1每2个分割
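关于 view 的"必须连续":transpose/permute 之后张量往往不再连续,直接 view 会报错,需要先 contiguous() 或改用 reshape。简短示意:
x = torch.randn(2, 3, 4)
p = x.permute(2, 0, 1)           # 维度重排后通常不再连续
print(p.is_contiguous())         # False
# p.view(4, 6)                   # 会报 RuntimeError:view 要求内存连续
ok1 = p.contiguous().view(4, 6)  # 先拷贝为连续内存再 view
ok2 = p.reshape(4, 6)            # reshape 在必要时自动拷贝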
1.4 Tensor 数学运算
a, b = torch.tensor([1.0, 2.0]), torch.tensor([3.0, 4.0])
# 基础运算
add = a + b # 加法
sub = a - b # 减法
mul = a * b # 逐元素乘法
div = a / b # 除法
pow = a ** 2 # 幂运算
# 矩阵运算
mat_a, mat_b = torch.randn(2, 3), torch.randn(3, 2)
matmul = torch.mm(mat_a, mat_b) # 矩阵乘法
bmm = torch.bmm(mat_a.unsqueeze(0), mat_b.unsqueeze(0)) # 批量矩阵乘
# 归约操作
x = torch.randn(2, 3)
sum_all = x.sum() # 所有元素和
sum_dim = x.sum(dim=0) # 沿维度0求和
mean = x.mean() # 平均值
std = x.std() # 标准差
max_val, max_idx = x.max(dim=1) # 最大值和索引
# 比较操作
eq = torch.eq(a, b) # 相等
gt = torch.gt(a, b) # 大于
lt = torch.lt(a, b) # 小于
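上述逐元素运算都遵循广播(broadcasting)规则:从尾部维度对齐,大小为1的维度会被自动扩展。一个简短示意:
m = torch.randn(3, 4)    # (3, 4)
row = torch.randn(4)     # (4,)   → 广播为 (3, 4)
col = torch.randn(3, 1)  # (3, 1) → 广播为 (3, 4)
print((m + row).shape)   # torch.Size([3, 4])
print((m * col).shape)   # torch.Size([3, 4])
print((row + col).shape) # torch.Size([3, 4]),两个操作数同时被广播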
2. 🧠 自动微分高级特性
2.1 梯度计算与控制
# 基础梯度计算
x = torch.tensor(2.0, requires_grad=True)
y = x ** 3 + 2 * x + 1
y.backward()
print(x.grad) # 3*x² + 2 = 14.0
# 向量值的梯度
x = torch.tensor([1.0, 2.0], requires_grad=True)
y = x.sum() ** 2
y.backward()
print(x.grad) # [2*sum, 2*sum] = [6.0, 6.0]
# 高阶梯度
x = torch.tensor(3.0, requires_grad=True)
y = x ** 3
grad1 = torch.autograd.grad(y, x, create_graph=True)[0] # 一阶导
grad2 = torch.autograd.grad(grad1, x)[0] # 二阶导
print(grad2) # 6*x = 18.0
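需要注意 backward() 只能直接作用于标量;对向量输出必须传入一个同形状的 gradient 参数(即向量-雅可比积的权重)。简短示意:
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x ** 2                                # 向量输出,不能直接 y.backward()
y.backward(gradient=torch.ones_like(y))   # 等价于 y.sum().backward()
print(x.grad)                             # tensor([2., 4., 6.]),即 2*x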
2.2 梯度控制上下文
# 梯度禁用
x = torch.tensor(1.0, requires_grad=True)
with torch.no_grad():
    y = x * 2 # 在 no_grad 上下文中不追踪梯度
    # 此时 y.requires_grad == False
# 梯度检查点(用计算换显存)
def gradient_checkpointing():
    from torch.utils.checkpoint import checkpoint
    def custom_forward(x):
        return x ** 2
    x = torch.tensor(2.0, requires_grad=True)
    y = checkpoint(custom_forward, x) # 前向不保存中间激活,反向时重新计算,节省内存
    y.backward()
# 梯度累积:多个小 batch 累积梯度后再更新,等效于更大的 batch size
accumulation_steps = 4
optimizer.zero_grad()
for i, (data, target) in enumerate(dataloader):
    output = model(data)
    loss = criterion(output, target) / accumulation_steps # 按累积步数缩放损失
    loss.backward()
    if (i + 1) % accumulation_steps == 0: # 每累积 accumulation_steps 个 batch 更新一次
        optimizer.step()
        optimizer.zero_grad()
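另一个常用的梯度控制手段是冻结部分参数(例如微调时只训练分类头),以及用 detach() 把张量从计算图中分离。下面是一个简短示意(模型结构仅作演示):
import torch
import torch.nn as nn
model = nn.Sequential(nn.Linear(10, 10), nn.ReLU(), nn.Linear(10, 2))
for p in model[0].parameters():
    p.requires_grad = False # 冻结第一层,反向传播不再为其计算梯度
optimizer = torch.optim.SGD((p for p in model.parameters() if p.requires_grad), lr=0.01)
x = torch.randn(4, 10)
feat = model[0](x).detach() # detach 后的张量不再参与梯度追踪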
3. 🏗️ 神经网络构建大全
3.1 网络层详解
import torch.nn as nn
# 卷积层
conv1d = nn.Conv1d(1, 32, kernel_size=3) # 一维卷积
conv2d = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
conv3d = nn.Conv3d(1, 32, kernel_size=3)
# 池化层
maxpool2d = nn.MaxPool2d(2, stride=2) # 最大池化
avgpool2d = nn.AvgPool2d(2) # 平均池化
adaptive_pool = nn.AdaptiveAvgPool2d((1, 1)) # 自适应池化
# 归一化层
batch_norm = nn.BatchNorm2d(64) # 批归一化
layer_norm = nn.LayerNorm(128) # 层归一化
instance_norm = nn.InstanceNorm2d(32) # 实例归一化
# 循环层
lstm = nn.LSTM(128, 64, num_layers=2, bidirectional=True)
gru = nn.GRU(128, 64, batch_first=True)
rnn = nn.RNN(128, 64, nonlinearity='tanh')
# 注意力机制
multihead_attn = nn.MultiheadAttention(512, 8) # 多头注意力
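这些层的输入/输出形状约定容易混淆:RNN 类层默认期望 (seq_len, batch, feature),需要 batch_first=True 才是 (batch, seq_len, feature);MultiheadAttention 在较新版本中也提供 batch_first 参数。一个简短的形状示意(假设较新的 PyTorch 版本):
import torch
import torch.nn as nn
x = torch.randn(16, 10, 128)                                # (batch, seq_len, feature)
gru = nn.GRU(128, 64, batch_first=True)
out, h = gru(x)                                             # out: (16, 10, 64), h: (1, 16, 64)
attn = nn.MultiheadAttention(embed_dim=128, num_heads=8, batch_first=True)
attn_out, attn_weights = attn(x, x, x)                      # attn_out: (16, 10, 128)
lstm = nn.LSTM(128, 64, num_layers=2, bidirectional=True)   # 默认 batch_first=False
out2, (h2, c2) = lstm(x.transpose(0, 1))                    # 输入 (10, 16, 128),out2: (10, 16, 128)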
3.2 复杂网络架构
class ResidualBlock(nn.Module):
    """残差块"""
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)
    def forward(self, x):
        residual = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += residual # 残差连接
        return self.relu(out)
class ComplexNetwork(nn.Module):
    """复杂网络示例"""
    def __init__(self, num_classes=10):
        super().__init__()
        # 特征提取
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            ResidualBlock(64),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # 分类器
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(128, 256),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )
        # 注意力机制
        self.attention = nn.MultiheadAttention(128, 8)
    def forward(self, x):
        x = self.features(x)
        # 应用注意力:把空间位置当作序列
        b, c, h, w = x.shape
        x_attn = x.view(b, c, -1).permute(2, 0, 1) # (h*w, b, c)
        attn_out, _ = self.attention(x_attn, x_attn, x_attn)
        x = attn_out.permute(1, 2, 0).reshape(b, c, h, w) # permute 后不连续,用 reshape 而非 view
        return self.classifier(x)
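自定义网络写好后,常见的下一步是初始化权重,可以用 model.apply() 对每个子模块递归应用初始化函数。下面是一个简短示意(具体初始化方案仅作演示):
import torch.nn as nn
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        nn.init.zeros_(m.bias)
model = ComplexNetwork(num_classes=10)
model.apply(init_weights) # 递归作用于所有子模块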
4. 🔄 训练循环优化
4.1 高级训练技巧
def advanced_training_loop(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """高级训练循环:OneCycle 学习率、混合精度、梯度裁剪、早停"""
    # 学习率调度器
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=0.1,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader)
    )
    # 梯度缩放(混合精度训练)
    scaler = torch.cuda.amp.GradScaler()
    # 早停机制
    best_loss = float('inf')
    patience = 5
    patience_counter = 0
    for epoch in range(num_epochs):
        # 训练阶段
        model.train()
        train_loss = 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            # 混合精度前向与损失计算
            with torch.cuda.amp.autocast():
                output = model(data)
                loss = criterion(output, target)
            scaler.scale(loss).backward()
            # 梯度裁剪:必须先 unscale 再裁剪,然后才能 step
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            train_loss += loss.item()
        # 验证阶段
        model.eval()
        val_loss = 0.0
        correct = 0
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.cuda(), target.cuda()
                output = model(data)
                val_loss += criterion(output, target).item()
                pred = output.argmax(dim=1)
                correct += pred.eq(target).sum().item()
        # 早停检查
        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered!")
                break
        print(f'Epoch {epoch}: Train Loss: {train_loss/len(train_loader):.4f}, '
              f'Val Loss: {val_loss/len(val_loader):.4f}, '
              f'Accuracy: {100.*correct/len(val_loader.dataset):.2f}%')
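除了 OneCycleLR,torch.optim.lr_scheduler 还提供 StepLR、CosineAnnealingLR、ReduceLROnPlateau 等调度器,一般在每个 epoch 结束后调用 step()。下面是一个简短示意(损失仅为演示用的占位计算):
import torch
import torch.nn as nn
model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # 每10个epoch学习率乘以0.1
for epoch in range(30):
    optimizer.zero_grad()
    loss = model(torch.randn(4, 10)).sum() # 演示用的"损失"
    loss.backward()
    optimizer.step()
    scheduler.step() # 每个 epoch 结束后更新学习率
    print(epoch, optimizer.param_groups[0]['lr'])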
5. 📊 可视化与调试
5.1 TensorBoard 集成
from torch.utils.tensorboard import SummaryWriter
def setup_tensorboard_logging(model, train_loader, optimizer, num_epochs):
    """设置TensorBoard日志(train_loss、train_accuracy 由省略的训练代码计算)"""
    writer = SummaryWriter('runs/experiment1')
    # 记录模型图
    data_iter = iter(train_loader)
    sample_data, _ = next(data_iter)
    writer.add_graph(model, sample_data)
    # 训练过程中记录指标
    for epoch in range(num_epochs):
        train_loss, train_accuracy = 0.0, 0.0
        # ... 训练代码,更新 train_loss 与 train_accuracy ...
        # 记录损失和准确率
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Accuracy/train', train_accuracy, epoch)
        writer.add_scalar('Learning_rate', optimizer.param_groups[0]['lr'], epoch)
        # 记录权重与梯度分布
        for name, param in model.named_parameters():
            writer.add_histogram(name, param, epoch)
            if param.grad is not None: # 反向传播之前 grad 为 None
                writer.add_histogram(f'{name}.grad', param.grad, epoch)
    writer.close()
5.2 模型调试工具
def model_debugging_tools(model, dataloader):
    """模型调试工具"""
    # 钩子函数用于中间层激活
    activations = {}
    def get_activation(name):
        def hook(module, input, output):
            activations[name] = output.detach()
        return hook
    # 注册钩子
    hooks = []
    for name, layer in model.named_modules():
        if isinstance(layer, nn.Conv2d):
            hook = layer.register_forward_hook(get_activation(name))
            hooks.append(hook)
    # 前向传播捕获激活
    data, target = next(iter(dataloader))
    output = model(data)
    # 移除钩子
    for hook in hooks:
        hook.remove()
    # 模型统计
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"总参数: {total_params:,}")
    print(f"可训练参数: {trainable_params:,}")
    # 内存使用分析
    if torch.cuda.is_available():
        print(f"GPU内存使用: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
6. 🚀 分布式训练
6.1 多GPU训练
import os
import torch.distributed as dist
import torch.multiprocessing as mp
def setup_ddp(rank, world_size):
    """设置分布式数据并行(单机多卡;主节点地址与端口仅为示例值)"""
    os.environ.setdefault("MASTER_ADDR", "localhost")
    os.environ.setdefault("MASTER_PORT", "12355")
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)
def cleanup_ddp():
    dist.destroy_process_group()
def train_ddp(rank, world_size, model, dataset, epochs=10):
    """分布式训练函数"""
    setup_ddp(rank, world_size)
    # 分布式采样器:每个进程只读取互不重叠的数据子集
    sampler = torch.utils.data.DistributedSampler(
        dataset, num_replicas=world_size, rank=rank
    )
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=64, sampler=sampler
    )
    # 模型移到对应GPU并用DDP包装
    model = model.to(rank)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    # 训练循环
    for epoch in range(epochs):
        sampler.set_epoch(epoch) # 保证每个epoch的shuffle不同
        for batch in dataloader:
            # ... 训练代码 ...
            pass
    cleanup_ddp()
# 启动分布式训练(model、dataset 需在调用前构建)
def main():
    world_size = torch.cuda.device_count()
    mp.spawn(train_ddp, args=(world_size, model, dataset),
             nprocs=world_size, join=True)
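补充:mp.spawn 必须放在 if __name__ == "__main__" 保护之下;如果只想快速用上多卡、对吞吐要求不高,也可以用单进程的 nn.DataParallel(性能通常不如 DDP)。简短示意(模型仅作演示):
import torch
import torch.nn as nn
if __name__ == "__main__":
    model = nn.Linear(128, 10)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model) # 单进程多卡:自动把 batch 切分到各块 GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    out = model(torch.randn(32, 128).to(device))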
7. 🎯 模型部署与优化
7.1 模型量化与优化
def model_quantization(model, calibration_loader):
    """模型量化"""
    model.eval()
    # 动态量化:主要支持 Linear / RNN 类层(卷积层需走静态量化)
    quantized_model = torch.quantization.quantize_dynamic(
        model, {nn.Linear}, dtype=torch.qint8
    )
    # 静态量化准备(eager 模式下通常还需在模型中插入 QuantStub/DeQuantStub)
    model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
    model_prepared = torch.quantization.prepare(model, inplace=False)
    # 校准:跑若干批数据,让观察器统计激活分布
    with torch.no_grad():
        for data, _ in calibration_loader:
            model_prepared(data)
    # 转换为量化模型
    model_quantized = torch.quantization.convert(model_prepared)
    return model_quantized
def model_pruning(model, pruning_amount=0.3):
    """模型剪枝"""
    from torch.nn.utils import prune
    parameters_to_prune = []
    for name, module in model.named_modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            parameters_to_prune.append((module, 'weight'))
    # 全局剪枝:按 L1 范数在所有候选权重中统一裁掉 pruning_amount 比例
    prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=pruning_amount,
    )
    return model
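剪枝后权重并没有真正被删除,而是以 weight_orig + weight_mask 的形式保存;可以统计稀疏度,并用 prune.remove() 把掩码固化到权重上使剪枝永久化。简短示意(模型仅作演示):
import torch
import torch.nn as nn
from torch.nn.utils import prune
model = nn.Sequential(nn.Linear(16, 16), nn.Linear(16, 4))
prune.global_unstructured(
    [(m, 'weight') for m in model if isinstance(m, nn.Linear)],
    pruning_method=prune.L1Unstructured, amount=0.5,
)
zeros = sum((m.weight == 0).sum().item() for m in model)
total = sum(m.weight.numel() for m in model)
print(f"稀疏度: {zeros / total:.2%}")
for m in model:
    prune.remove(m, 'weight') # 去掉 mask 与 weight_orig,剪枝结果永久化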
7.2 ONNX导出与推理
def export_to_onnx(model, sample_input, onnx_path="model.onnx"):
    """导出模型到ONNX格式"""
    model.eval()
    torch.onnx.export(
        model,
        sample_input,
        onnx_path,
        export_params=True,
        opset_version=13,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={
            'input': {0: 'batch_size'},
            'output': {0: 'batch_size'}
        }
    )
    print(f"Model exported to {onnx_path}")
def onnx_inference(onnx_path, input_data):
    """ONNX模型推理"""
    import onnxruntime as ort
    ort_session = ort.InferenceSession(onnx_path)
    # 准备输入
    ort_inputs = {ort_session.get_inputs()[0].name: input_data.numpy()}
    # 推理
    ort_outs = ort_session.run(None, ort_inputs)
    return torch.tensor(ort_outs[0])
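导出后建议用同一份输入比对 PyTorch 与 ONNX Runtime 的输出是否一致(允许微小数值误差)。下面沿用上文的 export_to_onnx / onnx_inference 给出简短示意,其中 model 与输入形状为占位,需按实际模型替换,且需要安装 onnx 与 onnxruntime:
import numpy as np
import torch
sample_input = torch.randn(1, 3, 224, 224) # 假设的输入形状
export_to_onnx(model, sample_input, "model.onnx")
with torch.no_grad():
    torch_out = model(sample_input)
onnx_out = onnx_inference("model.onnx", sample_input)
np.testing.assert_allclose(torch_out.numpy(), onnx_out.numpy(), rtol=1e-3, atol=1e-5)
print("PyTorch 与 ONNX Runtime 输出一致")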
8. 🔧 实用工具函数
8.1 训练辅助工具
class ModelCheckpoint:
    """模型检查点管理器"""
    def __init__(self, save_dir, mode='min', patience=5):
        self.save_dir = save_dir
        self.mode = mode
        self.patience = patience
        self.best_score = None
        self.counter = 0
    def __call__(self, score, model, optimizer, epoch):
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(model, optimizer, epoch, True)
        elif (self.mode == 'min' and score < self.best_score) or \
             (self.mode == 'max' and score > self.best_score):
            self.best_score = score
            self.save_checkpoint(model, optimizer, epoch, True)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                return True # 早停
        return False
    def save_checkpoint(self, model, optimizer, epoch, is_best):
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_score': self.best_score
        }
        torch.save(checkpoint, f'{self.save_dir}/checkpoint.pth')
        if is_best:
            torch.save(model.state_dict(), f'{self.save_dir}/best_model.pth')
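ModelCheckpoint 的典型用法是在每个 epoch 验证结束后调用,返回 True 时提前结束训练。简短示意(num_epochs、val_loss、model、optimizer 来自实际的训练/验证代码):
import os
os.makedirs('checkpoints', exist_ok=True)
checkpoint_cb = ModelCheckpoint(save_dir='checkpoints', mode='min', patience=5)
for epoch in range(num_epochs):
    # ... 训练与验证,得到 val_loss ...
    if checkpoint_cb(val_loss, model, optimizer, epoch):
        print(f"Early stopping at epoch {epoch}")
        break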
总结:
- 动态计算图:PyTorch使用动态计算图,图在运行时构建,提供了极大的灵活性。
- 梯度累积:对于大模型,可以多次"前向传播 + 反向传播"累积梯度后再执行一次参数更新,以模拟更大的 batch size。
- 学习率调度:使用 torch.optim.lr_scheduler 来调整学习率。
- TensorBoard可视化:集成TensorBoard来跟踪实验指标。
- 混合精度训练:使用 torch.cuda.amp 进行自动混合精度训练,节省显存并加速。