自定义损失函数¶
参考:Pytorch如何自定义损失函数(Loss Function)?
PyTorch在`torch.nn`模块提供了许多常用的损失函数,同时也提供了自定义方法。以交叉熵损失(`CrossEntropyLoss`)为例
交叉熵损失¶
参考:
交叉熵损失用于计算预测结果和正确分类的概率损失度量
损失函数计算如下:
J(\theta)= (-1)\cdot \frac{1}{m}\cdot \sum_{i=1}^{m} \sum_{j=1}^{k} 1\left\{y_{i}=j\right\} \ln p\left(y_{i}=j | x_{i}; \theta\right)
= (-1)\cdot \frac{1}{m}\cdot \sum_{i=1}^{m} \sum_{j=1}^{k} 1\left\{y_{i}=j\right\} \ln \frac{e^{\theta_{j}^{T} x_{i}}}{\sum_{l=1}^{k} e^{\theta_{l}^{T} x_{i}}}
其中$m$是批量大小,$k$是类别数,$y_{i}$表示每个样本$x_{i}$所属的正确类别,$1\{y_{i}=j\}$是示性函数(相等时为1,不相等时为0)
求导结果如下:
\frac{\partial J(\theta)}{\partial \theta_{s}}
=(-1)\cdot \frac{1}{m}\cdot \sum_{i=1}^{m} \left[ 1\left\{y_{i}=s \right\} - p\left(y_{i}=s | x_{i}; \theta\right) \right]\cdot x_{i}
pytorch实现¶
使用pytorch
自带的函数进行交叉熵损失的前向实现,那么自动实现了反向求导功能,不需要重写backward
方法
import torch
import torch.nn as nn
class CustomCrossEntropyLoss(nn.Module):
    """Mean cross-entropy loss over a batch, built only from torch operations.

    Because the forward pass uses differentiable torch ops, autograd derives
    the backward pass and no custom ``backward`` is needed.
    """

    def forward(self, inputs, targets):
        """Return the mean cross-entropy of ``inputs`` against ``targets``.

        Args:
            inputs: 2-D tensor of raw scores, indexed as [sample, class].
            targets: Integer tensor of class indices, one per row of
                ``inputs``.

        Returns:
            0-dim tensor holding the mean over the batch of
            ``logsumexp(inputs[i]) - inputs[i, targets[i]]``.
        """
        assert len(inputs.shape) == 2 and len(targets) == inputs.shape[0]

        batch = inputs.shape[0]
        # logsumexp is the overflow-safe form of log(sum(exp(x))): the naive
        # expression becomes inf as soon as one score is large enough for
        # exp() to overflow.
        log_norm = torch.logsumexp(inputs, dim=1)
        # Build the row index on the same device as the scores.
        rows = torch.arange(batch, device=inputs.device)
        return torch.sum(log_norm - inputs[rows, targets]) / batch
numpy实现¶
参考:CREATING EXTENSIONS USING NUMPY AND SCIPY
使用numpy
实现交叉熵损失,需要重新实现前向和反向功能
import torch
import torch.nn as nn
from torch.autograd import Function
import numpy as np
class NumpyCrossEntropyLossFunction(Function):
    """Cross-entropy loss whose forward and backward passes are computed in NumPy.

    Autograd cannot trace NumPy code, so both directions are written by hand.
    """

    @staticmethod
    def _softmax(scores):
        """Return the row-wise softmax of a 2-D NumPy array as a new array."""
        # Subtract out of place: ``scores`` may share memory with the caller's
        # tensor, and an in-place ``-=`` would silently overwrite it.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    @staticmethod
    def forward(ctx, inputs, labels):
        """Compute the mean cross-entropy loss.

        Args:
            ctx: Autograd context used to stash tensors for ``backward``.
            inputs: 2-D tensor of raw scores, indexed as [sample, class].
            labels: 1-D integer tensor of class indices, one per sample.

        Returns:
            0-dim tensor with the dtype and device of ``inputs``.
        """
        # Keep the data needed by the backward pass.
        ctx.save_for_backward(inputs.detach(), labels.detach())

        # NumPy only reads host memory, so copy to the CPU before converting.
        scores = inputs.detach().cpu().numpy()
        targets = labels.detach().cpu().numpy()
        assert len(scores.shape) == 2
        assert len(targets.shape) == 1

        probs = NumpyCrossEntropyLossFunction._softmax(scores)
        n = targets.shape[0]
        correct_probs = probs[np.arange(n), targets]
        loss = -1.0 / n * np.sum(np.log(correct_probs))

        return torch.as_tensor(float(loss), dtype=inputs.dtype, device=inputs.device)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient of the loss with respect to ``inputs``.

        Args:
            ctx: Autograd context holding the tensors saved by ``forward``.
            grad_output: Gradient flowing into the scalar loss.

        Returns:
            Tuple ``(grad_inputs, None)``; ``labels`` receives no gradient.
        """
        inputs, labels = ctx.saved_tensors
        scores = inputs.cpu().numpy()
        targets = labels.cpu().numpy()

        # d(loss)/d(scores) = (softmax - one_hot) / n
        grad = NumpyCrossEntropyLossFunction._softmax(scores)
        n = targets.shape[0]
        grad[np.arange(n), targets] -= 1
        grad /= n

        # Chain rule: scale by the incoming gradient so that expressions such
        # as ``2 * loss`` or a weighted sum of losses differentiate correctly.
        upstream = grad_output.detach().cpu().numpy()
        grad = np.ascontiguousarray(grad * upstream)

        grad_inputs = torch.from_numpy(grad).to(device=inputs.device, dtype=inputs.dtype)
        return grad_inputs, None
class NumpyCrossEntropyLoss(nn.Module):
    """Module wrapper that evaluates the loss through ``NumpyCrossEntropyLossFunction``."""

    def forward(self, inputs, labels):
        """Apply ``NumpyCrossEntropyLossFunction`` to ``inputs`` and ``labels`` and return its result."""
        loss_function = NumpyCrossEntropyLossFunction
        return loss_function.apply(inputs, labels)
前向传输需要注意:
- 输出结果为`torch.Tensor`格式数据
- 输入参数需要先调用`detach()`,再调用`numpy()`转换成`numpy`格式
- 输入参数可以调用函数`save_for_backward`保存,用于反向求导
反向求导需要注意:
- 输入参数为下一层的梯度,同样需要调用`detach().numpy()`函数转换。如果本身是最后一层,则传入的梯度为1.0
- 通过属性`saved_tensors`得到保存的前向数据
- 返回结果个数由前向输入参数决定(每个前向输入对应一个返回值,不需要梯度的输入返回`None`)
测试一¶
# Fixed test data: 3 samples with 5 class scores each.
inputs = torch.tensor([[1.1785, -0.0969, 0.5756, -1.2113, -0.1120],
                       [-0.5199, -0.8051, 1.0953, 0.1480, 0.2879],
                       [2.3401, 0.6403, 1.4306, 0.0982, -0.7363]],
                      requires_grad=True)
targets = torch.tensor([4, 3, 1], dtype=torch.long)

# Reference: the built-in loss and the gradient it produces.
criterion = nn.CrossEntropyLoss()
loss = criterion(inputs, targets)
print(loss)
loss.backward()
print(inputs.grad)

# Gradients accumulate across backward() calls, so clear the first result
# before measuring the second implementation.
inputs.grad = None

# Same data through the NumPy-backed loss. The module is called directly
# rather than via .forward() so that nn.Module.__call__ (and any hooks) runs.
criterion2 = NumpyCrossEntropyLoss()
loss = criterion2(inputs, targets)
print(loss)
loss.backward()
print(inputs.grad)
结果如下:
tensor(2.0187, grad_fn=<NllLossBackward>)
tensor([[ 0.1520, 0.0424, 0.0832, 0.0139, -0.2915],
[ 0.0304, 0.0228, 0.1528, -0.2741, 0.0681],
[ 0.1918, -0.2983, 0.0772, 0.0204, 0.0088]])
tensor(2.0187, grad_fn=<NumpyCrossEntropyLossFunctionBackward>)
tensor([[ 0.1520, 0.0424, 0.0832, 0.0139, -0.2915],
[ 0.0304, 0.0228, 0.1528, -0.2741, 0.0681],
[ 0.1918, -0.2983, 0.0772, 0.0204, 0.0088]])
测试二¶
使用LeNet-5
模型训练数据集FASHION-MNIST
# -*- coding: utf-8 -*-
"""
@author: zj
@file: loss_lenet-5.py
@time: 2020-01-16
"""
import logging
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms
# Configure root logging: each record shows timestamp, source file and line
# number, level name and message; records at DEBUG level and above are emitted.
logging.basicConfig(format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', level=logging.DEBUG)
def load_data():
    """Build the FashionMNIST training and test data loaders.

    Each image is passed through Grayscale, Resize to 32x32, ToTensor and
    Normalize with mean 0.5 and std 0.5. Both datasets are created with
    ``download=True``.

    Returns:
        Tuple ``(train_dataloader, test_dataloader)``. Both loaders use a
        batch size of 128, shuffling enabled and 4 workers.
    """
    preprocess = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ])
    # Settings shared by both loaders.
    loader_options = dict(batch_size=128, shuffle=True, num_workers=4)

    train_set = FashionMNIST('./data/fashionmnist/', train=True, download=True, transform=preprocess)
    test_set = FashionMNIST('./data/fashionmnist', train=False, download=True, transform=preprocess)

    train_loader = data.DataLoader(train_set, **loader_options)
    test_loader = data.DataLoader(test_set, **loader_options)
    return train_loader, test_loader
class LeNet5(nn.Module):
    """LeNet-5 style classifier: three 5x5 convolutions followed by two linear layers.

    The layers are sized for single-channel input and produce 10 output scores
    per sample.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor (5x5 kernels, stride 1, no padding).
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0, bias=True)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0, bias=True)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5, stride=1, padding=0, bias=True)

        # 2x2 max pooling, reused after conv1 and conv2.
        self.pool = nn.MaxPool2d((2, 2), stride=2)

        # Classifier head.
        self.fc1 = nn.Linear(120, 84, bias=True)
        self.fc2 = nn.Linear(84, 10, bias=True)

    def forward(self, input):
        """Return the class scores computed for the image batch ``input``."""
        features = self.conv1(input)
        features = self.pool(F.relu(features))
        features = self.conv2(features)
        features = self.pool(F.relu(features))
        # No activation is applied to the output of conv3.
        features = self.conv3(features)

        flat = features.view(-1, self.num_flat_features(features))
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first (batch) one."""
        return x.size()[1:].numel()
def compute_accuracy(loader, net, device):
    """Return the fraction of samples in ``loader`` that ``net`` classifies correctly.

    Accuracy is counted per sample, so a smaller final batch is not
    over-weighted. The model is evaluated in eval mode with gradient tracking
    disabled, and its previous training mode is restored before returning.

    Args:
        loader: Iterable yielding ``(data, labels)`` batches.
        net: Model mapping a data batch to per-class scores.
        device: Device the batches are moved to before evaluation.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when ``loader`` yields no samples.
    """
    correct = 0
    seen = 0

    was_training = net.training
    net.eval()
    try:
        # Evaluation needs no autograd graph; skipping it saves memory and time.
        with torch.no_grad():
            for batch, labels in loader:
                batch = batch.to(device)
                labels = labels.to(device)

                predicted = torch.argmax(net(batch), dim=1)
                correct += (predicted == labels).sum().item()
                seen += labels.size(0)
    finally:
        # Leave the model in the mode the caller had it in.
        net.train(was_training)

    return correct / seen if seen else 0.0
if __name__ == '__main__':
    # Train LeNet5 on the loaders returned by load_data() for 5 epochs with
    # Adam (lr=1e-3), logging the mean batch loss and the train/test accuracy
    # after every epoch.
    train_dataloader, test_dataloader = load_data()

    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
    net = LeNet5().to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)

    logging.info("开始训练")
    epochs = 5
    for epoch in range(epochs):
        num = 0
        total_loss = 0
        # The batch is named ``images`` rather than ``data`` so the
        # module-level alias ``data`` (torch.utils.data) is not overwritten.
        for images, labels in train_dataloader:
            images = images.to(device)
            labels = labels.to(device)

            # Call the modules rather than .forward() so that
            # nn.Module.__call__ (and any registered hooks) runs.
            scores = net(images)
            loss = criterion(scores, labels)
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num += 1

        avg_loss = total_loss / num
        logging.info('epoch: %d loss: %.6f', epoch + 1, avg_loss)

        train_accuracy = compute_accuracy(train_dataloader, net, device)
        test_accuracy = compute_accuracy(test_dataloader, net, device)
        logging.info('train accuracy: %f test accuracy: %f', train_accuracy, test_accuracy)
训练结果如下:
2020-01-16 16:41:08,667 loss_lenet-5.py[line:94] INFO 开始训练
2020-01-16 16:41:21,641 loss_lenet-5.py[line:114] INFO epoch: 1 loss: 0.643288
2020-01-16 16:41:26,312 loss_lenet-5.py[line:117] INFO train accuracy: 0.843500 test accuracy: 0.833267
2020-01-16 16:41:39,899 loss_lenet-5.py[line:114] INFO epoch: 2 loss: 0.401418
2020-01-16 16:41:44,489 loss_lenet-5.py[line:117] INFO train accuracy: 0.868387 test accuracy: 0.858287
2020-01-16 16:41:57,647 loss_lenet-5.py[line:114] INFO epoch: 3 loss: 0.348542
2020-01-16 16:42:02,498 loss_lenet-5.py[line:117] INFO train accuracy: 0.866899 test accuracy: 0.850672
2020-01-16 16:42:15,939 loss_lenet-5.py[line:114] INFO epoch: 4 loss: 0.318400
2020-01-16 16:42:20,790 loss_lenet-5.py[line:117] INFO train accuracy: 0.890231 test accuracy: 0.879055
2020-01-16 16:42:34,125 loss_lenet-5.py[line:114] INFO epoch: 5 loss: 0.299220
2020-01-16 16:42:39,302 loss_lenet-5.py[line:117] INFO train accuracy: 0.899481 test accuracy: 0.882812
使用自定义`NumpyCrossEntropyLoss`替换,实现如下:
# Use the NumPy-backed loss as the training criterion; the built-in loss it
# replaces is kept below, commented out, for comparison.
from custom_cross_entropy_loss import NumpyCrossEntropyLoss
criterion = NumpyCrossEntropyLoss().to(device)
# criterion = nn.CrossEntropyLoss().to(device)
当前使用Nvidia显卡进行训练,其张量操作在显卡内进行,需要先转换成CPU Tensor,再转换成`numpy`格式,否则出现以下错误
TypeError: can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
同时还需要将backward
返回的结果转换成cuda Tensor
格式,修改如下:
class NumpyCrossEntropyLossFunction(Function):
    # Abridged listing: "..." stands for lines left unchanged from the
    # CPU-only version; only the device-related lines are shown.

    @staticmethod
    def forward(ctx, inputs, labels):
        ...
        # NumPy cannot read CUDA memory, so copy to the CPU before converting.
        scores = inputs.detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()
        ...
        ...
        return torch.as_tensor(loss, dtype=inputs.dtype)

    @staticmethod
    def backward(ctx, grad_output):
        ...
        ...
        scores = inputs.cpu().numpy()
        labels = labels.cpu().numpy()
        ...
        ...
        # Put the gradient on the device of the saved inputs instead of
        # hard-coding .cuda(), so the same code also runs on CPU-only machines
        # and on a non-default GPU.
        return torch.from_numpy(grad_out / N).to(inputs.device), None
训练结果如下:
2020-01-17 15:37:52,056 loss_lenet-5.py[line:97] INFO 开始训练
2020-01-17 15:37:57,420 loss_lenet-5.py[line:117] INFO epoch: 1 loss: 0.640523
2020-01-17 15:38:01,180 loss_lenet-5.py[line:120] INFO train accuracy: 0.847009 test accuracy: 0.840190
2020-01-17 15:38:06,456 loss_lenet-5.py[line:117] INFO epoch: 2 loss: 0.400140
2020-01-17 15:38:10,617 loss_lenet-5.py[line:120] INFO train accuracy: 0.873534 test accuracy: 0.866495
2020-01-17 15:38:16,634 loss_lenet-5.py[line:117] INFO epoch: 3 loss: 0.346358
2020-01-17 15:38:20,566 loss_lenet-5.py[line:120] INFO train accuracy: 0.887332 test accuracy: 0.873616
2020-01-17 15:38:25,908 loss_lenet-5.py[line:117] INFO epoch: 4 loss: 0.318044
2020-01-17 15:38:29,827 loss_lenet-5.py[line:120] INFO train accuracy: 0.893812 test accuracy: 0.884197
2020-01-17 15:38:35,480 loss_lenet-5.py[line:117] INFO epoch: 5 loss: 0.299681
2020-01-17 15:38:39,493 loss_lenet-5.py[line:120] INFO train accuracy: 0.897655 test accuracy: 0.885186
小结¶
自定义损失函数时,尽量使用pytorch
实现,这样就不需要自己实现自动求导功能