《动手学深度学习》(PyTorch版)代码注释 - 51 【Style_transfer】
发布日期:2021-05-19 18:03:19 浏览次数:21 分类:精选文章

本文共 5752 字,大约阅读时间需要 19 分钟。

说明

本博客代码来自开源项目,并在博主学习的基础上对代码进行了注释解释,方便理解各个函数的原理和用途。

配置环境

使用环境:Python 3.8

平台:Windows 10
开发工具:PyCharm

目录

此节说明

此节对应书本中的样式迁移部分。由于该部分较为复杂,代码注释较多。

代码解释

import matplotlib.pyplot as plt
import time
import torch
import torch.nn.functional as F
import torchvision
import numpy as np
from PIL import Image
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

样式迁移

本节将介绍如何使用PyTorch实现样式迁移(style transfer)。样式迁移是一种图像合成技术:利用卷积神经网络提取特征,把样式图像的风格应用到内容图像上,使合成图像在保留内容图像主体结构的同时呈现样式图像的风格。

代码解释

# 本书链接:https://tangshusen.me/Dive-into-DL-PyTorch/
# 作者:黄文俊
# 邮箱:hurri_cane@qq.com
from matplotlib import pyplot as plt
import time
import torch
import torch.nn.functional as F
import torchvision
import numpy as np
from PIL import Image
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Presumably configures the default matplotlib figure size via the d2l
# helper package -- confirm against d2lzh_pytorch.
d2l.set_figsize()

读取内容图像和样式图像

# Load the content image (the picture whose structure is kept) and show it.
# NOTE(review): both paths below are absolute, machine-specific Windows
# locations; adjust them to wherever the images live locally.
content_img = Image.open('F:/PyCharm/Learning_pytorch/data/img/rainier.jpg')
d2l.plt.imshow(content_img)
plt.show()
d2l.set_figsize()
# Load the style image (the picture whose style is borrowed) and show it.
style_img = Image.open('F:/PyCharm/Learning_pytorch/data/img/autumn_oak.jpg')
d2l.plt.imshow(style_img)
plt.show()

图像预处理与后处理

# Per-channel (R, G, B) mean and standard deviation used by preprocess() to
# normalise images and by postprocess() to undo that normalisation.
# These look like the usual ImageNet statistics expected by torchvision's
# pretrained models -- confirm against the torchvision documentation.
rgb_mean = np.array([0.485, 0.456, 0.406])
rgb_std = np.array([0.229, 0.224, 0.225])
def preprocess(PIL_img, image_shape):
    """Turn a PIL image into a normalised batch tensor.

    The image is resized to ``image_shape``, converted to a tensor,
    normalised channel-wise with ``rgb_mean`` / ``rgb_std``, and given a
    leading batch axis.

    Args:
        PIL_img: The input PIL image.
        image_shape: Target size passed to ``torchvision.transforms.Resize``.

    Returns:
        A tensor of shape (batch_size, 3, H, W) with batch_size == 1.
    """
    steps = [
        torchvision.transforms.Resize(image_shape),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=rgb_mean, std=rgb_std),
    ]
    pipeline = torchvision.transforms.Compose(steps)
    normalised = pipeline(PIL_img)
    # Prepend the batch axis expected by the network.
    return normalised.unsqueeze(dim=0)
def postprocess(img_tensor):
    """Convert a normalised batch tensor back into a PIL image.

    Only the first image of the batch is used. It is moved to the CPU,
    de-normalised, clamped to [0, 1] and converted to a PIL image.

    Args:
        img_tensor: A batch tensor as produced by ``preprocess``.

    Returns:
        The first image of the batch as a PIL image.
    """
    # Normalize computes (x - mean) / std, so mean = -m / s and std = 1 / s
    # yields x * s + m, which inverts the normalisation done in preprocess().
    undo_normalize = torchvision.transforms.Normalize(
        mean=-rgb_mean / rgb_std,
        std=1 / rgb_std,
    )
    tensor_to_pil = torchvision.transforms.ToPILImage()
    first_image = img_tensor[0].cpu()
    restored = undo_normalize(first_image).clamp(0, 1)
    return tensor_to_pil(restored)

预训练网络加载

# Load VGG-19 with pretrained weights (showing download progress) and print
# its layer structure so the layer indices used below can be looked up.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of a `weights=` argument -- confirm against the installed version.
pretrained_net = torchvision.models.vgg19(pretrained=True, progress=True)
print(pretrained_net)

网络结构准备

# Indices into pretrained_net.features of the layers whose outputs serve as
# style features and as content features.
style_layers = [0, 5, 10, 19, 28]
content_layers = [25]
a = content_layers + style_layers  # [25, 0, 5, 10, 19, 28]
# Number of layers to keep: everything up to and including the deepest
# layer that is tapped for features.
b = max(a) + 1
net_list = [pretrained_net.features[i] for i in range(b)]
net = torch.nn.Sequential(*net_list)

特征提取

def extract_features(X, content_layers, style_layers):
    """Run ``X`` through ``net`` and collect the requested layer outputs.

    Args:
        X: Input tensor fed to the first layer of the module-level ``net``.
        content_layers: Layer indices whose outputs are content features.
        style_layers: Layer indices whose outputs are style features.

    Returns:
        A ``(contents, styles)`` pair of lists holding the collected
        outputs in layer order.
    """
    contents, styles = [], []
    for idx, layer in enumerate(net):
        X = layer(X)
        if idx in style_layers:
            styles.append(X)
        if idx in content_layers:
            contents.append(X)
    return contents, styles

获取内容特征与样式特征

def get_contents(image_shape, device):
    """Preprocess the content image and extract its content features.

    Args:
        image_shape: Target size handed to ``preprocess``.
        device: Device the preprocessed image is moved to.

    Returns:
        ``(content_X, contents_Y)``: the preprocessed content image and the
        list of its content-layer features.
    """
    image = preprocess(content_img, image_shape).to(device)
    # Only the content features are needed; the style outputs are discarded.
    features = extract_features(image, content_layers, style_layers)[0]
    return image, features
def get_styles(image_shape, device):
    """Preprocess the style image and extract its style features.

    Args:
        image_shape: Target size handed to ``preprocess``.
        device: Device the preprocessed image is moved to.

    Returns:
        ``(style_X, styles_Y)``: the preprocessed style image and the list
        of its style-layer features.
    """
    image = preprocess(style_img, image_shape).to(device)
    # Only the style features are needed; the content outputs are discarded.
    features = extract_features(image, content_layers, style_layers)[1]
    return image, features

内容损失计算

def content_loss(Y_hat, Y):
    """Mean squared error between generated and target content features.

    Args:
        Y_hat: Content features of the image being optimised.
        Y: Content features of the content image (the fixed target).

    Returns:
        A scalar tensor holding the mean squared error.
    """
    # The target is a constant reference. Detaching it keeps backward() from
    # propagating into the graph that produced it, so that graph neither has
    # to be retained between iterations nor re-traversed on every step.
    return F.mse_loss(Y_hat, Y.detach())

风格损失计算

def gram(X):
    """Compute the normalised Gram matrix of a feature map.

    The tensor is viewed as a (channels, height * width) matrix, so it must
    hold exactly channels * height * width elements (i.e. a batch of one).

    Args:
        X: Feature tensor indexed as (batch, channels, height, width).

    Returns:
        A (channels, channels) tensor: the product of the flattened
        features with their transpose, divided by channels * height * width.
    """
    channels = X.shape[1]
    positions = X.shape[2] * X.shape[3]
    flat = X.view(channels, positions)
    products = torch.matmul(flat, flat.t())
    return products / (channels * positions)
def style_loss(Y_hat, gram_Y):
    """Mean squared error between Gram matrices of generated and style features.

    Args:
        Y_hat: Style-layer features of the image being optimised.
        gram_Y: Precomputed Gram matrix of the style image's features at the
            same layer (the fixed target).

    Returns:
        A scalar tensor holding the mean squared error.
    """
    # The target Gram matrix is a constant reference. Detaching it keeps
    # backward() from propagating into the graph that produced it.
    return F.mse_loss(gram(Y_hat), gram_Y.detach())

总变差损失

def tv_loss(Y_hat):
    """Total-variation loss: mean absolute difference of neighbouring pixels.

    Args:
        Y_hat: Image tensor indexed as (batch, channels, height, width).

    Returns:
        A scalar tensor: half the sum of the mean absolute differences
        between vertically adjacent and horizontally adjacent elements.
    """
    vertical = F.l1_loss(Y_hat[:, :, 1:, :], Y_hat[:, :, :-1, :])
    horizontal = F.l1_loss(Y_hat[:, :, :, 1:], Y_hat[:, :, :, :-1])
    return 0.5 * (vertical + horizontal)

损失权重设置

# Multipliers applied to each loss term in compute_loss().
content_weight = 1
style_weight = 1e4
tv_weight = 20

损失计算函数

def compute_loss(X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram):
    """Compute the weighted content, style and total-variation losses.

    Args:
        X: The image being optimised (input to the total-variation term).
        contents_Y_hat: Content features of ``X``, one per content layer.
        styles_Y_hat: Style features of ``X``, one per style layer.
        contents_Y: Target content features, paired with ``contents_Y_hat``.
        styles_Y_gram: Target Gram matrices, paired with ``styles_Y_hat``.

    Returns:
        ``(contents_l, styles_l, tv_l, l)``: the list of weighted content
        losses, the list of weighted style losses, the weighted
        total-variation loss, and the sum of all of them.
    """
    contents_l = [
        content_loss(generated, target) * content_weight
        for generated, target in zip(contents_Y_hat, contents_Y)
    ]
    styles_l = [
        style_loss(generated, target_gram) * style_weight
        for generated, target_gram in zip(styles_Y_hat, styles_Y_gram)
    ]
    tv_l = tv_loss(X) * tv_weight
    total = sum(styles_l) + sum(contents_l) + tv_l
    return contents_l, styles_l, tv_l, total

合成图像模型

class GeneratedImage(torch.nn.Module):
    """Module whose only parameter is the image being synthesised."""

    def __init__(self, img_shape):
        """Create the image parameter, filled with uniform random values.

        Args:
            img_shape: Iterable of dimension sizes for the image tensor.
        """
        super().__init__()
        initial = torch.rand(*img_shape)
        self.weight = torch.nn.Parameter(initial)

    def forward(self):
        """Return the image parameter itself (no input is taken)."""
        return self.weight

初始化和训练模型

def get_inits(X, device, lr, styles_Y):
    """Create the trainable image, the target Gram matrices and the optimiser.

    Args:
        X: Tensor used to initialise the generated image (the script passes
            the preprocessed content image).
        device: Device the generated image is placed on.
        lr: Learning rate for the Adam optimiser.
        styles_Y: Style features of the style image, one per style layer.

    Returns:
        ``(image, styles_Y_gram, optimizer)``: the trainable image parameter,
        the list of Gram matrices of ``styles_Y``, and an Adam optimiser
        over the image parameter.
    """
    gen_img = GeneratedImage(X.shape).to(device)
    # Copy the values instead of rebinding `.data`: rebinding would make the
    # parameter share storage with X, so every optimiser step would silently
    # overwrite the caller's tensor (and drop the `.to(device)` placement).
    gen_img.weight.data.copy_(X.data)
    optimizer = torch.optim.Adam(gen_img.parameters(), lr=lr)
    styles_Y_gram = [gram(Y) for Y in styles_Y]
    return gen_img(), styles_Y_gram, optimizer

训练过程

def train(X, contents_Y, styles_Y, device, lr, max_epochs, lr_decay_epoch):
    """Optimise the generated image to match content and style targets.

    Args:
        X: Initial image tensor (the script passes the content image).
        contents_Y: Target content features.
        styles_Y: Target style features (converted to Gram matrices here).
        device: Device used for the generated image.
        lr: Initial learning rate for Adam.
        max_epochs: Number of optimisation steps.
        lr_decay_epoch: Step size of the StepLR schedule (the learning rate
            is multiplied by 0.1 every ``lr_decay_epoch`` steps).

    Returns:
        The optimised image tensor, detached from the autograd graph.
    """
    print("training on ", device)
    X, styles_Y_gram, optimizer = get_inits(X, device, lr, styles_Y)
    # The targets are constants. Detaching them means each iteration builds
    # a fresh graph that depends only on X, so backward() needs no
    # `retain_graph=True` and does no work on the targets' own graphs.
    contents_Y = [Y.detach() for Y in contents_Y]
    styles_Y_gram = [gram_Y.detach() for gram_Y in styles_Y_gram]
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, lr_decay_epoch, gamma=0.1)
    for i in range(max_epochs):
        start = time.time()
        contents_Y_hat, styles_Y_hat = extract_features(
            X, content_layers, style_layers)
        contents_l, styles_l, tv_l, l = compute_loss(
            X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        scheduler.step()
        # Every 50th step (except step 0): show the current image and report
        # the individual loss terms and the time taken by this step.
        if i % 50 == 0 and i != 0:
            d2l.plt.imshow(postprocess(X.detach()))
            plt.show()
            print('epoch %3d, content loss %.2f, style loss %.2f, '
                  'TV loss %.2f, %.2f sec'
                  % (i, sum(contents_l).item(), sum(styles_l).item(), tv_l.item(),
                     time.time() - start))
    return X.detach()

结果展示

# Size the images are resized to before being fed to the network.
image_shape = (150, 225)
net = net.to(device)

# Extract the fixed targets from the content and style images.
content_X, contents_Y = get_contents(image_shape, device)
style_X, styles_Y = get_styles(image_shape, device)

# Optimise starting from the content image, then show the final result.
output = train(
    content_X,
    contents_Y,
    styles_Y,
    device,
    lr=0.01,
    max_epochs=200,
    lr_decay_epoch=200,
)
d2l.plt.imshow(postprocess(output))
plt.show()
print("*" * 50)
上一篇:《动手学深度学习》(PyTorch版)代码注释 - 52 【Word2Vec_Learning】
下一篇:《动手学深度学习》(PyTorch版)代码注释 - 50 【Semantic_segmentation】

发表评论

最新留言

路过,博主的博客真漂亮。。
[***.116.15.85]2025年04月11日 13时04分42秒