目錄
前言
專案結構
核心代碼
總結
前言
接著我上一篇開源機器學習的使用:如何將照片變成卡通圖,animegan2-pytorch機器學習專案使用 | 機器學習_阿良的博客-CSDN博客
我還是繼續把專案稍微魔改一下,依然整理成一個 Python 檔案,就可以執行單一圖片的處理,成為可以直接拿去使用的工具。
專案github地址:github地址
專案結構
samples目錄裡面有一些樣例圖片,可以測試用;weights目錄放了原專案的4個模型。Python環境需要安裝一些依賴,主要是pytorch,pytorch的環境安裝可以參考我的另一篇文章:機器學習基礎環境部署 | 機器學習系列_阿良的博客-CSDN博客


核心代碼
不廢話,直接上核心代碼。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/12/4 22:34
# @Author : 劍客阿良_ALiang
# @Site :
# @File : image_cartoon_tool.py
from PIL import Image
import torch
from torchvision.transforms.functional import to_tensor, to_pil_image
from torch import nn
import os
import torch.nn.functional as F
import uuid
# -------------------------- hy add 01 --------------------------
class ConvNormLReLU(nn.Sequential):
    """Padding -> Conv2d -> GroupNorm -> LeakyReLU bundled as one sequential unit.

    Padding is done by a dedicated layer placed first, and the convolution
    itself always runs with ``padding=0``. The four children therefore keep
    the same positions (0: pad, 1: conv, 2: norm, 3: activation) for every
    ``pad_mode``.

    Args:
        in_ch: Number of input channels of the convolution.
        out_ch: Number of output channels of the convolution; also the
            channel count given to the GroupNorm layer.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Passed unchanged to the padding layer's constructor. Callers
            in this file pass either an int or a 4-tuple.
        pad_mode: Which padding layer to use: ``"zero"`` (``nn.ZeroPad2d``),
            ``"same"`` (``nn.ReplicationPad2d``) or ``"reflect"``
            (``nn.ReflectionPad2d``).
        groups: Convolution ``groups`` argument.
        bias: Whether the convolution has a bias term.

    Raises:
        NotImplementedError: If ``pad_mode`` is not one of the supported keys.
    """

    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1,
                 pad_mode="reflect", groups=1, bias=False):
        pad_layer = {
            "zero": nn.ZeroPad2d,
            "same": nn.ReplicationPad2d,
            "reflect": nn.ReflectionPad2d,
        }
        if pad_mode not in pad_layer:
            # Same exception type as before, but now it says what went wrong.
            raise NotImplementedError(
                "unsupported pad_mode {!r}; expected one of {}".format(
                    pad_mode, sorted(pad_layer)
                )
            )

        super().__init__(
            pad_layer[pad_mode](padding),
            # padding=0: all padding was already applied by the layer above.
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, stride=stride,
                      padding=0, groups=groups, bias=bias),
            # A single group normalises over all channels of each sample.
            nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True),
            nn.LeakyReLU(0.2, inplace=True),
        )
class InvertedResBlock(nn.Module):
    """Expand -> depthwise -> pointwise block with an optional skip connection.

    The stages, in order, are:

    1. a 1x1 ``ConvNormLReLU`` expanding ``in_ch`` to the hidden width
       (omitted when ``expansion_ratio == 1``),
    2. a ``ConvNormLReLU`` with ``groups`` equal to the hidden width,
    3. a bias-free 1x1 ``nn.Conv2d`` projecting to ``out_ch``,
    4. a single-group ``nn.GroupNorm``.

    The input is added to the result only when ``in_ch == out_ch``.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        expansion_ratio: Multiplier giving the hidden width,
            ``int(round(in_ch * expansion_ratio))``.
    """

    def __init__(self, in_ch, out_ch, expansion_ratio=2):
        super().__init__()
        hidden_ch = int(round(in_ch * expansion_ratio))
        self.use_res_connect = in_ch == out_ch

        # Optional expansion stage; built first so module creation order is
        # the same as appending stage by stage.
        if expansion_ratio != 1:
            stages = [ConvNormLReLU(in_ch, hidden_ch, kernel_size=1, padding=0)]
        else:
            stages = []

        stages += [
            # depthwise: one group per hidden channel
            ConvNormLReLU(hidden_ch, hidden_ch, groups=hidden_ch, bias=True),
            # pointwise projection followed by normalisation (no activation)
            nn.Conv2d(hidden_ch, out_ch, kernel_size=1, padding=0, bias=False),
            nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True),
        ]
        self.layers = nn.Sequential(*stages)

    def forward(self, input):
        """Run the stages; add ``input`` back when channel counts match."""
        out = self.layers(input)
        return input + out if self.use_res_connect else out
class Generator(nn.Module):
    """Image-to-image generator network used by ``handle``.

    Layout, as built in ``__init__``:

    * ``block_a``: 3 -> 32 -> 64 channels, including one stride-2 layer.
    * ``block_b``: 64 -> 128 channels, including one stride-2 layer.
    * ``block_c``: a ``ConvNormLReLU``, four ``InvertedResBlock`` stages at
      256 channels, then back to 128 channels.
    * ``block_d`` / ``block_e``: applied after the first / second bilinear
      resize in ``forward``; ``block_e`` ends at 32 channels.
    * ``out_layer``: bias-free 1x1 convolution to 3 channels followed by Tanh.

    NOTE(review): the attribute names presumably have to match the keys of the
    checkpoints loaded with ``load_state_dict`` - do not rename them.
    """

    def __init__(self):
        super().__init__()

        self.block_a = nn.Sequential(
            ConvNormLReLU(3, 32, kernel_size=7, padding=3),
            ConvNormLReLU(32, 64, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(64, 64),
        )

        self.block_b = nn.Sequential(
            ConvNormLReLU(64, 128, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(128, 128),
        )

        self.block_c = nn.Sequential(
            ConvNormLReLU(128, 128),
            InvertedResBlock(128, 256, 2),
            # three identical 256 -> 256 residual stages
            *(InvertedResBlock(256, 256, 2) for _ in range(3)),
            ConvNormLReLU(256, 128),
        )

        self.block_d = nn.Sequential(
            ConvNormLReLU(128, 128),
            ConvNormLReLU(128, 128),
        )

        self.block_e = nn.Sequential(
            ConvNormLReLU(128, 64),
            ConvNormLReLU(64, 64),
            ConvNormLReLU(64, 32, kernel_size=7, padding=3),
        )

        self.out_layer = nn.Sequential(
            nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False),
            nn.Tanh(),
        )

    @staticmethod
    def _resize(feat, target_size, align_corners):
        """Bilinear resize: to ``target_size`` if ``align_corners``, else by x2."""
        if align_corners:
            return F.interpolate(feat, target_size, mode="bilinear", align_corners=True)
        return F.interpolate(feat, scale_factor=2, mode="bilinear", align_corners=False)

    def forward(self, input, align_corners=True):
        """Run the network on ``input``.

        Args:
            input: Tensor fed to ``block_a``; its last two dimensions are
                used as the final resize target when ``align_corners`` is true.
            align_corners: When true, each resize targets an explicit size
                (the ``block_a`` output size, then the input size) with
                ``align_corners=True``. When false, each resize doubles the
                spatial size with ``align_corners=False``.

        Returns:
            The output of ``out_layer`` (3 channels, passed through Tanh).
        """
        feat = self.block_a(input)
        half_size = feat.size()[-2:]  # spatial size after the first stage

        feat = self.block_c(self.block_b(feat))
        feat = self._resize(feat, half_size, align_corners)

        feat = self.block_d(feat)
        feat = self._resize(feat, input.size()[-2:], align_corners)

        return self.out_layer(self.block_e(feat))
# -------------------------- hy add 02 --------------------------
def load_image(image_path, x32=False):
    """Open an image as RGB, optionally snapping its size to multiples of 32.

    Args:
        image_path: Path passed to ``Image.open``.
        x32: When true, resize the image so that each side shorter than 256
            becomes 256 and every other side is rounded down to a multiple
            of 32.

    Returns:
        The RGB image, resized if ``x32`` is true.
    """
    picture = Image.open(image_path).convert("RGB")
    if not x32:
        return picture

    def snap(side):
        # Small sides are raised to 256; others drop the remainder mod 32.
        if side < 256:
            return 256
        return side - side % 32

    width, height = picture.size
    return picture.resize((snap(width), snap(height)))
def handle(image_path: str, output_dir: str, type: int, device='cpu'):
    """Cartoonise a single image and save the result under ``output_dir``.

    Args:
        image_path: Path of the source image.
        output_dir: Directory for the result; created if it does not exist.
        type: Model selector. ``1`` loads ``./weights/paprika.pt`` and ``2``
            loads ``./weights/face_paint_512_v2.pt``. Both paths are relative
            to the current working directory.
        device: Torch device the network runs on (default ``'cpu'``).

    Returns:
        Path of the written image: ``<output_dir>/<uuid1 hex>.<ext>``, where
        ``<ext>`` is the source file's extension, or ``png`` when the source
        file name has none.

    Raises:
        ValueError: If ``type`` is neither 1 nor 2. ``ValueError`` is a
            subclass of ``Exception``, so callers catching ``Exception``
            keep working.
    """
    # os.path.splitext returns '' when there is no extension, whereas
    # split('.')[-1] would return the whole file name and produce an output
    # suffix that is not an image format.
    file_name = os.path.basename(image_path).strip()
    _ext = os.path.splitext(file_name)[1].lstrip('.') or 'png'

    if type == 1:
        _checkpoint = './weights/paprika.pt'
    elif type == 2:
        _checkpoint = './weights/face_paint_512_v2.pt'
    else:
        raise ValueError('type not support')

    os.makedirs(output_dir, exist_ok=True)

    net = Generator()
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source. Newer PyTorch releases offer weights_only=True -
    # confirm the minimum supported version before enabling it.
    net.load_state_dict(torch.load(_checkpoint, map_location="cpu"))
    net.to(device).eval()

    image = load_image(image_path)
    with torch.no_grad():
        # Rescale the tensor with x * 2 - 1 and add a batch dimension.
        batch = to_tensor(image).unsqueeze(0) * 2 - 1
        out = net(batch.to(device), False).cpu()
        # Clip to [-1, 1], then map to [0, 1] for conversion to a PIL image.
        out = out.squeeze(0).clip(-1, 1) * 0.5 + 0.5
        out = to_pil_image(out)

    # uuid-based name so repeated runs do not overwrite earlier results.
    result = os.path.join(output_dir, f'{uuid.uuid1().hex}.{_ext}')
    out.save(result)
    return result
if __name__ == '__main__':
    # Demo run: process each bundled sample with the given model type and
    # print the path of every result as soon as it is written.
    demo_jobs = (
        ('samples/images/fengjing.jpg', 1),
        ('samples/images/renxiang.jpg', 2),
    )
    for sample_path, model_type in demo_jobs:
        print(handle(sample_path, 'samples/images_result/', model_type))
代碼說明
1、handle方法可以將一張圖片變為卡通化圖片,入參為:圖片路徑、輸出目錄、型別(1為景色型別圖片、2為人物人像圖片)、設備型別(默認cpu,可以選擇cuda)
2、按照我上一篇文章的測試,適合風景的模型和適合人像的模型不太一樣,所以做了區分。
3、輸出結果圖片名字為了不重複,使用uuid。
驗證一下
先發一下準備的圖片


執行結果

效果如下


OK,沒什麼問題。
總結
整體效果還不錯。最近在想要不要把操作過程錄製成視頻,可能會讓人更好理解,只是不知道有沒有必要,也徵求一下意見,可以私信或者評論告訴我。
這個專案我還會改改,讓輸入變為視頻不是更香嗎?
分享:
我想成為一個溫柔的人,因為曾被溫柔的人那樣對待,深深了解那種被溫柔相待的感覺。
· ——《夏目友人帳》
如果本文對你有幫助的話,給個贊吧,謝謝!

轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/374554.html
標籤:AI
上一篇:R語言ggplot2可視化自動換行適配長文本圖例(legend)實戰:Multiple Lines for Text per Legend Label
下一篇:【歷史上的今天】12 月 5 日:分布式系統的“三駕馬車”;世界上第一篇計算機科學博士論文;IBM 推出“深藍”計算機
