如何将 Conv2d 经 MaxPool2d 的参数关系泛化到 ConvTranspose2d?
我已经概括了PyTorch自动编码器的实现。我使用的一种主要策略是对序列进行重新缩放,该序列指示每层节点。这样,我可以进行各种网络规模的实验。
为了给出 MRE(最小可复现示例),我会先提供可以正常运行的原始非通用代码,再提供运行失败的泛化代码。我要找的是 Conv2d、MaxPool2d 和 ConvTranspose2d 之间的参数关系。
# imports
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
# Original autoencoder class.
class autoencoder(nn.Module):
    """Denoising convolutional autoencoder for 1-channel 28x28 images.

    Encoder: four 3x3 convolutions (1 -> 64 -> 32 -> 16 -> 8 channels), each
    followed in forward() by 2x2 max pooling (spatial: 28 -> 14 -> 7 -> 3 -> 1).
    Decoder: four stride-2 transposed convolutions that mirror the pooling
    (1 -> 3 -> 7 -> 14 -> 28), then a 3x3 conv back to 1 channel + sigmoid.

    NOTE(review): the scraped original dropped positional arguments
    (out_channels / kernel_size) from several layers; they are restored here
    so the Conv2d -> MaxPool2d -> ConvTranspose2d relationship is explicit.
    """

    def __init__(self):
        super().__init__()
        # encoder layers: 3x3 convs with padding=1 keep the spatial size;
        # self.pool halves it after every stage.
        self.enc1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.enc2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.enc3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.enc4 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder layers: stride-2 transposed convs undo the pooling via
        # out = (in - 1) * stride + kernel_size, so kernel_size=3 maps the
        # odd sizes (1 -> 3 -> 7) and kernel_size=2 doubles exactly
        # (7 -> 14 -> 28).
        self.dec1 = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2)
        self.dec2 = nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2)
        self.dec3 = nn.ConvTranspose2d(16, 32, kernel_size=2, stride=2)
        self.dec4 = nn.ConvTranspose2d(32, 64, kernel_size=2, stride=2)
        self.out = nn.Conv2d(64, 1, kernel_size=3, padding=1)

    def forward(self, x):
        # encode
        x = F.relu(self.enc1(x))
        x = self.pool(x)
        x = F.relu(self.enc2(x))
        x = self.pool(x)
        x = F.relu(self.enc3(x))
        x = self.pool(x)
        x = F.relu(self.enc4(x))
        x = self.pool(x)
        # decode
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        # torch.sigmoid: F.sigmoid is deprecated in modern PyTorch
        x = torch.sigmoid(self.out(x))
        return x
def train(network, trainloader):
    """Train `network` for 10 epochs with a denoising objective.

    Each image is corrupted with Gaussian noise (factor 0.5), clipped back to
    [0, 1], and the network is trained to reconstruct it. Relies on the
    module-level `device`, `optimizer`, and `criterion`.

    Returns the list of per-epoch average losses.

    NOTE(review): the scraped post truncated this function after the forward
    pass; the loss/backward/step completion below is the standard one and
    should be checked against the original source.
    """
    train_loss = list()
    for epoch in range(10):
        running_loss = 0.0
        for data in trainloader:
            img, _ = data
            # corrupt input with Gaussian noise, clamp back into [0, 1]
            img_noisy = img + 0.5 * torch.randn(img.shape)
            img_noisy = np.clip(img_noisy, 0., 1.)
            img_noisy = img_noisy.to(device)
            optimizer.zero_grad()
            outputs = network(img_noisy)
            # presumably the noisy image is the target here (as scraped);
            # a true denoiser would target the clean `img` — TODO confirm
            loss = criterion(outputs, img_noisy)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_loss.append(running_loss / len(trainloader))
    return train_loss
# Data pipeline and training setup (module-level script).
# fix: the scraped code called `transforms.normalize`, which does not exist —
# the torchvision class is `transforms.Normalize`.
# NOTE(review): Normalize((0.5,), (0.5,)) maps pixels to [-1, 1] while the
# model's sigmoid output lives in [0, 1] — the tutorial this comes from has
# that mismatch; confirm against the original before reuse.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
trainset = datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform
)
testset = datasets.FashionMNIST(
    root='./data', train=False, transform=transform
)
trainloader = DataLoader(
    trainset, batch_size=16, shuffle=True
)
testloader = DataLoader(
    testset, shuffle=True
)
net = autoencoder()
device = 'cpu'
net.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)
train_loss = train(net, trainloader)
# MRE for the autoencoder class I generalize.
# I have only added one more layer,and not scaled
# the node per layer sequence,but expanded to 128.
class autoencoder(nn.Module):
    """MRE: the generalized autoencoder with one extra stage (up to 128 ch).

    NOTE(review): the scraped post truncated most of __init__ (the original
    even fused two source lines together); the layers below are reconstructed
    from forward() and the post's description ("added one more layer,
    expanded to 128"). With a 28x28 input the five pools give
    28 -> 14 -> 7 -> 3 -> 1, and the fifth pool on a 1x1 map raises the
    RuntimeError quoted in the post ("Output size is too small").
    """

    def __init__(self):
        super().__init__()
        # encoder layers (five stages: 1 -> 128 -> 64 -> 32 -> 16 -> 8)
        self.enc0 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.enc1 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.enc2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.enc3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.enc4 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder layers (five mirrored transposed convolutions)
        self.dec1 = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2)
        self.dec2 = nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2)
        self.dec3 = nn.ConvTranspose2d(16, 32, kernel_size=3, stride=2)
        self.dec4 = nn.ConvTranspose2d(32, 64, kernel_size=2, stride=2)
        self.dec5 = nn.ConvTranspose2d(64, 128, kernel_size=2, stride=2)
        self.out = nn.Conv2d(128, 1, kernel_size=3, padding=1)

    def forward(self, x):
        # encode — five pools; this is where 28x28 inputs fail
        x = F.relu(self.enc0(x))
        x = self.pool(x)
        x = F.relu(self.enc1(x))
        x = self.pool(x)
        x = F.relu(self.enc2(x))
        x = self.pool(x)
        x = F.relu(self.enc3(x))
        x = self.pool(x)
        x = F.relu(self.enc4(x))
        x = self.pool(x)
        # decode
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        x = F.relu(self.dec5(x))
        x = torch.sigmoid(self.out(x))
        return x
def train(network, trainloader):  # ... (same training loop as above; truncated in the original post)
# Error
Traceback (most recent call last):
File "C:\Users\User\Desktop\ml_paper\mre.py",line 89,in <module>
train_loss = train(net,trainloader)
File "C:\Users\User\Desktop\ml_paper\mre.py",line 58,in train
outputs = network(img_noisy)
File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\module.py",line 722,in _call_impl
result = self.forward(*input,**kwargs)
File "C:\Users\User\Desktop\ml_paper\mre.py",line 38,in forward
x = self.pool(x)
File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\module.py",**kwargs)
File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\pooling.py",line 159,in forward
self.return_indices)
File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\_jit_internal.py",line 247,in fn
return if_false(*args,**kwargs)
File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\functional.py",line 576,in _max_pool2d
input,kernel_size,stride,padding,dilation,ceil_mode)
RuntimeError: Given input size: (8x1x1). Calculated output size: (8x0x0). Output size is too small
问题可能是我如何推断解码层中的内核大小。
import math
import torch.nn as nn

# Infer kernel sizes for the decoding (ConvTranspose2d) layers from the
# per-layer channel series used by the encoder.
# NOTE(review): the scraped post collapsed `series` and the constructor
# arguments; this is reconstructed to reproduce the ModuleList printed below.
series = [1, 128, 64, 32, 16, 8]
kernel_set = 3
tmp = nn.ModuleList()
for layer in range(1, len(series)):
    if layer == 1:
        # first decoder stage keeps the bottleneck channel count
        tmp.append(nn.ConvTranspose2d(series[-layer], series[-layer],
                                      kernel_set, stride=2))
    else:
        if layer > math.floor(len(series) / 2):
            # late stages: kernel_set - 1 gives exact doubling
            # (out = (in - 1) * 2 + 2 = 2 * in)
            tmp.append(nn.ConvTranspose2d(series[-layer + 1], series[-layer],
                                          kernel_set - 1, stride=2))
        else:
            # early stages: kernel_set maps the odd sizes (1 -> 3 -> 7)
            tmp.append(nn.ConvTranspose2d(series[-layer + 1], series[-layer],
                                          kernel_set, stride=2))
print(tmp)
ModuleList(
  (0): ConvTranspose2d(8, 8, kernel_size=(3, 3), stride=(2, 2))
  (1): ConvTranspose2d(8, 16, kernel_size=(3, 3), stride=(2, 2))
  (2): ConvTranspose2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
  (3): ConvTranspose2d(32, 64, kernel_size=(2, 2), stride=(2, 2))
  (4): ConvTranspose2d(64, 128, kernel_size=(2, 2), stride=(2, 2))
)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。