如何将Conv2d泛化为MaxPool2d到ConvTranspose2d?

如何解决“如何将Conv2d泛化为MaxPool2d到ConvTranspose2d?”这一问题

我已经把一个PyTorch自动编码器的实现通用化了。我使用的主要策略之一,是对表示每层节点数的序列进行缩放。这样,我就可以用各种网络规模做实验。

作为最小可复现示例(MRE),我先给出可以正常运行的原始非通用代码,再给出无法运行的代码。我想弄清楚Conv2d、MaxPool2d和ConvTranspose2d之间的关系。

# imports
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets    
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
# Original autoencoder class.
class autoencoder(nn.Module):
    """Convolutional autoencoder with four encoder and four decoder stages.

    Encoder: each stage is a 3x3 convolution (padding=1, so height/width are
    unchanged) followed by ReLU and a 2x2 max-pool with stride 2, which halves
    height/width (rounding down).

    Decoder: each stage is a stride-2 transposed convolution followed by ReLU.
    With no padding its output size is ``(in - 1) * 2 + kernel_size``, so
    ``kernel_size=3`` maps n -> 2n + 1 and ``kernel_size=2`` maps n -> 2n.

    For a 28x28 single-channel input (assumed here, e.g. FashionMNIST) the
    spatial size goes 28 -> 14 -> 7 -> 3 -> 1 in the encoder and
    1 -> 3 -> 7 -> 14 -> 28 in the decoder: the two kernel-3 layers undo the
    two poolings that rounded down (7 -> 3 and 3 -> 1), the two kernel-2
    layers undo the exact halvings.
    """

    def __init__(self):
        super().__init__()
        # encoder layers: channels 1 -> 64 -> 32 -> 16 -> 8
        self.enc1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.enc2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.enc3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        #######################################################
        # Bottleneck: enc4 + pool produce the smallest feature map, and dec1
        # is the first layer to grow it again.
        self.enc4 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder layers: channels 8 -> 8 -> 16 -> 32 -> 64
        self.dec1 = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2)
        #######################################################
        self.dec2 = nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2)
        self.dec3 = nn.ConvTranspose2d(16, 32, kernel_size=2, stride=2)
        self.dec4 = nn.ConvTranspose2d(32, 64, kernel_size=2, stride=2)
        # final 3x3 convolution collapses the 64 channels back to one
        self.out = nn.Conv2d(64, 1, kernel_size=3, padding=1)

    def forward(self, x):
        """Encode then decode ``x``; the result is squashed to (0, 1) by a sigmoid."""
        # encode: conv -> ReLU -> pool, four times
        for conv in (self.enc1, self.enc2, self.enc3, self.enc4):
            x = self.pool(F.relu(conv(x)))
        # decode: transposed conv -> ReLU, four times
        for deconv in (self.dec1, self.dec2, self.dec3, self.dec4):
            x = F.relu(deconv(x))
        # torch.sigmoid replaces the deprecated F.sigmoid
        return torch.sigmoid(self.out(x))
    
def train(network,trainloader):
    """Train ``network`` for 10 epochs on noise-corrupted batches.

    Relies on the module-level ``device``, ``optimizer`` and ``criterion``.

    Args:
        network: the model to optimise; called as ``network(batch)``.
        trainloader: iterable yielding ``(img, label)`` pairs; labels are ignored.

    Returns:
        A list with one entry per epoch: the mean batch loss of that epoch
        (0.0 for an epoch in which the loader yielded no batches).
    """
    train_loss = []
    for _ in range(10):
        running_loss = 0.0
        n_batches = 0
        for img, _labels in trainloader:
            # Corrupt the batch with Gaussian noise, then clamp back to [0, 1].
            # torch.clamp keeps the data a tensor; np.clip on a tensor depends
            # on NumPy's dispatch and is not guaranteed to return a tensor.
            img_noisy = img + 0.5 * torch.randn(img.shape)
            img_noisy = torch.clamp(img_noisy, 0., 1.).to(device)

            optimizer.zero_grad()
            outputs = network(img_noisy)
            # NOTE(review): the original listing is cut off right after the
            # forward pass, so the loss target is a reconstruction choice.
            # The network input is used because it is the only tensor here
            # already clamped to the sigmoid's [0, 1] output range; confirm
            # whether the clean image was intended instead.
            loss = criterion(outputs, img_noisy)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1
        train_loss.append(running_loss / n_batches if n_batches else 0.0)
    return train_loss

# Convert images to tensors, then normalise with mean 0.5 / std 0.5.
# The transform class is `Normalize` (capital N); `transforms.normalize`
# does not exist and raises AttributeError.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
trainset = datasets.FashionMNIST(
    root='./data',train=True,download=True,transform=transform
)
testset = datasets.FashionMNIST(
    root='./data',train=False,transform=transform
)
trainloader = DataLoader(
    trainset,batch_size=16,shuffle=True
)
# no batch_size given, so DataLoader's default applies
testloader = DataLoader(
    testset,shuffle=True
)

net = autoencoder()
device = 'cpu'
net.to(device)
# train() reads `device`, `criterion` and `optimizer` as module-level globals,
# so all three must exist before it is called.
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(),lr = 1e-3)
train_loss = train(net,trainloader)
# MRE for the autoencoder class I generalize.
# I have only added one more layer,and not scaled
# the node per layer sequence,but expanded to 128.
class autoencoder(nn.Module):
    """Five-stage variant of the autoencoder (the failing MRE).

    Compared with the four-stage listing, one 128-channel layer is added at
    each end. The scraped listing had lost several layer definitions and the
    ``forward`` header; they are restored here following the channel-halving
    pattern of the four-stage class and the layer names used in ``forward``.

    Each 2x2/stride-2 pooling halves height/width (rounding down). For a
    28x28 input (assumed, e.g. FashionMNIST) that gives
    28 -> 14 -> 7 -> 3 -> 1, so the fifth pooling receives a 1x1 map and
    raises ``RuntimeError: ... Output size is too small``. That is the
    traceback quoted after this listing. Inputs whose side survives five
    halvings (e.g. 32x32) run without error.
    """

    def __init__(self):
        super().__init__()
        # encoder layers: channels 1 -> 128 -> 64 -> 32 -> 16 -> 8
        self.enc0 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.enc1 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.enc2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.enc3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.enc4 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder layers: channels 8 -> 8 -> 16 -> 32 -> 64 -> 128
        # NOTE(review): kernel sizes 3,3,3,2,2 are reconstructed; the printed
        # ModuleList in this document shows kernel 3 for the first and
        # kernel 2 for the fourth decoder layer. Confirm the others.
        self.dec1 = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2)
        self.dec2 = nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2)
        self.dec3 = nn.ConvTranspose2d(16, 32, kernel_size=3, stride=2)
        self.dec4 = nn.ConvTranspose2d(32, 64, kernel_size=2, stride=2)
        self.dec5 = nn.ConvTranspose2d(64, 128, kernel_size=2, stride=2)
        self.out = nn.Conv2d(128, 1, kernel_size=3, padding=1)

    def forward(self, x):
        """Encode then decode ``x``; the result is squashed to (0, 1) by a sigmoid."""
        # encode
        x = F.relu(self.enc0(x))
        x = self.pool(x)
        x = F.relu(self.enc1(x))
        x = self.pool(x)
        x = F.relu(self.enc2(x))
        x = self.pool(x)
        x = F.relu(self.enc3(x))
        x = self.pool(x)
        x = F.relu(self.enc4(x))
        # fifth pooling: fails when the map is already 1x1 (see class docstring)
        x = self.pool(x)
        # decode
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        x = F.relu(self.dec5(x))
        # torch.sigmoid replaces the deprecated F.sigmoid
        x = torch.sigmoid(self.out(x))
        return x
    
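# def train(network,trainloader): ...
# (listing truncated here; train() and the rest of the script presumably repeat the first listing)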
# Error
Traceback (most recent call last):
  File "C:\Users\User\Desktop\ml_paper\mre.py",line 89,in <module>
    train_loss = train(net,trainloader)
  File "C:\Users\User\Desktop\ml_paper\mre.py",line 58,in train
    outputs = network(img_noisy)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\module.py",line 722,in _call_impl
    result = self.forward(*input,**kwargs)
  File "C:\Users\User\Desktop\ml_paper\mre.py",line 38,in forward
    x = self.pool(x)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\module.py",line 722,in _call_impl
    result = self.forward(*input,**kwargs)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\pooling.py",line 159,in forward
    self.return_indices)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\_jit_internal.py",line 247,in fn
    return if_false(*args,**kwargs)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\functional.py",line 576,in _max_pool2d
    input,kernel_size,stride,padding,dilation,ceil_mode)
RuntimeError: Given input size: (8x1x1). Calculated output size: (8x0x0). Output size is too small

问题可能出在我推断解码层卷积核(kernel)大小的方式上。

import math
import torch.nn as nn
# infer kernel sizes for decoding layers.
# Channel sequence. series[0] is never read by the loop below; series[1:]
# are the output channel counts of the decoder layers, in order.
series = [1,8,16,32,64,128]
kernel_set = 3
tmp = nn.ModuleList()
for layer in range(1,len(series)):
    # The first decoder layer keeps its channel count (8 -> 8); every later
    # layer maps the previous width to the current one.
    in_channels = series[layer] if layer == 1 else series[layer - 1]
    # Layers past the midpoint of the sequence use a kernel one smaller
    # (2 instead of 3); with stride 2 and no padding a kernel-3 layer maps
    # n -> 2n + 1 and a kernel-2 layer maps n -> 2n.
    if layer > math.floor(len(series)/2):
        kernel_size = kernel_set - 1
    else:
        kernel_size = kernel_set
    tmp.append(nn.ConvTranspose2d(in_channels,series[layer],kernel_size,stride = 2))
print(tmp)
ModuleList(
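  (0): ConvTranspose2d(8,8,kernel_size=(3,3),stride=(2,2))
  (1): ConvTranspose2d(8,16,kernel_size=(3,3),stride=(2,2))
  (2): ConvTranspose2d(16,32,kernel_size=(3,3),stride=(2,2))
  (3): ConvTranspose2d(32,64,kernel_size=(2,2),stride=(2,2))
  (4): ConvTranspose2d(64,128,kernel_size=(2,2),stride=(2,2))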
)

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐


Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其他元素将获得点击?
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。)
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbcDriver发生异常。为什么?
这是用Java进行XML解析的最佳库。
Java的PriorityQueue的内置迭代器不会以任何特定顺序遍历数据结构。为什么?
如何在Java中聆听按键时移动图像。
Java“Program to an interface”。这是什么意思?
Java在半透明框架/面板/组件上重新绘画。
Java“ Class.forName()”和“ Class.forName()。newInstance()”之间有什么区别?
在此环境中不提供编译器。也许是在JRE而不是JDK上运行?
Java用相同的方法在一个类中实现两个接口。哪种接口方法被覆盖?
Java 什么是Runtime.getRuntime()。totalMemory()和freeMemory()?
java.library.path中的java.lang.UnsatisfiedLinkError否*****。dll
JavaFX“位置是必需的。” 即使在同一包装中
Java 导入两个具有相同名称的类。怎么处理?
Java 是否应该在HttpServletResponse.getOutputStream()/。getWriter()上调用.close()?
Java RegEx元字符(。)和普通点?