如何解决“RuntimeError:张量的扩展大小118必须与非单维1处的现有大小135相匹配”Pytorch机器翻译
使用此 German to English Pytorch Seq2Seq Machine Translatior,我一直在尝试使用自定义数据集创建中古英语到现代英语 Seq2Seq 机器翻译。除了创建自定义数据集之外,我虚拟地复制并粘贴了德语模板并将德语替换为我(中古英语)和英语替换为 pdf(现代英语),但是在尝试训练模型时我收到了两条错误消息.当我运行德语版本的代码时,它训练得很好。以下是我的代码和两条错误信息。
我的代码:
# imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data,datasets
from torchtext.data import Field,BucketIterator,TabularDataset
import numpy as np # other useful math-related libraries and modules
import spacy # this is where he gets his nlp datasets
import spacy.cli
import random
from utils2 import translate_sentence,bleu,save_checkpoint,load_checkpoint
spacy_eng = spacy.load('en') # loading up the English tokenizer
def tokenizer_eng(text): # English tokenizer function for the fields
return [tok.text for tok in spacy_eng.tokenizer(text)]
# constructing the fields for pde and me
me = Field(sequential=True,use_vocab=True,tokenize=tokenizer_eng,lower=True,init_token='sos',eos_token='<eos>')
pde = Field(sequential=True,eos_token='<eos>')
fields = {'Middle English': ('me',me),'Present Day English': ('pde',pde)}
train_data,validation_data,test_data = TabularDataset.splits(
path='/Users/user/Desktop/Code/datasets',train='train.tsv',test='test.tsv',validation='validation.tsv',format='tsv',fields=fields
)
#print(test_data[0].__dict__.keys())
#print(validation_data[1].__dict__.values())
# Building the vocabulary
me.build_vocab(train_data)
pde.build_vocab(train_data)
# Building the seq2seq and encoder decoder models
class Encoder(nn.Module): # first LSTM
def __init__(self,input_size,embedding_size,hidden_size,num_layers,p):
super(Encoder,self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.dropout = nn.Dropout(p)
self.embedding = nn.Embedding(input_size,embedding_size)
self.rnn = nn.LSTM(embedding_size,dropout=p)
def forward(self,x): # defines the vector of indices
# x vector shape: (seq_length,N)
embedding = self.dropout(self.embedding(x))
# embedding vector shape: (seq_length,N,embedding_size)
outputs,(hidden,cell) = self.rnn(embedding)
return hidden,cell
class Decoder(nn.Module):
def __init__(self,output_size,p):
# output_size should be the same as input_size,because the size of the vocabulary should change.
super(Decoder,self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.dropout = nn.Dropout(p)
self.embedding = nn.Embedding(input_size,embedding_size)
self.rnn = nn.LSTM(embedding_size,dropout=p)
self.fc = nn.Linear(hidden_size,output_size) # fc stands for fully connected
def forward(self,x,hidden,cell):
x = x.unsqueeze(0) #this adds another dimension
embedding = self.dropout(self.embedding(x))
# embedding shape: (1,embedding_size)
outputs,cell) = self.rnn(embedding,cell))
# outputs is what we think this next word should be
# shape of the outputs: (1,hidden_size)
predictions = self.fc(outputs)
# shape of predictions: (1,length_of_vocab)
# this will be sent to the loss function
predictions = predictions.squeeze(0)
return predictions,cell
class Seq2Seq(nn.Module): # combines the encoder and decoder
def __init__(self,encoder,decoder):
super(Seq2Seq,self).__init__()
self.encoder = encoder
self.decoder = decoder
def forward(self,source,target,teacher_force_ratio=0.5):
batch_size = source.shape[1]
# batch_size should look like (trg_len,N)
target_len = target.shape[0]
target_vocab_size = len(pde.vocab) #make sure to change this to pde
outputs = torch.zeros(target_len,batch_size,target_vocab_size).to(device)
# predicts one word at a time,but each word predicts an entire batch and every prediction is a vector of the entire vocabulary size
hidden,cell = self.encoder(source)
x = target[0]
for t in range(1,target_len):
output,cell = self.decoder(x,cell)
outputs[t] = output
# the output will look like (N,english_vocab_size)
best_guess = output.argmax(1)
x = target[t] if random.random() < teacher_force_ratio else best_guess
return outputs
# hyperparameters for the training model
num_epochs = 2
learning_rate = 3e-4
batch_size = 32 # batch size must be smaller than total amount of data
# model hyperparameters
save_model = True
load_model = False
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size_encoder = len(me.vocab)
input_size_decoder = len(pde.vocab)
output_size = len(me.vocab)
encoder_embedding_size = 50
decoder_embedding_size = 50
hidden_size = 100
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5
# writing iterators
train_iterator,valid_iterator,test_iterator = BucketIterator.splits(
(train_data,test_data),# make sure these are in same order as declared variables
batch_size=batch_size,sort_within_batch = True,sort_key = lambda x: len(x.me),# formerly x.src
device=device
)
# running the encoder decoder models
encoder_net = Encoder(input_size_encoder,encoder_embedding_size,enc_dropout).to(device)
decoder_net = Decoder(input_size_decoder,decoder_embedding_size,dec_dropout).to(device)
model = Seq2Seq(encoder_net,decoder_net).to(device)
optimizer = optim.Adam(model.parameters(),lr=learning_rate)
pad_idx = pde.vocab.stoi['<pad>']
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
if load_model:
load_checkpoint(torch.load("my_checkpoint2.pth.tar"),model,optimizer)
# This is an example sentence.
sentence = (
"Is owr dyner dyght?"
)
for epoch in range(num_epochs):
print(f'Epoch [{epoch} / {num_epochs}]')
if save_model:
checkpoint = {
"state_dict": model.state_dict(),"optimizer": optimizer.state_dict(),}
save_checkpoint(checkpoint)
model.eval()
translated_sentence = translate_sentence(
model,sentence,me,pde,device,max_length=50
)
print(f"Translated example sentence: \n {translated_sentence}")
model.train()
for batch_idx,batch in enumerate(train_iterator):
inp_data = batch.me.to(device)
target = batch.pde.to(device)
# this was formerly batch.src and batch.trg
output = model(inp_data,target)
# Output is of shape (trg_len,output_dim) but Cross Entropy Loss
# doesn't take input in that form. Here we can view it in a similar
# way so we have output_words * batch_size that we want to send in into
# our cost function,so we need to do some reshaping."
output = output[1:].reshape(-1,output.shape[2])
target = target[1:].reshape(-1)
optimizer.zero_grad()
loss = criterion(output,target)
loss.backward()
optimizer.step()
print("run complete")
错误一 我认为这个错误主要与输出没有按预期成形有关,但我不知道如何使它成为正确的形状。我认为问题可能在于我如何创建数据集,我如何创建 Seq2Seq2 类,或者程序的最后一个 for 循环。
Traceback (most recent call last):
File "/Users/user/PycharmProjects/pythonProject/ME-PDE.py",line 250,in <module>
output = model(inp_data,target)
File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/module.py",line 727,in _call_impl
result = self.forward(*input,**kwargs)
File "/Users/user/PycharmProjects/pythonProject/ME-PDE.py",line 150,in forward
outputs[t] = output
RuntimeError: The expanded size of the tensor (118) must match the existing size (135) at non-singleton dimension 1. Target sizes: [20,118]. Tensor sizes: [20,135]
错误二 有时会随机出现此错误。我认为这可能是因为我的超参数对于我的样本量来说太大了,但我不确定为什么有时只出现这个错误消息而不是第一个。
Traceback (most recent call last):
File "/Users/user/PycharmProjects/pythonProject/ME-PDE.py",line 235,in <module>
translated_sentence = translate_sentence(
File "/Users/user/PycharmProjects/pythonProject/utils2.py",line 53,in translate_sentence
output,cell = model.decoder(prevIoUs_word,cell)
File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/module.py",line 105,in forward
embedding = self.dropout(self.embedding(x))
File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/module.py",**kwargs)
File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/sparse.py",line 124,in forward
return F.embedding(
File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/functional.py",line 1852,in embedding
return torch.embedding(weight,input,padding_idx,scale_grad_by_freq,sparse)
IndexError: index out of range in self
截至目前,我在 test.tsv 中只有 25 个翻译,train.tsv 中有 20 个,validation.tsv 中有 5 个。我会得到更多,但我希望程序能够首先成功运行。 来自validation.tsv的示例
Middle English Present Day English
Than make I buter ferther on the day Then I make butter later in the day.
Ye wold say,'they be prowde!' You would say,‘they are proud!’
Whyll yow slepe fulle stylle,While you sleep soundly,Kype wylle owr chelderne and let them not wepe. Keep our children well and don’t let them weep.
Yet I have not a feyr word whan that I have done. Yet I don’t get any kind words when I have done that.
非常感谢您提供的任何帮助!
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。