CharEmbeddings和CNNCharEmbeddings / Encoding有什么区别？

如何理解CharEmbeddings和CNNCharEmbeddings / Encoding之间的区别？

在研究ML模型时，我发现了两个看似不同的模块来执行CharEmbedding和CharEncoding，但我不清楚为什么两者都需要以及它们有什么区别。

CharEmbedding如下，它通过LSTM实现，这与我一直以来的理解一致：

class CharEmbeddings(nn.Module):
    """Build one vector per word from its characters with a BiLSTM.

    Every word is embedded character by character and run through a
    one-layer bidirectional LSTM. The word vector is the forward direction's
    output at the word's last real character concatenated with the backward
    direction's output at its first character.

    Args:
        char_vocab: Character vocabulary. Must support ``len()`` and expose
            a ``pad`` attribute holding the padding index.
        embedding_dim: Size of each character embedding.
        hidden_size: Hidden size of each LSTM direction.
        which_cuda: Index of the CUDA device to target when CUDA is
            available; ignored otherwise.
    """

    def __init__(self, char_vocab, embedding_dim, hidden_size, which_cuda=0):
        super().__init__()

        self.embedding_dim = embedding_dim
        self.vocab = char_vocab
        self.hidden_size = hidden_size
        # forward() moves activations to this device. The module's own
        # parameters are NOT moved here; callers do that with
        # ``module.to(module.device)``.
        self.device = torch.device(
            "cuda:{}".format(which_cuda) if torch.cuda.is_available() else "cpu"
        )

        self.embeddings = nn.Embedding(
            num_embeddings=len(self.vocab),
            embedding_dim=embedding_dim,
            padding_idx=self.vocab.pad,
        )

        self.bilstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=self.hidden_size,
            num_layers=1,
            bidirectional=True,
        )

    def forward(self, sentence_batch):
        """Encode every word of ``sentence_batch``.

        Returns:
            Tensor viewed as ``(len(sentence_batch), -1, hidden_size * 2)``.
            Words of length zero (padding words) get an all-zero vector.
        """
        # char2index + padding
        words, lengths, unsort_idx = self.prepare(sentence_batch)
        # words -> (n_pad_words, max_word_length)
        # drop the zero-length words (pads); they are re-added as zeros below
        non_zero_words, non_zero_lengths = self.remove_pad_words(words, lengths)
        # non_zero_words -> (n_nonpad_words, max_word_length)

        embeddings = self.embeddings(non_zero_words).to(self.device)
        # embeddings -> (n_nonpad_words, max_word_length, embedding_dim)

        # pack_padded_sequence requires the lengths on the CPU
        packed = torch.nn.utils.rnn.pack_padded_sequence(
            embeddings, non_zero_lengths.cpu(), batch_first=True
        )
        output, _ = self.bilstm(packed)
        x, _ = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
        # x -> (n_nonpad_words, longest_word_length, hidden_size*2)

        # Index of the last real character of each word, e.g. for
        # ['h','e','l','o','<pad>'] we want the output at 'o', not '<pad>'.
        # The index must have the same rank as x for gather().
        last_char_idx = (
            non_zero_lengths.long()
            .to(x.device)
            .view(-1, 1, 1)
            .expand(-1, 1, self.hidden_size * 2)
            - 1
        )
        # last_char_idx -> (n_nonpad_words, 1, hidden_size*2)
        forward_out = x.gather(1, last_char_idx).squeeze(1)[:, :self.hidden_size]
        # forward_out -> (n_nonpad_words, hidden_size)

        # The backward direction has read the whole word once it reaches the
        # first character, so take time step 0 and the backward feature half.
        backward_out = x[:, 0, self.hidden_size:]
        # backward_out -> (n_nonpad_words, hidden_size)

        x = torch.cat([forward_out, backward_out], 1)
        # x -> (n_nonpad_words, hidden_size*2)

        # re-add one zero vector per removed padding word, at the end
        n_pad_words = len(words) - len(non_zero_words)
        x = torch.cat(
            [x, torch.zeros(n_pad_words, self.hidden_size * 2).to(self.device)], 0
        )
        # x -> (n_pad_words, hidden_size*2)

        # unsort
        x = x[unsort_idx]
        # reshape to sentence size
        x = x.view(len(sentence_batch), -1, self.hidden_size * 2)
        # x -> (batch_size, max_sentence_size, hidden_size*2)
        return x

    def prepare(self, sentence_batch):
        """Turn a batch of sentences into padded words sorted by length.

        The implementation is not part of this excerpt. ``forward`` expects
        a triple ``(padded_words, sort_word_len, unsort_idx)``: the words as
        padded character indices, their lengths sorted in decreasing order,
        and the index that restores the original word order.
        """
        raise NotImplementedError(
            "CharEmbeddings.prepare is not included in this excerpt"
        )

    def remove_pad_words(self, padded_words, word_lengths):
        """Drop the words whose length is zero (padding words).

        ``forward`` re-adds one zero row per dropped word at the END of the
        batch before unsorting. That only lines up when the zero-length
        words are last, i.e. when ``word_lengths`` is sorted in decreasing
        order.

        Returns:
            ``(non_pad_words, non_pad_lengths)``, in their original order.
        """
        keep = word_lengths > 0
        return padded_words[keep.to(padded_words.device)], word_lengths[keep]

抛开代码细节不谈，在我看来这就是一个普通的CharEmbedding。另一方面，我不明白为什么还必须使用CNN来进行CharEncoding，如下所示：

class CNNCharEmbeddings(CharEmbeddings):
    """Build one vector per word from its characters with a 1-D CNN.

    Characters are embedded, convolved along the word, passed through ReLU
    and max-pooled over the character axis, giving ``cnn_ce_out_channels``
    features per word. Batch preparation (``prepare`` and
    ``remove_pad_words``) is inherited from ``CharEmbeddings``.

    Args:
        cnn_embeddings_size: Size of each character embedding.
        cnn_ce_kernel_size: Width of the convolution kernel.
        cnn_ce_out_channels: Number of convolution filters, i.e. the size of
            each word vector.
        which_cuda: Index of the CUDA device to target when CUDA is
            available; ignored otherwise.
        char_vocab: Character vocabulary. Must support ``len()`` and expose
            a ``pad`` attribute. Required despite the default; it is placed
            last only so existing positional arguments keep their meaning.

    Raises:
        ValueError: If ``char_vocab`` is not provided.
    """

    def __init__(self, cnn_embeddings_size, cnn_ce_kernel_size, cnn_ce_out_channels,
                 which_cuda=0, char_vocab=None):
        if char_vocab is None:
            raise ValueError("char_vocab is required")

        # NOTE: this flips a process-wide cuDNN flag, not a per-module one.
        torch.backends.cudnn.deterministic = True
        # Initialise nn.Module directly: CharEmbeddings.__init__ requires a
        # hidden_size and builds a BiLSTM, neither of which this encoder has
        # a use for.
        nn.Module.__init__(self)

        self.embedding_dim = cnn_embeddings_size
        self.vocab = char_vocab
        # forward() moves activations to this device; the caller moves the
        # module's parameters with ``module.to(module.device)``.
        self.device = torch.device(
            "cuda:{}".format(which_cuda) if torch.cuda.is_available() else "cpu"
        )

        self.cnn_ce_kernel_size = cnn_ce_kernel_size
        self.cnn_ce_out_channels = cnn_ce_out_channels

        self.embeddings = nn.Embedding(
            num_embeddings=len(self.vocab),
            embedding_dim=self.embedding_dim,
            padding_idx=self.vocab.pad,
        )

        self.cnn = nn.Conv1d(
            in_channels=self.embedding_dim,
            out_channels=self.cnn_ce_out_channels,
            kernel_size=cnn_ce_kernel_size,
            stride=1,
            padding=int((cnn_ce_kernel_size - 1) / 2),
        )

    def forward(self, sentence_batch):
        """Encode every word of ``sentence_batch``.

        Returns:
            Tensor viewed as ``(len(sentence_batch), -1, cnn_ce_out_channels)``.
            Words of length zero (padding words) get an all-zero vector.
        """
        words, lengths, unsort_idx = self.prepare(sentence_batch)
        non_zero_words, non_zero_lengths = self.remove_pad_words(words, lengths)
        embeddings = self.embeddings(non_zero_words).to(self.device)

        # Conv1d convolves over the last axis, so put channels second.
        x = self.cnn(embeddings.transpose(1, 2))
        # x -> (n_nonpad_words, cnn_ce_out_channels, conv_length)

        positions = torch.arange(x.shape[2]).expand(x.shape).to(self.device)
        word_lengths = non_zero_lengths.unsqueeze(1).unsqueeze(1).to(self.device)
        mask = (positions < word_lengths).float()
        # Overwrite the positions past each word's end with the global
        # minimum so they can never win the max-pool; detached so that no
        # gradient flows through the minimum.
        x_min = (positions >= word_lengths).float() * torch.min(x)
        x_min = x_min.detach().float()
        x = x * mask + x_min
        x = F.relu(x)
        # max over the whole character axis
        x = nn.MaxPool1d(
            kernel_size=x.shape[2]
        )(x)

        x = x.view([x.shape[0], x.shape[1]])
        # re-add one zero vector per removed padding word, at the end
        n_pad_words = len(words) - len(non_zero_words)
        x = torch.cat(
            [x, torch.zeros(n_pad_words, self.cnn_ce_out_channels).to(self.device)], 0
        )

        x = x[unsort_idx]
        x = x.view(len(sentence_batch), -1, self.cnn_ce_out_channels)

        return x

通常，同时使用这两个模块给模型带来的提升，比只使用其中一个更大。但是，如果我已经有了CharEncoding，我就不太理解CNN和CharEmbedding各自的用处了。这与encoder-decoder有关系吗？

谢谢！