如何解决AttributeError: '' 对象没有属性 '' - 无法识别类中声明的变量
对于给定的标记化文本语料库,我想使用多种加权方法来计算词权重。为此,我创建了以下类:
if(old.lower() in name.lower() and 'subfolder2' not in path.lower()):
<rest of the code>
现在,当我运行代码时:
class Weighing:
    """Build a sparse document-term matrix for a tokenized corpus and select a global term weight.

    NOTE: this listing is the code from the question and deliberately keeps
    its bug. Every ``def`` after the corpus statistics is indented one level
    too deep, so it becomes a local function of ``__init__`` instead of a
    method of the class, and ``self.calc_entropy()`` raises
    ``AttributeError: 'Weighing' object has no attribute 'calc_entropy'``.
    """

    def __init__(self, input_file, word_weighing):
        """Index the corpus, count term occurrences and pick a global term weight.

        Args:
            input_file: List in which each element is a list of tokens.
            word_weighing: Selector for the global term weight:
                1 = entropy, 2 = IDF, 3 = normal, 4 = probabilistic IDF.
        """
        self.input_file_ = input_file  # List in which each element is a list of tokens
        self.word_weighing_ = word_weighing
        self.num_documents = len(self.input_file_)

        # Set with all unique words from the corpus
        self.vocabulary = set()
        for text in self.input_file_:
            self.vocabulary.update(text)
        self.vocabulary_size = len(self.vocabulary)

        # Create dictionaries that return the index for a token, or the token
        # for an index, of the corpus' vocabulary
        self.word_to_index = dict()
        self.index_to_word = dict()
        for i, word in enumerate(self.vocabulary):
            self.word_to_index[word] = i
            self.index_to_word[i] = word

        # Create sparse Document-Term Matrix (DTM): one row per document,
        # one column per vocabulary word, cell = count of the word in the document
        self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
        for doc_index, document in enumerate(self.input_file_):
            document_counter = Counter(document)
            for word in set(document):
                self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]  # Update element

        # Get word count for all documents to calculate sparse_p_ij
        self.sum_words = Counter()
        for doc in self.input_file_:
            self.sum_words.update(Counter(doc))

        # BUG (kept on purpose, see class docstring): the functions below are
        # nested inside __init__, so they are plain local names here and are
        # never attached to the class or to ``self``.

        # Create probability of word i in document j. Format: sparse matrix
        def create_sparse_p_ij (self):
            sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
            for j in range(self.num_documents):
                row_counts = self.sparse_dtm.getrow(j).toarray()[0]
                word_index = row_counts.nonzero()[0]
                non_zero_row_counts = row_counts[row_counts != 0]
                for i, count in enumerate(non_zero_row_counts):
                    word = self.index_to_word[word_index[i]]
                    # Share of the word's corpus-wide count that falls in document j
                    prob_ij = count/self.sum_words[word]
                    sparse_p_ij[j, word_index[i]] = prob_ij
            return sparse_p_ij

        # Create a binary sparse dtm. Format: sparse matrix
        def create_sparse_binary_dtm(self):
            binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
            for doc_index, document in enumerate(self.input_file_):
                # Every word present in the document maps to 1, regardless of its count
                document_counter = dict.fromkeys(document, 1)
                for word in set(document):
                    binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]  # Update element
            return binary_sparse_dtm

        # 2) Calculate Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, normal)
        def calc_entropy(self):
            sparse_p_ij = self.create_sparse_p_ij()
            summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
            return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])

        def calc_idf(self):
            summed_words = self.sparse_dtm.sum(0).tolist()[0]
            return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])

        def calc_normal(self):
            summed_words = self.sparse_dtm.sum(0).tolist()[0]
            return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])

        def calc_probidf (self):
            binary_sparse_dtm = self.create_sparse_binary_dtm()
            summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
            return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])

        # Looks the helpers up on ``self``; because they are only local
        # functions of __init__, each of these lookups raises AttributeError.
        if self.word_weighing_ == 1:
            gtw = self.calc_entropy()
        elif self.word_weighing_ == 2:
            gtw = self.calc_idf()
        elif self.word_weighing_ == 3:
            gtw = self.calc_normal()
        elif self.word_weighing_ == 4:
            gtw = self.calc_probidf()
model = Weighing(input_file = data_list,word_weighing = 1)
其中 data_list 是带有标记词的列表(列表中的每个元素都是一个词元列表)。
我收到以下错误(完整的回溯信息见下文):
我查看了其他一些类似的 SO 链接[1、2、3、4],但这些链接在这里似乎都不适用。
我该怎么做才能克服这个错误?
编辑:
我已将代码更新为:
Traceback (most recent call last):
File "<ipython-input-621-b0a9caec82d4>",line 4,in <module>
word_weighing = 1)
File "<ipython-input-617-6f3fdcecd170>",line 90,in __init__
gtw = self.calc_entropy()
AttributeError: 'Weighing' object has no attribute 'calc_entropy'
但是,我仍然收到错误:
class Weighing:
    """Edited version of the question's class: the chosen weight is now stored on ``self.gtw``.

    NOTE: this listing is still the asker's broken code and keeps its bug on
    purpose. The helper ``def``s remain nested inside ``__init__`` (and now
    come after the call), so ``self.calc_entropy()`` still raises
    ``AttributeError: 'Weighing' object has no attribute 'calc_entropy'``.
    """

    def __init__(self, input_file, word_weighing):
        """Index the corpus, pick a global term weight, then count term occurrences.

        Args:
            input_file: List in which each element is a list of tokens.
            word_weighing: Selector for the global term weight:
                1 = entropy, 2 = IDF, 3 = normal, 4 = probabilistic IDF.
        """
        self.input_file_ = input_file  # List in which each element is a list of tokens
        self.word_weighing_ = word_weighing
        self.num_documents = len(self.input_file_)

        # Set with all unique words from the corpus
        self.vocabulary = set()
        for text in self.input_file_:
            self.vocabulary.update(text)
        self.vocabulary_size = len(self.vocabulary)

        # Create dictionaries that return the index for a token, or the token
        # for an index, of the corpus' vocabulary
        self.word_to_index = dict()
        self.index_to_word = dict()
        for i, word in enumerate(self.vocabulary):
            self.word_to_index[word] = i
            self.index_to_word[i] = word

        # Create sparse Document-Term Matrix (DTM): one row per document,
        # one column per vocabulary word, cell = count of the word in the document
        self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
        for doc_index, document in enumerate(self.input_file_):
            document_counter = Counter(document)
            for word in set(document):
                self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]  # Update element

        # BUG (kept on purpose, see class docstring): the weighting is selected
        # here, before self.sum_words is assigned and before the helper defs
        # below have run; the helpers are also only local functions of
        # __init__, so the lookup on ``self`` raises AttributeError.
        if self.word_weighing_ == 1:
            self.gtw = self.calc_entropy()
        elif self.word_weighing_ == 2:
            self.gtw = self.calc_idf()
        elif self.word_weighing_ == 3:
            self.gtw = self.calc_normal()
        elif self.word_weighing_ == 4:
            self.gtw = self.calc_probidf()

        # Get word count for all documents to calculate sparse_p_ij
        self.sum_words = Counter()
        for doc in self.input_file_:
            self.sum_words.update(Counter(doc))

        # Create probability of word i in document j. Format: sparse matrix
        def create_sparse_p_ij (self):
            sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
            for j in range(self.num_documents):
                row_counts = self.sparse_dtm.getrow(j).toarray()[0]
                word_index = row_counts.nonzero()[0]
                non_zero_row_counts = row_counts[row_counts != 0]
                for i, count in enumerate(non_zero_row_counts):
                    word = self.index_to_word[word_index[i]]
                    # Share of the word's corpus-wide count that falls in document j
                    prob_ij = count/self.sum_words[word]
                    sparse_p_ij[j, word_index[i]] = prob_ij
            return sparse_p_ij

        # Create a binary sparse dtm. Format: sparse matrix
        def create_sparse_binary_dtm(self):
            binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
            for doc_index, document in enumerate(self.input_file_):
                # Every word present in the document maps to 1, regardless of its count
                document_counter = dict.fromkeys(document, 1)
                for word in set(document):
                    binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]  # Update element
            return binary_sparse_dtm

        # 2) Calculate Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, normal)
        def calc_entropy(self):
            sparse_p_ij = self.create_sparse_p_ij()
            summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
            return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])

        def calc_idf(self):
            summed_words = self.sparse_dtm.sum(0).tolist()[0]
            return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])

        def calc_normal(self):
            summed_words = self.sparse_dtm.sum(0).tolist()[0]
            return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])

        def calc_probidf (self):
            binary_sparse_dtm = self.create_sparse_binary_dtm()
            summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
            return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])
现在我明白了,我是在函数被定义之前就调用了它,因此出现 AttributeError: 'Weighing' object has no attribute 'calc_entropy'。
如何更改我的代码,以便在初始化 self.gtw 之前先定义 def calc_entropy?
解决方法
这似乎是一个缩进问题:您在 __init__() 函数中而不是在类中定义了 calc_entropy() 等方法函数。
应该是:
class Weighing:
    """Skeleton showing the correct layout: every method sits directly in the class body."""

    def __init__(self):
        # your init
        pass

    # Same indentation level as __init__, so this is a method of the class
    # and can be reached as self.calc_entropy().
    def calc_entropy(self):
        # your method
        pass
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。