如何解决Sklearn ValueError:K-ways Spectral partitioning function不支持复杂数据
我正在研究光谱聚类,偶然发现 Satyaki Sikdar 的一篇关于光谱社区检测的论文。 来源:https://www3.nd.edu/~kogge/courses/cse60742-Fall2018/Public/StudentWork/KernelPaperFinal/SCD-Sikdar-final.pdf
在那篇论文中,有一个连通图 G 的 k 路谱分区的 Python 实现。 所以我试试看。
import networkx as nx
import numpy as np
import scipy.sparse.linalg
from sklearn.cluster import KMeans
import sklearn.preprocessing
def graphinit() -> nx.Graph:
G = nx.Graph()
G.add_edges_from([
(1,2),(1,3),4),(2,5),(3,(4,7),9),10),(5,6),8),(6,(7,(8,12),(9,13),(11,14),15),18),(12,16),(13,19),(14,17),(15,(16,(17,(18,20)
])
return G
# This function is copy-paste from the paper.
def k_way_spectral(G,k):
assert nx.is_connected(G),"the graph must be connnected"
clusters = []
if G.order() < k:
clusters = list(G.nodes())
else:
L = nx.laplacian_matrix(G)
# compute the first k + 1 eigenvectors
_,eigenvecs = scipy.sparse.linalg.eigs(L.asfptype(),k=k+1,which='SM')
# discard the first trivial eigenvector
eigenvecs = eigenvecs[:,1:]
# normalize each row by its L2 norm
eigenvecs = sklearn.preprocessing.normalize(eigenvecs)
# run K-means
kmeans = KMeans(n_clusters=k).fit(eigenvecs)
cluster_labels = kmeans.labels_
clusters = [[] for _ in range(max(cluster_labels) + 1)]
for node_id,cluster_id in zip(G.nodes(),cluster_labels):
clusters[cluster_id].append(node_id)
return clusters
k_way_spectral(graphinit(),2)
我遇到了这个奇怪的错误。
---------------------------------------------------------------------------
ComplexWarning Traceback (most recent call last)
c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array,accept_sparse,accept_large_sparse,dtype,order,copy,force_all_finite,ensure_2d,allow_nd,ensure_min_samples,ensure_min_features,estimator)
597 else:
--> 598 array = np.asarray(array,order=order,dtype=dtype)
599 except ComplexWarning:
c:\users\taextream\anaconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a,order)
82 "\""
---> 83 return array(a,copy=False,order=order)
84
ComplexWarning: Casting complex values to real discards the imaginary part
During handling of the above exception,another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-4-00f95d3b8e31> in <module>
----> 1 k_way_spectral(graphinit(),2)
<ipython-input-3-b9dd84b7165d> in k_way_spectral(G,k)
12 eigenvecs = eigenvecs[:,1:]
13 # normalize each row by its L2 norm
---> 14 eigenvecs = sklearn.preprocessing.normalize(eigenvecs)
15 # run K-means
16 kmeans = KMeans(n_clusters=k).fit(eigenvecs)
c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args,**kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k,arg in zip(sig.parameters,args)})
---> 72 return f(**kwargs)
73 return inner_f
74
c:\users\taextream\anaconda3\lib\site-packages\sklearn\preprocessing\_data.py in normalize(X,norm,axis,return_norm)
1709
1710 X = check_array(X,accept_sparse=sparse_format,copy=copy,-> 1711 estimator='the normalize function',dtype=FLOAT_DTYPES)
1712 if axis == 0:
1713 X = X.T
c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args,args)})
---> 72 return f(**kwargs)
73 return inner_f
74
c:\users\taextream\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array,estimator)
599 except ComplexWarning:
600 raise ValueError("Complex data not supported\n"
--> 601 "{}\n".format(array))
602
603 # It is possible that the np.array(..) gave no warning. This happens
ValueError: Complex data not supported
[[-0.30232058+0.j -0.17231718+0.j]
[-0.26779348+0.j -0.0786565 +0.j]
[-0.31487312+0.j -0.21823149+0.j]
[-0.24889819+0.j -0.05462207+0.j]
[-0.18537559+0.j 0.06648773+0.j]
[-0.22911675+0.j 0.05705642+0.j]
[-0.12660584+0.j 0.06675646+0.j]
[-0.19060563+0.j 0.07010246+0.j]
[-0.19070607+0.j 0.1009091 +0.j]
[-0.11016539+0.j 0.1131071 +0.j]
[ 0.15350034+0.j 0.10633514+0.j]
[ 0.15003078+0.j 0.12096495+0.j]
[ 0.20614164+0.j 0.11914376+0.j]
[ 0.24117332+0.j 0.17252741+0.j]
[ 0.20554584+0.j 0.1161731 +0.j]
[ 0.22904745+0.j -0.03467156+0.j]
[ 0.20211962+0.j 0.1012013 +0.j]
[ 0.22206629+0.j 0.10004164+0.j]
[ 0.25168529+0.j 0.11662205+0.j]
[ 0.30515005+0.j -0.86892982+0.j]]
解决方法
我现在发现我需要使我的特征向量具有真正的 float32 类型。
所以我将特征向量线从 eigenvecs = eigenvecs[:,1:]
更改为 eigenvecs = eigenvecs[:,1:].real.astype(np.float32)
。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。