如何解决运行 pyspark mllib 示例的 IllegalArgumentException 错误
我正在按照 Spark ML 官方文档中的示例(example)进行操作,代码如下:
# NOTE(review): this mllib import is what triggers the IllegalArgumentException
# reported below. Estimators in pyspark.ml (such as LogisticRegression) expect
# feature vectors built with pyspark.ml.linalg.Vectors, not with
# pyspark.mllib.linalg.Vectors; importing Vectors from pyspark.ml.linalg
# instead resolves the error.
from pyspark.mllib.linalg import Vectors
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.param import Param, Params

# Prepare training data from a list of (label, features) tuples.
# `sqlContext` is not defined in this snippet; it is expected to be provided
# by the surrounding Spark session/shell.
training = sqlContext.createDataFrame(
    [
        (1.0, Vectors.dense([0.0, 1.1, 0.1])),
        (0.0, Vectors.dense([2.0, 1.0, -1.0])),
        (0.0, Vectors.dense([2.0, 1.3, 1.0])),
        (1.0, Vectors.dense([0.0, 1.2, -0.5])),
    ],
    ["label", "features"],
)

# Create a LogisticRegression instance. This instance is an Estimator.
lr = LogisticRegression(maxIter=10, regParam=0.01)

# Print out the parameters, documentation, and any default values.
# A single-argument print(...) call emits the same text on Python 2 and 3.
print("LogisticRegression parameters:\n" + lr.explainParams() + "\n")

# Learn a LogisticRegression model. This uses the parameters stored in lr.
model1 = lr.fit(training)
但是,执行 model1 = lr.fit(training) 这一行时
给出了以下错误消息。
---------------------------------------------------------------------------
IllegalArgumentException Traceback (most recent call last)
<ipython-input-14-3e398ce8c8bd> in <module>
1 # Learn a LogisticRegression model. This uses the parameters stored in lr.
----> 2 model1 = lr.fit(training)
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\base.py in fit(self,dataset,params)
127 return self.copy(params)._fit(dataset)
128 else:
--> 129 return self._fit(dataset)
130 else:
131 raise ValueError("Params must be either a param map or a list/tuple of param maps,"
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit(self,dataset)
319
320 def _fit(self,dataset):
--> 321 java_model = self._fit_java(dataset)
322 model = self._create_model(java_model)
323 return self._copyValues(model)
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit_java(self,dataset)
316 """
317 self._transfer_params_to_java()
--> 318 return self._java_obj.fit(dataset._jdf)
319
320 def _fit(self,dataset):
C:\spark\spark-3.0.2-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py in __call__(self,*args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer,self.gateway_client,self.target_id,self.name)
1306
1307 for temp_arg in temp_args:
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\sql\utils.py in deco(*a,**kw)
132 # Hide where the exception came from that shows a non-Pythonic
133 # JVM exception message.
--> 134 raise_from(converted)
135 else:
136 raise
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\sql\utils.py in raise_from(e)
IllegalArgumentException: requirement failed: Column features must be of type struct<type:tinyint,size:int,indices:array<int>,values:array<double>> but was actually struct<type:tinyint,size:int,indices:array<int>,values:array<double>>.
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。