
运行 pyspark mllib 示例的 IllegalArgumentException 错误

如何解决运行 pyspark mllib 示例时出现的 IllegalArgumentException 错误

我正在按照 Spark 官方文档中的 ML 示例进行操作,代码如下:

from pyspark.mllib.linalg import Vectors
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.param import Param,Params

# Prepare training data from a list of (label, features) tuples.
training = sqlContext.createDataFrame([

# Create a LogisticRegression instance. This instance is an Estimator.
lr = LogisticRegression(maxIter=10,regParam=0.01)
# Print out the parameters, documentation, and any default values.
print "LogisticRegression parameters:\n" + lr.explainParams() + "\n"

# Learn a LogisticRegression model. This uses the parameters stored in lr.
model1 = lr.fit(training)

但是,执行 `model1 = lr.fit(training)` 这一行时出现了以下错误信息:

IllegalArgumentException                  Traceback (most recent call last)
<ipython-input-14-3e398ce8c8bd> in <module>
      1 # Learn a LogisticRegression model. This uses the parameters stored in lr.
----> 2 model1 = lr.fit(training)

C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\base.py in fit(self,dataset,params)
    127                 return self.copy(params)._fit(dataset)
    128             else:
--> 129                 return self._fit(dataset)
    130         else:
    131             raise ValueError("Params must be either a param map or a list/tuple of param maps,"

C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit(self,dataset)
    320     def _fit(self,dataset):
--> 321         java_model = self._fit_java(dataset)
    322         model = self._create_model(java_model)
    323         return self._copyValues(model)

C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit_java(self,dataset)
    316         """
    317         self._transfer_params_to_java()
--> 318         return self._java_obj.fit(dataset._jdf)
    320     def _fit(self,dataset):

C:\spark\spark-3.0.2-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py in __call__(self,*args)
   1303         answer = self.gateway_client.send_command(command)
   1304         return_value = get_return_value(
-> 1305             answer,self.gateway_client,self.target_id,self.name)
   1307         for temp_arg in temp_args:

C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\sql\utils.py in deco(*a,**kw)
    132                 # Hide where the exception came from that shows a non-Pythonic
    133                 # JVM exception message.
--> 134                 raise_from(converted)
    135             else:
    136                 raise

C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\sql\utils.py in raise_from(e)

IllegalArgumentException: requirement failed: Column features must be of type struct<type:tinyint,size:int,indices:array<int>,values:array<double>> but was actually struct<type:tinyint,values:array<double>>.

