How to resolve the significantly different results I get from MLPRegressor vs. KerasRegressor
When using the same regression dataset, I see drastically different results from MLPRegressor versus KerasRegressor. Of course this may just be my understanding of the libraries, so I'd appreciate any insight anyone can offer.
The following imports are sufficient for both examples:
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split
from sklearn.utils.multiclass import type_of_target
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from keras.models import Sequential
from keras.layers import Dense
Dataset creation:
X, y = make_regression(n_samples=590, n_features=180, n_targets=1, n_informative=99, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=99)
# Check target variable(s) type.
print(f'Training target: {type_of_target(y_train)}')
print(f'Test / validation target: {type_of_target(y_test)}')
MLPRegressor:
def create_mlpregressor_nn_model(features=180, classes=1, activation='relu', solver='adam'):
    # Set number of neurons in hidden layer to (number of attributes + number of classes) / 2 + 1.
    num_hl_neurons = int(((features + classes) / 2) + 1)
    hl_neurons = (num_hl_neurons,)
    # Create NN model, setting dimensions according to the number of features + classes.
    model = MLPRegressor(activation=activation, solver=solver, hidden_layer_sizes=hl_neurons)
    return model
model = create_mlpregressor_nn_model(features=180, classes=1)
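For these inputs the heuristic works out to int((180 + 1) / 2) + 1 = 91 neurons in the single hidden layer, which can be confirmed on the instance:
print(model.hidden_layer_sizes)  # (91,)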
# For MLP only
batch_size = [int(len(X_train) * .09), int(len(X_train) * .2), int(len(X_train) * .45), int(len(X_train) * .9)]
solver = ['sgd', 'adam', 'lbfgs']
activation = ['identity', 'logistic', 'relu', 'tanh']
# For regression.
kfold = KFold(n_splits=10)
scale = StandardScaler()
clf = Pipeline([('scaler', scale), ('model', model)])
param_grid = dict(model__solver=solver, model__activation=activation, model__batch_size=batch_size)
grid = RandomizedSearchCV(estimator=clf, param_distributions=param_grid, n_jobs=-1, cv=kfold, n_iter=15, refit=True)
grid_result = grid.fit(X_train, y_train)
print("Best accuracy of : %f with parameters: %s" % (grid_result.best_score_,grid_result.best_params_))
acc_means = grid_result.cv_results_['mean_test_score']
acc_stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean,stdev,param in zip(acc_means,acc_stds,params):
print("%f (%f) with: %r" % (mean,param))
When I run this in a Jupyter notebook, I get:
Best accuracy of : 1.000000 with parameters: {'model__solver': 'lbfgs','model__batch_size': 238,'model__activation': 'identity'}
nan (nan) with: {'model__solver': 'sgd','model__batch_size': 47,'model__activation': 'relu'}
nan (nan) with: {'model__solver': 'sgd','model__batch_size': 106,'model__activation': 'relu'}
1.000000 (0.000000) with: {'model__solver': 'lbfgs','model__activation': 'identity'}
0.899176 (0.020401) with: {'model__solver': 'sgd','model__activation': 'logistic'}
nan (nan) with: {'model__solver': 'lbfgs','model__activation': 'tanh'}
nan (nan) with: {'model__solver': 'lbfgs','model__activation': 'logistic'}
0.101420 (0.024553) with: {'model__solver': 'adam','model__activation': 'logistic'}
-0.022715 (0.025394) with: {'model__solver': 'adam','model__batch_size': 477,'model__activation': 'logistic'}
0.024706 (0.038960) with: {'model__solver': 'adam','model__activation': 'tanh'}
0.892599 (0.020349) with: {'model__solver': 'sgd','model__activation': 'logistic'}
0.107719 (0.025551) with: {'model__solver': 'adam','model__activation': 'tanh'}
nan (nan) with: {'model__solver': 'sgd','model__activation': 'relu'}
0.213053 (0.025807) with: {'model__solver': 'adam','model__activation': 'logistic'}
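A side note on the numbers above: as far as I understand, with no explicit scoring argument, RandomizedSearchCV falls back to the estimator's own .score, which for MLPRegressor is R^2 rather than classification accuracy, so my "Best accuracy" label is a bit misleading. A minimal check of the refit winner on the held-out split (a sketch, not part of the run above):
from sklearn.metrics import r2_score
y_pred = grid_result.best_estimator_.predict(X_test)
print(f'Held-out R^2: {r2_score(y_test, y_pred):.4f}')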
The code does execute when I run it in the notebook, but when executing it from the Python 3.7.4 command line I hit numerous errors and failures:
/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py:536: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
AttributeError: 'str' object has no attribute 'decode'
FitFailedWarning)
/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/extmath.py:151: RuntimeWarning: overflow encountered in matmul
ret = a @ b
/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/extmath.py:151: RuntimeWarning: invalid value encountered in matmul
ret = a @ b
/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/neural_network/_base.py:195: RuntimeWarning: overflow encountered in square
return ((y_true - y_pred) ** 2).mean() / 2
/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/extmath.py:151: RuntimeWarning: overflow encountered in matmul
ret = a @ b
/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/extmath.py:151: RuntimeWarning: invalid value encountered in matmul
ret = a @ b
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py",line 431,in _process_worker
r = call_item()
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py",line 285,in __call__
return self.fn(*self.args,**self.kwargs)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py",line 595,in __call__
return self.func(*args,**kwargs)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py",line 263,in __call__
for func,args,kwargs in self.items]
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py",in <listcomp>
for func,kwargs in self.items]
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py",line 544,in _fit_and_score
test_scores = _score(estimator,y_test,scorer)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py",line 591,in _score
scores = scorer(estimator,y_test)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_scorer.py",line 89,in __call__
score = scorer(estimator,*args,**kwargs)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_scorer.py",line 371,in _passthrough_scorer
return estimator.score(*args,**kwargs)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/metaestimators.py",line 116,in <lambda>
out = lambda *args,**kwargs: self.fn(obj,**kwargs)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py",line 619,in score
return self.steps[-1][-1].score(Xt,**score_params)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/base.py",line 424,in score
y_type,_,_ = _check_reg_targets(y,y_pred,None)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_regression.py",line 86,in _check_reg_targets
y_pred = check_array(y_pred,ensure_2d=False,dtype=dtype)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py",line 578,in check_array
allow_nan=force_all_finite == 'allow-nan')
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py",line 60,in _assert_all_finite
msg_dtype if msg_dtype is not None else X.dtype)
ValueError: Input contains NaN,infinity or a value too large for dtype('float64').
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>",line 1,in <module>
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_search.py",line 710,in fit
self._run_search(evaluate_candidates)
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_search.py",line 1484,in _run_search
random_state=self.random_state))
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_search.py",line 689,in evaluate_candidates
cv.split(X,groups)))
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py",line 1054,in __call__
self.retrieve()
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py",line 933,in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py",line 542,in wrap_future_result
return future.result(timeout=timeout)
File "/Users/me/opt/anaconda3/lib/python3.7/concurrent/futures/_base.py",line 428,in result
return self.__get_result()
File "/Users/me/opt/anaconda3/lib/python3.7/concurrent/futures/_base.py",line 384,in __get_result
raise self._exception
ValueError: Input contains NaN,infinity or a value too large for dtype('float64').
These are suppressed in Jupyter.
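To surface these failures instead of having them silently collapse into nan scores, one option (a debugging sketch, not part of my original run) is to rerun the search single-threaded and let scikit-learn re-raise the underlying exception:
debug_grid = RandomizedSearchCV(estimator=clf, param_distributions=param_grid,
                                n_jobs=1, cv=kfold, n_iter=15, error_score='raise')
debug_grid.fit(X_train, y_train)  # The first failing fit now raises its real exception.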
KerasRegressor:
# Function to create Keras NN model
def create_nn_model(features=180, classes=1, problem_type='regression', hl_act='relu', optimizer='SGD'):
    if problem_type == 'bin_class':
        ol_act = 'sigmoid'
        loss = 'binary_crossentropy'
        metrics = ['accuracy']
        ol_neurons = 1
    if problem_type == 'multi_class':
        ol_act = 'softmax'
        loss = 'categorical_crossentropy'
        metrics = ['accuracy']
        ol_neurons = classes
    if problem_type == 'regression':
        ol_act = 'linear'
        loss = 'mean_squared_logarithmic_error'
        # loss = 'mean_absolute_error'
        # https://machinelearningmastery.com/how-to-choose-loss-functions-when-training-deep-learning-neural-networks/
        metrics = ['accuracy']
        ol_neurons = 1
    # Set number of neurons in hidden layer to (number of attributes + number of classes) / 2 + 1.
    hl_neurons = int(((features + classes) / 2) + 1)
    # Create NN model, setting dimensions according to the number of features.
    model = Sequential()
    model.add(Dense(hl_neurons, input_dim=features, activation=hl_act))
    model.add(Dense(ol_neurons, activation=ol_act))
    # Compile model.
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    return model
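As a quick standalone sanity check of the architecture before wrapping it (assuming a TensorFlow-backed Keras install):
test_model = create_nn_model(features=180)
test_model.summary()  # Expect a Dense(91) hidden layer and a Dense(1) linear output.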
model = KerasRegressor(build_fn=create_nn_model, epochs=100, verbose=0)
batch_size = [int(len(X_train) * .10), int(len(X_train) * .25), int(len(X_train) * .5), len(X_train)]
optimizer = ['SGD', 'RMSprop', 'Adam', 'Adamax']
hl_act = ['relu', 'tanh', 'sigmoid']
# For regression.
kfold = KFold(n_splits=10)
scale = StandardScaler()
clf = Pipeline([('scaler', scale), ('model', model)])
param_grid = dict(model__batch_size=batch_size, model__optimizer=optimizer, model__hl_act=hl_act)
grid = RandomizedSearchCV(estimator=clf, param_distributions=param_grid, n_jobs=-1, cv=kfold, n_iter=15, refit=True)
grid_result = grid.fit(X_train, y_train)
print("Best accuracy of : %f with parameters: %s" % (grid_result.best_score_, grid_result.best_params_))
acc_means = grid_result.cv_results_['mean_test_score']
acc_stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(acc_means, acc_stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
From this I get more plausible-looking results:
Best accuracy of : -4.229532 with parameters: {'model__optimizer': 'RMSprop','model__hl_act': 'sigmoid','model__batch_size': 53}
-7.041340 (0.853295) with: {'model__optimizer': 'RMSprop','model__hl_act': 'tanh','model__batch_size': 265}
-4.229532 (0.371514) with: {'model__optimizer': 'RMSprop','model__batch_size': 53}
-8.552750 (2.028292) with: {'model__optimizer': 'SGD','model__batch_size': 53}
-7.737438 (0.674679) with: {'model__optimizer': 'SGD','model__hl_act': 'relu','model__batch_size': 265}
-4.977238 (0.629483) with: {'model__optimizer': 'Adam','model__batch_size': 53}
-10.943363 (1.486120) with: {'model__optimizer': 'Adamax','model__batch_size': 531}
-7.668005 (0.871349) with: {'model__optimizer': 'Adam','model__batch_size': 265}
-6.301838 (0.568332) with: {'model__optimizer': 'Adam','model__batch_size': 132}
-8.018414 (1.006902) with: {'model__optimizer': 'Adamax','model__batch_size': 53}
-9.676513 (4.129568) with: {'model__optimizer': 'RMSprop','model__batch_size': 531}
-4.614404 (0.351684) with: {'model__optimizer': 'Adam','model__batch_size': 53}
-9.935071 (1.156778) with: {'model__optimizer': 'Adamax','model__batch_size': 265}
-7.074733 (0.820876) with: {'model__optimizer': 'RMSprop','model__batch_size': 531}
-6.467388 (0.719459) with: {'model__optimizer': 'Adam','model__batch_size': 265}
-10.112093 (1.242853) with: {'model__optimizer': 'Adamax','model__batch_size': 53}
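In case a shared metric helps with the comparison, here is a sketch of how I would score either pipeline with the same explicit scorer (rather than each estimator's default .score, which is R^2 for MLPRegressor but the negative training loss for KerasRegressor):
from sklearn.model_selection import cross_val_score
# Score the current pipeline with one explicit, shared regression metric.
scores = cross_val_score(clf, X_train, y_train, cv=kfold, scoring='neg_mean_squared_error')
print('Mean neg. MSE: %f (%f)' % (scores.mean(), scores.std()))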
I've done the equivalent with MLPClassifier and KerasClassifier on binary and multi-class datasets, and there everything behaved as expected, with very similar results from the two.
Given that I'm using the same dataset, and only hyperparameters that are valid according to each library's docs, I'm at a loss on this one.
Can anyone advise?