如何解决如何使用 Symfit 或 curve_fit 对分段模型执行交叉验证?
我正在尝试使用 Symfit 对分段模型执行交叉验证:
我的数据:
x_data = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],dtype=float)
y_data = np.array([1,10])
要使用交叉验证,我们需要创建一个估算器,所以我尝试将拟合打包在一个类中
from sklearn.model_selection import cross_validate,cross_val_score
class model:
def __init__( self,a=None,b=None,c=None ):
self.a = a
self.b = b
self.c = c
def _model_background(self,X,a,b,c):
y1 = a*x + b-a*c
y2 = b
return Model({y: Piecewise((y1,x <= c),(y2,x > c))})
def predict( self,X ):
return self._model_background( X,self.a,self.b,self.c )
def fit( self,y ):
from symfit import parameters,variables,Fit,Piecewise,Model
fit = Fit(self._model_background,x=x_data,y=y_data)
fit_result = fit.execute()
self.a = fit_result.value(a)
self.b = fit_result.value(b)
self.c = fit_result.value(c)
return self
def get_params( self,deep=False ):
return { 'a':self.a,'b':self.b,'c':self.c }
def set_params( self,**parameters ):
for parameter,value in parameters.intems():
setattr( self,parameter,value )
return self
然后我执行交叉验证行
cross_validate( symfitmodel(),x_data,y_data,cv=5,scoring='neg_mean_squared_error' )
但似乎我没有按照应有的方式创建类 错误信息:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:548: FitFailedWarning: Estimator fit Failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py",line 282,in __get__
return getattr(obj,self.cache_attr)
AttributeError: 'Model' object has no attribute '_cached_connectivity_mapping'
During handling of the above exception,another exception occurred:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py",line 531,in _fit_and_score
estimator.fit(X_train,y_train,**fit_params)
File "<ipython-input-3-db68eb82746d>",line 18,in fit
fit = Fit(self._model_background,y=y_data)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py",line 423,in wrapped_func
return func(*bound_args.args,**bound_args.kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\fit.py",line 374,in __init__
self.model = Model(model)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py",line 875,in __init__
super(HessianModel,self).__init__(*args,**kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py",line 824,in __init__
super(GradientModel,line 125,in __init__
self._init_from_dict(model)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py",line 651,in _init_from_dict
super(BaseCallableModel,self)._init_from_dict(model_dict)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py",line 309,in _init_from_dict
ordered = list(toposort(self.connectivity_mapping))
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py",line 285,in __get__
setattr(obj,self.cache_attr,self.fget(obj))
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py",line 383,in connectivity_mapping
vars,params = seperate_symbols(expr)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py",line 82,in seperate_symbols
for symbol in func.free_symbols:
AttributeError: 'function' object has no attribute 'free_symbols'
warnings.warn("Estimator fit Failed. The score on this train-test"
我用curve_fit尝试过,但没有成功:
class piecewise:
def __init__( self,x0=None,b=None ):
self.x0 = x0
self.a = a
self.b = b
def _piecewise_background(self,x0,b):
return np.piecewise(X,[X < x0],[lambda X:a*X + b-a*x0,lambda X:b])
def predict( self,X ):
return self._piecewise_background( X,self.x0,self.b )
def fit( self,y ):
from scipy.optimize import curve_fit
popt,pcov = curve_fit( self._piecewise_background,y )
self.x0 = popt[0]
self.a = popt[1]
self.b = popt[2]
return self
def get_params( self,deep=False ):
return { 'x0':self.x0,'a':self.a,'b':self.b }
def set_params( self,value )
return self
有什么想法吗?
解决方法
这不是回答而是评论。
论文中显示了一种直接方法(非迭代,没有猜测的初始值):https://fr.scribd.com/document/380941024/Regression-par-morceaux-Piecewise-Regression-pdf(本例中的第 8 页)。
当然,问题中给出的数据很少有人感兴趣,因为没有散点。所以不需要数值演算,结果是显而易见的。但这可以用来测试分段回归的方法。
散点数据示例
为了更具有代表性,以上数据进行了分散。下面的例子:
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。