如何解决从零开始的多类逻辑回归
我试图从头开始实现多类逻辑回归,但我的实现返回了糟糕的结果。我相信梯度函数和成本函数的定义没有问题。问题可能出在这些函数与 minimize
函数的交互方式上。我尝试排查过,但没能找出问题所在。能否请你帮忙指点一下?
您可以添加带有参数的估算器 'myLR': myLR(**par_dict),
par_dict= {'alpha': 0.1,'maxit': 2000,'opt_method': 'bfgs','positive': False,'penalty': None,'verbose': True,'seed': 3}
可以在此示例或其他任意示例中进行测试。
import numpy as np
from scipy.optimize import minimize
from sklearn import preprocessing
class myLR():
    """Multiclass (softmax) logistic regression trained with scipy.optimize.minimize.

    Parameters are stored as a flat vector for the optimizer and reshaped to
    (n_features, n_classes) with Fortran ordering inside cost/gradient.

    Fixes relative to the original:
      * ``cost`` and ``gradient`` now receive ``W`` explicitly (they referenced
        an undefined ``W``/``X``/``T`` before).
      * ``fit`` takes ``X`` (it read an undefined global before).
      * ``self.apha`` typo corrected to ``self.alpha``.
      * The L2 penalty is ``0.5*alpha*||W||_F**2`` so that its derivative is
        exactly the ``alpha*W`` term used in the gradient (they disagreed before,
        which breaks gradient-based optimizers).
      * ``grad`` is flattened with ``order='F'`` to match ``w_2d``'s unpacking.
      * Softmax is computed with the max-subtraction trick for stability.
      * The one-hot target matrix is built with numpy for any label values,
        removing the sklearn dependency and the labels-are-0/1 assumption.
    """

    def __init__(self, alpha=0.1, reltol=1e-8, maxit=1000, opt_method=None,
                 verbose=True, seed=0):
        self.alpha = alpha          # L2 regularization strength
        self.maxit = maxit          # max optimizer iterations
        self.reltol = reltol        # kept for interface compatibility (unused)
        self.seed = seed            # seed for the random initial weights
        self.verbose = verbose
        self.opt_method = opt_method

    def w_2d(self, w, n_classes):
        """Reshape the flat parameter vector to (n_features, n_classes)."""
        return np.reshape(w, (-1, n_classes), order='F')

    def softmax(self, W, X):
        """Row-wise softmax of X @ W, stabilized by subtracting the row max."""
        z = X @ W
        z = z - np.max(z, axis=1, keepdims=True)  # avoids overflow in exp
        a = np.exp(z)
        return a / np.sum(a, axis=1, keepdims=True)

    def cost_wraper(self, W):
        """Adapter so minimize() can call cost() with the stored data."""
        return self.cost(W, self.X, self.T, self.n_samples, self.n_classes)

    def cost(self, W, X, T, n_samples, n_classes):
        """Regularized mean cross-entropy of the softmax model."""
        W = self.w_2d(W, n_classes)
        log_O = np.log(self.softmax(W, X))
        # 0.5*alpha*||W||_F^2: its gradient is alpha*W, matching gradient().
        reg = 0.5 * self.alpha * np.sum(W * W)
        return -np.sum(T * log_O) / n_samples + reg

    def gradient_wraper(self, W):
        """Adapter so minimize() can call gradient() with the stored data."""
        return self.gradient(W, self.X, self.T, self.n_samples, self.n_classes)

    def gradient(self, W, X, T, n_samples, n_classes):
        """Gradient of cost() w.r.t. the flat parameter vector."""
        W = self.w_2d(W, n_classes)
        O = self.softmax(W, X)
        reg = self.alpha * W
        grad = -X.T.dot(T - O) / n_samples + reg
        # Flatten in Fortran order so it matches w_2d's reshape convention.
        return grad.flatten(order='F')

    def fit(self, X, y=None):
        """Fit the model on data X (n_samples, n_features) and labels y."""
        classes = np.unique(y)
        self.n_classes = len(classes)
        self.n_samples, n_features = X.shape
        # One-hot indicator matrix; works for any label values, and for the
        # binary case (where LabelBinarizer would return a single column).
        self.T = np.zeros((self.n_samples, self.n_classes), dtype=np.float64)
        for i, cls in enumerate(classes):
            self.T[y == cls, i] = 1.0
        self.X = X
        np.random.seed(self.seed)
        W_0 = np.random.random(n_features * self.n_classes)
        options = {'disp': self.verbose, 'maxiter': self.maxit}
        f_min = minimize(fun=self.cost_wraper, x0=W_0,
                         method=self.opt_method,
                         jac=self.gradient_wraper, options=options)
        self.coef_ = self.w_2d(f_min.x, self.n_classes)
        self.W_ = self.coef_
        return self

    def predict_proba(self, X):
        """Class-membership probabilities, shape (n_samples, n_classes)."""
        return self.softmax(self.coef_, X)

    def predict(self, X):
        """Most probable class index for each row of X."""
        sigma = self.predict_proba(X)
        return np.argmax(sigma, axis=1)
编辑:包括正则化项。
解决方法
我认为它现在可以使用以下代码。
import numpy as np
from scipy.optimize import minimize
from sklearn import preprocessing
class myLR():
    """Multiclass (softmax) logistic regression without regularization.

    Weights have shape (n_classes, n_features) and are flattened in C order
    for scipy.optimize.minimize.

    Fixes relative to the original:
      * ``cost`` and ``gradient`` now receive ``W`` explicitly (they referenced
        an undefined ``W`` before), and ``gradient`` got its missing colon.
      * ``fit`` takes ``X`` and uses the local ``n_classes``/``n_features``
        (``self.n_classes``/``self.n_features`` were never assigned).
      * The ``minimize`` call had broken syntax (``args=(X,method=...``); it now
        passes ``args=(X, T, n_samples, n_classes)`` correctly.
      * ``W_0`` is created as a flat vector, matching ``w_2d``'s reshape.
      * Softmax uses the max-subtraction trick for numerical stability.
      * One-hot targets are built with numpy for any label values, removing the
        sklearn dependency and the labels-are-0/1 assumption.
    """

    def __init__(self, reltol=1e-8, maxit=1000, opt_method=None,
                 verbose=True, seed=0):
        self.maxit = maxit          # max optimizer iterations
        self.reltol = reltol        # kept for interface compatibility (unused)
        self.seed = seed            # seed for the random initial weights
        self.verbose = verbose
        self.opt_method = opt_method

    def w_2d(self, w, n_classes):
        """Reshape the flat parameter vector to (n_classes, n_features)."""
        return np.reshape(w, (n_classes, -1))

    def softmax(self, W, X):
        """Row-wise softmax of X @ W.T, stabilized by subtracting the row max."""
        z = X @ W.T
        z = z - np.max(z, axis=1, keepdims=True)  # avoids overflow in exp
        a = np.exp(z)
        return a / np.sum(a, axis=1, keepdims=True)

    def squared_norm(self, x):
        """Squared Euclidean norm of x, flattened."""
        x = np.ravel(x, order='K')
        return np.dot(x, x)

    def cost(self, W, X, T, n_samples, n_classes):
        """Mean cross-entropy of the softmax model for flat weights W."""
        W = self.w_2d(W, n_classes)
        log_O = np.log(self.softmax(W, X))
        return -(T * log_O).sum() / n_samples

    def gradient(self, W, X, T, n_samples, n_classes):
        """Gradient of cost() w.r.t. the flat weight vector."""
        W = self.w_2d(W, n_classes)
        O = self.softmax(W, X)
        grad = -(T - O).T.dot(X)
        return grad.ravel() / n_samples

    def fit(self, X, y=None):
        """Fit the model on data X (n_samples, n_features) and labels y."""
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, n_features = X.shape
        # One-hot indicator matrix; works for any label values, including the
        # binary case where LabelBinarizer would return a single column.
        T = np.zeros((n_samples, n_classes), dtype=np.float64)
        for i, cls in enumerate(classes):
            T[y == cls, i] = 1.0
        np.random.seed(self.seed)
        W_0 = np.random.random(n_classes * n_features)  # flat, matching w_2d
        options = {'disp': self.verbose, 'maxiter': self.maxit}
        f_min = minimize(fun=self.cost, x0=W_0,
                         args=(X, T, n_samples, n_classes),
                         method=self.opt_method,
                         jac=self.gradient, options=options)
        self.coef_ = self.w_2d(f_min.x, n_classes)
        self.W_ = self.coef_
        return self

    def predict_proba(self, X):
        """Class-membership probabilities, shape (n_samples, n_classes)."""
        return self.softmax(self.W_, X)

    def predict(self, X):
        """Most probable class index for each row of X."""
        sigma = self.predict_proba(X)
        return np.argmax(sigma, axis=1)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。