如何解决:为什么我无法复现 sklearn Ridge 的 L2 正则化结果?
我不明白为什么我的岭回归(带 L2 正则化的 OLS)梯度下降实现无法得到与 sklearn 接近的结果,
而同一个模型在不加 L2 正则化时却能与 sklearn 的结果完全一致。
准备数据
import pandas as pd
import numpy as np
import torch
from sklearn.linear_model import Ridge,LinearRegression
from sklearn import datasets
torch.manual_seed(123)
np.random.seed(123)
# load data
X,y,coef = datasets.make_regression(n_samples=1000,n_features=2,n_informative=2,noise=5,coef=True,random_state=0)
# set df
df = pd.DataFrame({'x1': X[:,0],'x2': X[:,1],'y': y})
# assign features and target
X = torch.tensor(df[['x1','x2']].values,dtype=torch.float)
y = torch.tensor(df['y'].values,dtype=torch.float)
# Shuffling & train/test split
shuffle_idx = torch.randperm(y.size(0),dtype=torch.long)
# assign shuffle
X,y = X[shuffle_idx],y[shuffle_idx]
# split
percent80 = int(shuffle_idx.size(0)*0.8)
X_train,X_test = X[shuffle_idx[:percent80]],X[shuffle_idx[percent80:]]
y_train,y_test = y[shuffle_idx[:percent80]],y[shuffle_idx[percent80:]]
# normalize (mean zero,unit variance)
mu,sigma = X_train.mean(dim=0),X_train.std(dim=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
常规OLS
class LM1():
def __init__(self,num_features,LAMBDA=0.0):
self.num_features = num_features
self.weights = torch.zeros(num_features,1,dtype=torch.float)
self.bias = torch.zeros(1,dtype=torch.float)
self.LAMBDA = LAMBDA
def forward(self,x):
netinputs = torch.add(torch.mm(x,self.weights),self.bias)
activations = netinputs
return activations.view(-1)
def backward(self,x,yhat,y):
grad_loss_yhat = y - yhat
grad_yhat_weights = x
grad_yhat_bias = 1.
grad_loss_weights = (2* -torch.mm(grad_yhat_weights.t(),grad_loss_yhat.view(-1,1)) +
( 2 * self.LAMBDA * self.weights)) / y.size(0)
grad_loss_bias = 2* -torch.sum(grad_yhat_bias*grad_loss_yhat) / y.size(0)
return (-1)*grad_loss_weights,(-1)*grad_loss_bias
def loss(self,y):
return torch.mean((yhat - y)**2)
def train(self,num_epochs,learning_rate=0.01):
cost = []
for e in range(num_epochs):
# forward
yhat = self.forward(x)
# backward
negative_grad_w,negative_grad_b = self.backward(x,y)
# update
self.weights += learning_rate * negative_grad_w
self.bias += learning_rate * negative_grad_b
# logs
curr_loss = self.loss(yhat,y)
print('Epoch: %03d' % (e+1),end="")
print(' | MSE: %.5f' % curr_loss)
cost.append(curr_loss)
return None
model = LM1(num_features=X_train.size(1),LAMBDA=0.0)
model.train(X_train,y_train,num_epochs=100,learning_rate=0.1)
print('predicted',model.weights) # [40.1894],[39.6800]
print('true weights:',coef)
# identical to sklearn
reg = LinearRegression().fit(X_train,y_train)
reg.coef_ # [40.1894],[39.6800]
Ridge L2回归
# with l2 lambda
model1 = LM1(num_features=X_train.size(1),LAMBDA=125.0)
model1.train(X_train,model1.weights) # [34.7150],[34.2740]
# sklearn ridge
from sklearn.linear_model import Ridge
LAMBDA = 125.0
# C = inverse of lambda
C = 1/LAMBDA
# alpha = 1 / (2C)
alpha = 1 / (2*C)
reg = Ridge(alpha=alpha).fit(X_train,y_train)
print('sklearn ridge:',reg.coef_) # [37.25,36.77]
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。