深度高斯过程超参数

如何解决深高斯过程超参数

我使用 GPyTorch 运行深度高斯过程回归模型，并使用 Adam 优化器来优化超参数。

这是我正在使用的合成数据集的创建：

# Synthetic 1-D regression data made of three disjoint input ranges, each with
# its own target function and its own Gaussian noise variance.

# Segment 1: 100 points in [0, 0.2], y = sin(2*pi*x) + noise (variance 0.04).
train_x1 = torch.linspace(0, 0.2, 100)
train_y1 = torch.sin(train_x1 * (2 * math.pi)) + torch.randn(train_x1.size()) * math.sqrt(0.04)

# Segment 2: 3400 points in [0.35, 0.5], y = 2*pi*x + noise (variance 0.005).
train_x2 = torch.linspace(0.35, 0.5, 3400)
train_y2 = train_x2 * (2 * math.pi) + torch.randn(train_x2.size()) * math.sqrt(0.005)

# Segment 3: 2500 points in [0.7, 1], y = x^2 + noise (variance 0.1).
train_x3 = torch.linspace(0.7, 1, 2500)
train_y3 = train_x3 * train_x3 + torch.randn(train_x3.size()) * math.sqrt(0.1)

# Concatenate the segments directly; this avoids round-tripping 6000 zero-dim
# tensors through Python lists. Both results are 1-D tensors of length 6000.
train_x = torch.cat([train_x1, train_x2, train_x3])
train_y = torch.cat([train_y1, train_y2, train_y3])

当我运行下面这段代码来查看超参数时，出现错误：only one element tensors can be converted to Python scalars（只有单元素张量才能转换为 Python 标量）

# Print the name and value of every parameter registered on the model.
# NOTE: `param.item()` only works on tensors that hold exactly one element, so
# this loop raises the error quoted above as soon as it reaches a parameter
# with more than one element. `param.tolist()` or printing `param` itself
# works for any size.
for param_name,param in model.named_parameters():
    print(f'Parameter name: {param_name:42} value = {param.item()}')

有人知道我该如何解决这个问题吗？这是我的深度 GP 回归模型的定义：

#define an example deep GP hidden layer.
#Instead of extending ApproximateGP,we extend DeepGPLayer.
#DeepGPLayers need a number of input dimensions,a number of output dimensions,and a number of samples. This is kind of like a 
#linear layer in a standard neural network – input_dims defines how many inputs this hidden layer will expect,and output_dims 
#defines how many hidden GPs to create outputs for.
#In this particular example, we make a particularly fancy DeepGPLayer that has "skip connections" with previous layers, similar
#to a resnet

class ToyDeepGPHiddenLayer(DeepGPLayer):
    """A variational GP layer for a deep GP, with optional skip connections.

    Args:
        input_dims: Size of the last dimension of the inputs this layer
            receives.
        output_dims: Number of GPs in the layer; it is used as the batch
            shape of the inducing points, mean and kernel. ``None`` builds a
            single, non-batched GP.
        num_inducing: Number of inducing points per GP.
        mean_type: ``'constant'`` selects ``ConstantMean``; any other value
            selects ``LinearMean``.
    """

    def __init__(self, input_dims, output_dims, num_inducing=128, mean_type='constant'):
        if output_dims is None:
            inducing_points = torch.randn(num_inducing, input_dims)
            batch_shape = torch.Size([])
        else:
            inducing_points = torch.randn(output_dims, num_inducing, input_dims)
            batch_shape = torch.Size([output_dims])

        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=num_inducing,
            batch_shape=batch_shape,
        )

        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True,
        )

        super().__init__(variational_strategy, input_dims, output_dims)

        if mean_type == 'constant':
            self.mean_module = ConstantMean(batch_shape=batch_shape)
        else:
            self.mean_module = LinearMean(input_dims)

        # RBF kernel with one lengthscale per input dimension, wrapped in a
        # ScaleKernel that adds an output-scale parameter.
        self.covar_module = ScaleKernel(
            RBFKernel(batch_shape=batch_shape, ard_num_dims=input_dims),
            batch_shape=batch_shape,
            ard_num_dims=None,
        )

    def forward(self, x):
        """Return the GP prior at ``x`` as a ``MultivariateNormal``."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)

    def __call__(self, x, *other_inputs, **kwargs):
        """
        Overriding __call__ isn't strictly necessary, but it lets us add concatenation based skip connections
        easily. For example, hidden_layer2(hidden_layer1_outputs, inputs) will pass the concatenation of the first
        hidden layer's outputs and the input data to hidden_layer2.
        """
        if other_inputs:
            if isinstance(x, gpytorch.distributions.MultitaskMultivariateNormal):
                x = x.rsample()

            # NOTE(review): relies on `self.num_samples` being provided by the
            # base class - confirm for the installed GPyTorch version.
            processed_inputs = [
                inp.unsqueeze(0).expand(self.num_samples, *inp.shape)
                for inp in other_inputs
            ]

            x = torch.cat([x] + processed_inputs, dim=-1)

        return super().__call__(x, are_samples=bool(other_inputs))

#Building the deep GP

# Number of GPs (output dimensions) in every hidden layer.
num_output_dims = 2


class DeepGP(DeepGP):
    """Deep GP regression model: four hidden layers, one output layer and a
    Gaussian likelihood.

    NOTE: the class reuses the name of its imported base class. This works
    because the base is looked up before the name is rebound, but after this
    definition ``DeepGP`` refers to this subclass only.

    Args:
        train_x_shape: Shape of the training inputs; its last entry is used
            as the input dimensionality of the first hidden layer.
    """

    def __init__(self, train_x_shape):
        hidden_layer_1 = ToyDeepGPHiddenLayer(
            input_dims=train_x_shape[-1],
            output_dims=num_output_dims,
            mean_type='linear',
        )
        hidden_layer_2 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_1.output_dims,
            output_dims=num_output_dims,
            mean_type='linear',
        )
        hidden_layer_3 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_2.output_dims,
            output_dims=num_output_dims,
        )
        hidden_layer_4 = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_3.output_dims,
            output_dims=num_output_dims,
        )
        # output_dims=None makes the last layer a single, non-batched GP.
        last_layer = ToyDeepGPHiddenLayer(
            input_dims=hidden_layer_4.output_dims,
            output_dims=None,
            mean_type='constant',
        )

        super().__init__()

        self.hidden_layer_1 = hidden_layer_1
        self.hidden_layer_2 = hidden_layer_2
        self.hidden_layer_3 = hidden_layer_3
        self.hidden_layer_4 = hidden_layer_4
        self.last_layer = last_layer

#         self.likelihood = gpytorch.likelihoods.GaussianLikelihood(noise_constraint=gpytorch.constraints.GreaterThan(1e-11))
#         self.likelihood.register_prior("noise_prior",gpytorch.priors.HorseshoePrior(0.2),"noise")
#         self.likelihood.noise = 1e-2
        self.likelihood = GaussianLikelihood()

    def forward(self, inputs):
        """Pass ``inputs`` through the hidden layers in order, then the output layer."""
        hidden_rep1 = self.hidden_layer_1(inputs)
        hidden_rep2 = self.hidden_layer_2(hidden_rep1)
        hidden_rep3 = self.hidden_layer_3(hidden_rep2)
        hidden_rep4 = self.hidden_layer_4(hidden_rep3)
        output = self.last_layer(hidden_rep4)
        return output

    def predict(self, test_loader):
        """Evaluate the model on every batch yielded by ``test_loader``.

        Args:
            test_loader: Iterable yielding ``(x_batch, y_batch)`` pairs.

        Returns:
            A tuple ``(means, variances, log_likelihoods)``; each entry is the
            per-batch results concatenated along the last dimension.
        """
        with torch.no_grad():
            mus = []
            variances = []
            lls = []
            for x_batch, y_batch in test_loader:
                preds = self.likelihood(self(x_batch))
                mus.append(preds.mean)
                variances.append(preds.variance)
                # Use this instance rather than a module-level `model`.
                lls.append(self.likelihood.log_marginal(y_batch, self(x_batch)))

        return torch.cat(mus, dim=-1), torch.cat(variances, dim=-1), torch.cat(lls, dim=-1)

    
# Build the model from the shape of the training inputs and move it to the
# GPU when CUDA is available.
# NOTE(review): DeepGP reads train_x.shape[-1] as the input dimensionality -
# confirm train_x has a trailing feature dimension at this point.
model = DeepGP(train_x.shape)
model = model.cuda() if torch.cuda.is_available() else model

训练代码：

#training the data
# Training configuration. `smoke_test` and `train_loader` are expected to be
# defined before this point.
num_epochs = 50
num_samples = 3 if smoke_test else 10

# A single parameter group holding every model parameter.
optimizer = torch.optim.Adam([{'params': model.parameters()}], lr=0.1)

# Objective: the variational ELBO wrapped for deep GPs.
# NOTE(review): train_x.shape[-2] is passed as the number of training points,
# which requires train_x to have at least two dimensions - confirm.
elbo = VariationalELBO(model.likelihood, model, train_x.shape[-2])
mll = DeepApproximateMLL(elbo)

epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
for i in epochs_iter:
    # One pass over all minibatches per epoch, with a nested progress bar.
    minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
    for x_batch, y_batch in minibatch_iter:
        with gpytorch.settings.num_likelihood_samples(num_samples):
            optimizer.zero_grad()
            output = model(x_batch)
            # Minimise the negative of the objective.
            loss = -mll(output, y_batch)
            loss.backward()
            optimizer.step()
            minibatch_iter.set_postfix(loss=loss.item())

测试代码：

import gpytorch
import math


# Wrap the held-out data in a loader that yields batches of up to 1024 points.
test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=1024)
print(test_x.shape)

# Switch to evaluation mode and collect predictions over the whole test set.
model.eval()
predictive_means, predictive_variances, test_lls = model.predict(test_loader)

# RMSE of the predictive means averaged over dim 0, against the targets.
residual = predictive_means.mean(0) - test_y
rmse = residual.pow(2).mean().sqrt()
print(f"RMSE: {rmse.item()},NLL: {-test_lls.mean().item()}")

非常感谢。

深高斯过程超参数

如何解决深高斯过程超参数

相关推荐