
How to fix: Prediction Failed: Exception during sklearn prediction: 'HistGradientBoostingClassifier' object has no attribute 'n_features_'

I am calling the predict method from a Cloud Function against a model deployed on Google Cloud AI Platform, and I get this error telling me that HistGradientBoostingClassifier has no attribute n_features_.

Looking at gradient_boosting.py, when HistGradientBoostingClassifier's predict(self, X) method is called (line 1100), it calls self.predict_proba(X) (line 1114), which calls _raw_predict (line 1130), and inside that function self.n_features_ is accessed (line 646). _n_features is assigned from the training dataset X on line 143 in the fit method of BaseHistGradientBoosting, which HistGradientBoostingClassifier extends.
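
For reference, here is a quick way I can check locally which of the two attributes a freshly fitted estimator actually carries under a given scikit-learn version (a minimal sketch; the toy data is made up and only exists so that fit can be called):

import numpy as np
import sklearn
from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401 (required on scikit-learn < 1.0)
from sklearn.ensemble import HistGradientBoostingClassifier

# Fit on throwaway data, then inspect which attribute this scikit-learn version sets
X = np.random.rand(50, 3)
y = np.random.randint(0, 2, size=50)
clf = HistGradientBoostingClassifier(max_iter=5).fit(X, y)

print(sklearn.__version__)
print("n_features_:", hasattr(clf, "n_features_"))
print("_n_features:", hasattr(clf, "_n_features"))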

I am calling the predict method through the Python API: service.projects().predict(name=name, body={'instances': instances}).execute()

Would HistGradientBoostingClassifier simply not have this attribute, or is there something wrong with my training set such that it never gets passed to the fit method when I first create the model and version with ml.projects().models().create(parent=project_id_model, body=model_request_dict).execute() and ml.projects().models().versions().create(parent=project_id_version, body=version_request_dict).execute()?
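
One way I could try to narrow this down (a sketch, assuming the exported artifact is a model.joblib written by saginawBayModeling.gen_model under the deploymentUri gs://algae-mod-bucket1/model) is to download that file and unpickle it in an environment whose scikit-learn matches the AI Platform runtime, then check the attribute before calling predict:

# Sketch: unpickle the deployed artifact locally and inspect it.
# The object path 'model/model.joblib' is an assumption based on the
# deploymentUri used in the deployment function further down.
import joblib
from google.cloud import storage

client = storage.Client()
blob = client.bucket('algae-mod-bucket1').blob('model/model.joblib')
blob.download_to_filename('model.joblib')

model = joblib.load('model.joblib')
print(type(model))
print(hasattr(model, 'n_features_'))  # False here would reproduce the server-side error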

Here is the full error from the Cloud Function logs:

Traceback (most recent call last):
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py", line 449, in run_background_function
    _function_handler.invoke_user_function(event_object)
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py", line 268, in invoke_user_function
    return call_user_function(request_or_event)
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py", line 265, in call_user_function
    event_context.Context(**request_or_event.context))
  File "/user_code/main.py", line 41, in request_prediction
    raise RuntimeError(response['error'])
RuntimeError: Prediction Failed: Exception during sklearn prediction: 'HistGradientBoostingClassifier' object has no attribute 'n_features_'

The full Cloud Function that sends the prediction request:

from google.cloud import storage
from google.cloud import firestore
import googleapiclient.discovery
import os
import pandas
import numpy as np
import math
import json

def request_prediction(event, context):
    # print("its working")
    PROJECT_ID = 'algae-model'
    MODEL_NAME = 'AlgaePredictor'
    VERSION_NAME = 'v1'

    ## setup file to store prediction data in cloud storage
    storage_client = storage.Client()
    bucket = storage_client.get_bucket('algae-mod-bucket1')
    resultBlob = bucket.blob('prediction-data')

    resultBlob.upload_from_string(str(event) + "\n\n\n" + str(context))


    ## hardcoded data and data from excel sheet
    # instances = [[22.6,277.7,8.49,4.04,18.06,19.06,5.77,0.02,5.45,0.83],[math.nan,math.nan,3.07,4.25,10.72,3.59,0.27,6.23,0.84],[23.2,284.6,8.05,3.12,10.41,16.07,47.27,0.52,4.1,1.51],[25.4,306.9,7.3,3.47,0.81,8.47,0.0,7.27,0.68],[17.9,440.5,8.61,1.93,0.018,2.39,12.21,1.21,2.5,3.79],[18.57,397.99,8.75,1.36,1.25,6.2,80.36,1.206,5.0,3.11],[25.2,413.6,7.86,4.5,3.2,6.5,42.79,2.14,math.nan],[24.5,340.3,8.97,9.02,5.64,23.4,29.13,0.18,6.95,1.0],[13.2,344.4,9.77,15.69,18.76,0.4,0.002,11.87,[22.9,362.8,8.62,4.07,17.03,15.8,0.76,6.67,1.39],[21.4,439.1,7.57,3.69,5.35,15.08,3.39,1.05,6.28,[18.2,333.8,8.24,14.1,11.52,25.37,6.46,0.05,19.7,1.16],[23.8,328.0,6.88,0.83,0.57,12.44,1.46,0.85],[18.7,228.0,5.39,9.85,11.31,23.03,4.82,0.01,11.85,1.11],[18.6,245.9,9.05,3.34,1.24,7.67,20.94,8.1,0.53],[17.8,249.0,8.45,9.39,3.26,13.13,3.65,15.44,[24.7,301.5,6.03,10.38,17.16,24.22,0.11,9.17,0.92],343.8,1.04,3.86,2.83,0.53,2.28,0.91],[19.9,323.3,8.26,4.72,0.95,8.22,10.05,7.05,1.13],[21.9,123.4,6.39,6.9,19.67,20.69,10.08,0.03,9.4,0.6],[22.6,4.69,11.82,11.2,1.99,6.25,0.69],[21.1,31.35,18.41,1.88,12.63,1.26],[21.3,352.1,9.45,12.2,88.26,3.04,3.08,0.45,17.27,[22.8,251.3,8.53,2.66,22.7,3.23,0.47],[26.1,373.5,7.88,7.4,4.39,13.76,1.09,4.63,[21.6,3.4,1.56,6.63,0.54],[19.7,290.3,7.87,1.7,84.95,2.08,6.3,8.38],[22.7,380.9,6.93,7.24,34.6,34.52,4.05,0.74,11.1,1.59],[17.7,327.7,8.03,2.72,52.68,0.54,3.24,1.52],[25.5,345.6,7.51,5.42,2.7,17.44,10.42,0.97],[18.63,323.14,8.41,0.6,1.41,35.35,0.453,1.96,1.04],280.9,9.57,1.02,0.09,0.97,2.36,0.26,0.46],[21.0,21.5,21.79,2.26,12.17,1.08],[24.2,322.9,8.86,6.04,2.33,17.75,221.26,0.29,7.8,0.82],[23.38,358.44,7.22,15.35,16.51,3.22,0.477,6.52,1.61],308.3,7.97,3.35,0.77,8.87,3.64,6.83,0.8],[20.3,430.2,6.76,2.48,4.33,6.22,28.41,1.23,2.74,[15.6,331.0,7.63,0.85,1.11,21.22,0.65,0.86,1.4],[21.7,254.4,5.95,5.34,8.84,15.74,124.61,6.55,0.63],304.4,8.37,6.26,12.34,2.31,1.18],[22.1,285.6,7.02,7.6,31.63,25.19,3.43,10.18,0.61],[20.9,451.0,1.42,1.63,9.93,4.85,0.67,3.87,1.87],351.3,7.58,3.41,12.22,12.16,0.49,5.56,[24.53,433.68,16.43,20.15,2.173,6.53,3.47],5.83,12.79,16.98,5.03,6.8,0.5],339.9,5.71,1.69,2.51,4.6,5.89,0.78,1.72,[21.8,195.5,7.59,1.0,0.71,10.01,0.38,1.67,0.78],261.9,6.87,2.87,12.9,19.62,8.8,0.57],118.1,7.93,5.04,0.13,9.99,6.32,7.38,1.65],[24.4,287.2,7.92,2.63,11.28,0.93,0.39,11.93,[9.7,276.9,9.48,2.0,0.14,2.82,0.45],[19.5,301.4,8.13,2.73,1.48,6.18,15.99,3.58,354.7,6.12,9.21,2.19,17.15,12.67,[22.3,277.9,9.6,39.59,26.79,11.97,1.03],305.4,7.03,6.7,7.26,13.56,3.45,10.0,0.86],[15.14,236.7,14.24,[26.4,340.9,7.99,11.66,18.01,7.56,10.93,[20.6,332.6,8.67,12.7,79.92,44.14,2.65,0.28,20.6,[19.8,409.8,9.84,0.62,0.92,11.7,7.08,1.9,382.3,1.29,0.16,1.52,0.48],[25.1,7.07,7.49,26.42,0.21,13.47,1.6]]
    # instances = pandas.read_excel(excel_file).drop('Date_Time',axis=1).values.tolist()

    ## parse data from firestore
    vals = [list(val.values()) for val in event['value']['fields'].values()]
    instances = [list(np.concatenate(vals, axis=0))]


    ## call predict method on model
    service = googleapiclient.discovery.build('ml', 'v1')
    name = 'projects/{}/models/{}/versions/{}'.format(PROJECT_ID, MODEL_NAME, VERSION_NAME)
    response = service.projects().predict(name=name, body={'instances': instances}).execute()


    ## handle the response
    if 'error' in response:
        raise RuntimeError(response['error'])
    else:
        features = ['microcystin', 'feature2', 'anotherFeature']
        values = [100, 2.3, 312]

        predictions = {}
        if len(features) == len(values):
            predictions = dict(zip(features, values))
            
        db = firestore.Client()
        doc_ref = db.collection(u'users').document(u'user-id/alert_prediction/create_time')
        doc_ref.delete()
        doc_ref.set({
            u'predictions': predictions
        })
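
For context, when the call succeeds the online prediction response is shaped like {"predictions": [...]}, with one entry per submitted instance, so the results would be consumed along these lines (a sketch; the exact format of each prediction depends on the model):

# Sketch: read one prediction per submitted instance from a successful response
predictions = response.get('predictions', [])
for instance, prediction in zip(instances, predictions):
    print(instance, '->', prediction)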

The Cloud Function that creates/deploys the model and version:

import requests
from googleapiclient.discovery import build
from google.oauth2 import service_account
from google.cloud import storage
import pandas as pd
import saginawBayFileSetup
import saginawBayModeling
from sklearn.ensemble import HistGradientBoostingClassifier


def init_model(event, context):
    # Setup access to Google Cloud services
    service_account_file = 'service-account.json'
    SCOPES = ['https://www.googleapis.com/auth/cloud-platform']
    BUCKET_NAME = 'algae-mod-bucket1'
    credentials = service_account.Credentials.from_service_account_file(service_account_file, scopes=SCOPES)
    ml = build('ml', 'v1', credentials=credentials)

    # get training set
    dataCleaningValues = ['', ' ', 'nd', '.', 'n/a']   # List of erroneous values in columns, passed to the file import as na_values=dataCleaningValues
    df = pd.read_excel('gs://algae-mod-bucket1/source-data/source-data-saginaw-bay-habs-2012-2019MergedFinal.xlsx', parse_dates=[['Date', 'Time']], sheet_name='saginaw_bay_habs_2012_2019', na_values=dataCleaningValues)

    print(df)
    # ignore
    # storage_client = storage.Client()
    # blob = storage.bucket(BUCKET_NAME).get_blob(data_file_location)
    # data = blob.download_as_string()
    # f = io.StringIO(str(data))

    # run setup with custom alert levels
    fields = event['value']['fields']
    keys = list(fields.keys())
    alertLevels = {}
    for key in keys:
        if str(key).startswith("alert_level"):
            field_type = list(fields[key])[0]
            value = fields[key][field_type]
            alertLevels[key] = float(value)

    # print(alertLevels)

    cleaned_data = saginawBayFileSetup.run_setup(df, alertLevels["alert_level_1"], alertLevels["alert_level_2"], alertLevels["alert_level_3"])
    saginawBayModeling.gen_model(cleaned_data)

    # model = HistGradientBoostingClassifier()

    




    PROJECT_NAME = "algae-model"
    MODEL_NAME = "AlgaePredictor"
    VERSION_NAME = "v1"

    # Delete version and model
    project_id_model = 'projects/{}'.format(PROJECT_NAME)
    project_id_version = 'projects/{}/models/{}'.format(PROJECT_NAME, MODEL_NAME)
    # delete_version_body = {
    #     "name": project_id_version
    # }
    # delete_model_body = {
    #     "name": project_id_model
    # }
    # ml.projects().models().versions().delete(parent=project_id_version, body=delete_version_body).execute()
    # ml.projects().models().delete(parent=project_id_model, body=delete_model_body).execute()


    ## Create version and model
    model_request_dict = {
        "name": MODEL_NAME,
        "description": "Algae prediction model",
        "regions": [
            "us-central1"
        ],
        "onlinePredictionLogging": True,
        "onlinePredictionConsoleLogging": True
    }
    version_request_dict = {
        "name": VERSION_NAME,
        "deploymentUri": "gs://algae-mod-bucket1/model",
        "runtimeVersion": "2.2",
        "pythonVersion": "3.7"
    }
    response = ml.projects().models().create(parent=project_id_model, body=model_request_dict).execute()
    print(response)
    response = ml.projects().models().versions().create(parent=project_id_version, body=version_request_dict).execute()
    print(response)
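
One thing I notice while writing this up: the version request never declares the framework or pins the library version, so the server will unpickle the model with whatever scikit-learn ships in runtime 2.2. Here is a sketch of how the version body could at least make the framework explicit (the "framework" field is part of the versions.create API; whether the scikit-learn bundled with runtime 2.2 matches the version I trained with is something I still need to check against Google's runtime version list):

# Sketch: version request that declares the framework explicitly. If the
# scikit-learn used to train and pickle the model differs from the one
# bundled with the chosen runtimeVersion, the unpickled estimator can be
# missing attributes such as n_features_ at predict time.
version_request_dict = {
    "name": VERSION_NAME,
    "deploymentUri": "gs://algae-mod-bucket1/model",
    "runtimeVersion": "2.2",
    "pythonVersion": "3.7",
    "framework": "SCIKIT_LEARN"
}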
