如何将 MLRun 集成到我的代码中？

如何解决无法将 MLRun 集成到代码中的问题？

我正在尝试把 MLRun 集成到一段使用 Keras 对有害评论进行分类的代码中，但一直无法完成集成。

谁能告诉我应该修改代码的哪些地方，才能把 MLRun 集成进来？我的代码附在下面。

MLRun - link

MLRun 是一个用于跟踪和部署模型的开源软件。我能够训练模型，但无法将其与 MLRun 集成并部署，而且我觉得文档很难理解。请帮帮忙。

import matplotlib.pyplot as plt
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

from sklearn import preprocessing

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,MaxPool1D,Dropout,Dense,GlobalMaxPooling1D,Embedding,Activation
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pandas as pd

# Read each CSV and immediately drop its 'Unnamed: 0' column.
train_data = pd.read_csv('/content/toxic_train.csv').drop(columns=['Unnamed: 0'])
test_data = pd.read_csv('/content/toxic_test.csv').drop(columns=['Unnamed: 0'])

# Preview the first rows of each frame; the return values are not stored.
train_data.head()
test_data.head()

# Filled on first use: the NLTK corpus may not be downloaded yet when this
# module is first executed, so the list cannot be loaded at definition time.
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return the NLTK English stop words as a frozenset, loading them once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def preprocess_text(sen):
    """Normalize one comment for tokenization.

    The text is lower-cased, every non-letter character is replaced by a
    space, isolated single letters are removed, runs of whitespace are
    collapsed, and English stop words are dropped.

    Args:
        sen: The raw comment text (a ``str``).

    Returns:
        The cleaned text: lower-case words separated by single spaces.
    """
    # lower the character
    sentence = sen.lower()

    # Remove punctuations and numbers. This must operate on the lower-cased
    # text, otherwise the previous step is thrown away.
    sentence = re.sub(r'[^a-zA-Z]', ' ', sentence)

    # Single character removal (re.sub needs pattern, replacement, string)
    sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

    # Removing multiple spaces
    sentence = re.sub(r'\s+', ' ', sentence)

    # Filter whole words only: str.replace() would also cut stop words out of
    # the middle of longer words (e.g. "is" inside "this").
    stops = _english_stopwords()
    return ' '.join(word for word in sentence.split() if word not in stops)

import nltk
# Fetch the stop-word corpus before anything tries to read it.
nltk.download('stopwords')

from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))

# Clean the comment column of both frames with preprocess_text.

train_data['comment_text'] = train_data['comment_text'].apply(preprocess_text)
test_data['comment_text'] = test_data['comment_text'].apply(preprocess_text)

# Fit the tokenizer on the training comments, capped at 28164 words.

token = Tokenizer(num_words=28164)
token.fit_on_texts(train_data['comment_text'])

# Convert every comment to an integer sequence, then pad/truncate to 100 tokens.
text = pad_sequences(
    token.texts_to_sequences(train_data['comment_text']),
    maxlen=100,
)

y = train_data['toxic'].values

# Hold out 20% of the rows for validation, stratified on the label.

X_train, X_test, y_train, y_test = train_test_split(
    text,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

# build the model

# Same value as the num_words cap given to the Tokenizer above.
max_features = 28164
embedding_dim = 32

model = Sequential()
model.add(Embedding(max_features,embedding_dim))
model.add(Dropout(0.2))
model.add(LSTM(32,return_sequences=True))
# The LSTM returns one vector per timestep; pool over the time axis so the
# network emits a single probability per comment, matching the one-label-per-
# comment `toxic` target instead of one prediction per token.
model.add(GlobalMaxPooling1D())
model.add(Dropout(0.2))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.summary()

# compile and train model

model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
# The training labels must be passed alongside the inputs; without y_train
# there is nothing to compute the loss against.
history = model.fit(X_train,y_train,batch_size=1024,validation_data=(X_test,y_test),epochs=5)

#mlrun
from cloudpickle import dumps
from mlrun import get_or_create_ctx

# `context` is not defined anywhere at module level, so obtain (or create) an
# MLRun execution context before logging the trained model as an artifact.
context = get_or_create_ctx('toxic-comment-classifier')

# NOTE(review): pickling a Keras model with cloudpickle may not work on every
# TensorFlow version - confirm, or save the model to a file and log that file.
model_data = dumps(model)
context.log_model(key='my_model',body=model_data,model_file='my_model.pkl')

def train_iris(context: MLClientCtx,dataset: DataItem,label_column: str = "labels"):
    """Train a logistic-regression classifier and log data, metrics and model to MLRun.

    The dataset is split into train and test parts, both parts are logged as
    dataset artifacts, a ``LogisticRegression`` model is fitted on the train
    part, evaluated on the test part, and finally logged as a model artifact.

    NOTE(review): ``MLClientCtx``, ``DataItem``, ``get_sample``,
    ``eval_model_v2`` and ``linear_model`` are not imported in the visible
    part of this file; they must be in scope before this function is defined
    or called.

    Args:
        context: MLRun execution context used for all ``log_*`` calls.
        dataset: Input data item handed to ``get_sample``.
        label_column: Name of the label column handed to ``get_sample``.
    """
    raw,labels,_header = get_sample(dataset,sample=-1,label=label_column)

    # Split features and labels together so all four parts exist and stay
    # aligned row-for-row.
    X_train,X_test,y_train,y_test = train_test_split(raw,labels,random_state=42)

    context.log_dataset('train_set',df=pd.concat([X_train,y_train.to_frame()],axis=1),format='csv',index=False,artifact_path=context.artifact_subpath('data'))

    # Logged the same way as the train set (column-wise concat, CSV, no index),
    # with an extra label marking it as held-out data.
    context.log_dataset('test_set',df=pd.concat([X_test,y_test.to_frame()],axis=1),format='csv',index=False,labels={"data-type": "held-out"},artifact_path=context.artifact_subpath('data'))

    # Basic scikit-learn logistic-regression model
    model = linear_model.LogisticRegression(max_iter=10000)
    model.fit(X_train,y_train)

    # Evaluate model results and get the evaluation metrics
    eval_metrics = eval_model_v2(context,X_test,y_test,model)

    # Log model
    context.log_model("model",body=dumps(model),artifact_path=context.artifact_subpath("models"),extra_data=eval_metrics,model_file="model.pkl",metrics=context.results,labels={"class": "sklearn.linear_model.LogisticRegression"})

解决方法

您的代码看起来已经可以用 MLRun 执行了。根据 MLRun 的安装方式，您需要先配置环境，使其能够找到 MLRun API。请参阅：https://docs.mlrun.org/en/latest/install.html

这是一个可用于测试的简短脚本。

from mlrun import code_to_function

fn = code_to_function('train_iris',handler='train_iris',kind='job',filename="<YOUR PYTHON FILE PATH>")
# RUN LOCAL
fn.run(project='iris',local=True)

# RUN IN KUBERNETES (if you are running MLRUN in Kubernetes)
# local=False submits the job to the cluster instead of running it in the
# current process; with local=True this call would just repeat the run above.
fn.run(project='iris',local=False)