微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

运行一些数据分析程序,但最后一个数字没有显示?

如何解决运行一些数据分析程序,但最后一个数字没有显示?

有一些数据分析工作需要我们处理。这是关于一些行业背景数据。目标是分析这些数据并从这些数据中挖掘一些模式。数据文件是简单的以简单文本格式呈现的文件。它是关于一些火力发电并预测每种情况下产生的蒸汽。 数据分析程序运行如下:



import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


###import warnings
###warnings.filterwarnings("ignore")





import logging
logging.basicConfig(format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',level=logging.DEBUG)
logging.debug('debug info')
logging.info('info info')
logging.warning('warning info')
logging.error('error info')
logging.critical('critial info')





##%matplotlib inline



train_data_file = "./steam_train.txt"
test_data_file = "./steam_test.txt"



train_data = pd.read_csv(train_data_file,sep='\t',encoding='utf-8')
test_data = pd.read_csv(test_data_file,encoding='utf-8')



train_data.info()
test_data.info()


test_data.describe()
test_data.describe()


train_data.head()
test_data.head()


fig = plt.figure(figsize=(4,6))
sns.Boxplot(train_data['V0'],orient="v",width=0.5)



column = train_data.columns.tolist()[:39]
fig = plt.figure(figsize=(20,40))
for i in range(38):
    plt.subplot(13,3,i + 1)
    sns.Boxplot(train_data[column[i]],width=0.5)
    plt.ylabel(column[i],fontsize=8)
plt.show()


def find_outliers(model,X,y,sigma=3):
    # predict y values using model
    try:
        y_pred = pd.Series(model.predict(X),index=y.index)
    # if predicting fails,try fitting the model first
    except:
        model.fit(X,y)
        y_pred = pd.Series(model.predict(X),index=y.index)

    # calculate residuals between the model prediction and true y values
    resid = y - y_pred
    mean_resid = resid.mean()
    std_resid = resid.std()

    # calculate z statistic,define outliers to be where |z|>sigma
    z = (resid - mean_resid) / std_resid
    outliers = z[abs(z) > sigma].index

    # print and plot the results
    print('R2=',model.score(X,y))
    print('Mse=',mean_squared_error(y,y_pred))
    print('-------------------------------------------------------')

    print(len(outliers),'outliers;',' ALL data shape:',X.shape)

    plt.figure(figsize=(15,5))
    ax_131 = plt.subplot(1,1)
    plt.plot(y,y_pred,'.')
    plt.plot(y.loc[outliers],y_pred.loc[outliers],'ro')
    plt.legend(['Accepted','Outlier'])
    plt.xlabel('y')
    plt.ylabel('y_pred');

    ax_132 = plt.subplot(1,2)
    plt.plot(y,y - y_pred,y.loc[outliers] - y_pred.loc[outliers],'Outlier'])
    plt.xlabel('y')
    plt.ylabel('y - y_pred');

    ax_133 = plt.subplot(1,3)
    z.plot.hist(bins=50,ax=ax_133)
    z.loc[outliers].plot.hist(color='r',bins=50,ax=ax_133)
    plt.legend(['Accepted','Outlier'])
    plt.xlabel('z')

    plt.savefig('outliers.png')

    return outliers



from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
X_train = train_data.iloc[:,0:-1]
y_train = train_data.iloc[:,-1]
outliers = find_outliers(Ridge(),X_train,y_train)





plt.figure(figsize=(10,5))

ax=plt.subplot(1,2,1)
sns.distplot(train_data['V0'],fit=stats.norm)
###plt.show()
ax=plt.subplot(1,2)
res = stats.probplot(train_data['V0'],plot=plt)
plt.show()


'''
train_cols = len(train_data.columns)
plt.figure(figsize=( train_cols,4 ))

i = 0
for col in train_data.columns:
    i += 1
    ax = plt.subplot( train_cols,4,i)
    sns.distplot(train_data[col],fit=stats.norm)

    i += 1
    ax = plt.subplot( train_cols,i)
    res = stats.probplot(train_data[col],plot=plt)
plt.tight_layout()
plt.show()
'''


train_cols = 6
train_rows = len(train_data.columns)
plt.figure(figsize=(4 * train_cols,4 * train_rows))

i = 0
for col in train_data.columns:
    i += 1
    ax = plt.subplot(train_rows,train_cols,fit=stats.norm)

    i += 1
    ax = plt.subplot(train_rows,plot=plt)
plt.show()

但是运行最后一行代码后,并没有显示最后一个数字。上面的代码有什么问题?

关于所需的数据文件可以从这里下载:data files

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。