
Scipy and Sklearn Yeo-Johnson normalization results do not match

How to resolve the mismatch between the Scipy and Sklearn Yeo-Johnson normalization results

I am running the Yeo-Johnson transform and followed the example given on the Scipy website (Scipy link). I also compared it against the Sklearn implementation. Here is the code:

    import seaborn as sns
    from sklearn.preprocessing import PowerTransformer
    from scipy import stats
    import matplotlib.pyplot as plt
    import numpy as np

    fig = plt.figure( figsize=(10,10))
    ax1 = fig.add_subplot(421)
    x = stats.loggamma.rvs(5,size=500) + 5
    prob = stats.probplot(x,dist=stats.norm,plot=ax1)
    ax1.set_xlabel('')
    ax1.set_title('Probplot')

    ax2 = fig.add_subplot(422)
    sns.distplot(x,color="skyblue")
    ax2.set_title('distribution of Data')

    ax3 = fig.add_subplot(423)
    xt_scipy,lmbda = stats.yeojohnson(x)
    prob = stats.probplot(xt_scipy,plot=ax3)
    ax3.set_xlabel('')
    ax3.set_title('Probplot:Yeo-Johnson:Scipy')

    ax4 = fig.add_subplot(424)
    sns.distplot(xt_scipy,color="skyblue")
    ax4.set_title('distribution of Transformed Data')

    ax5 = fig.add_subplot(425)
    pt = PowerTransformer(method = 'yeo-johnson',standardize = True)
    xt_sklearn = pt.fit_transform(x.reshape(-1,1))
    prob = stats.probplot(xt_sklearn.flatten(),plot=ax5)
    ax5.set_xlabel('')
    ax5.set_title('Probplot:Yeo-Johnson:Sklearn')

    ax6 = fig.add_subplot(426)
    sns.distplot(xt_sklearn,color="skyblue")
    ax6.set_title('distribution of Transformed Data')
    plt.tight_layout(h_pad=0.9,w_pad=0.9)
    plt.show()

Looking at the attached figure, both methods appear to normalize the data as expected, as the quantile plots show. However, while the distributions of the transformed data from the two libraries have the same shape, they cover different ranges of values. Why do the transformed values differ, and which one corresponds to the true Yeo-Johnson formula?

Figure1
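
For reference, the Yeo-Johnson transform that both libraries implement is defined piecewise: ((y+1)^lambda - 1)/lambda for y >= 0 and lambda != 0; log(y+1) for y >= 0 and lambda = 0; -((1-y)^(2-lambda) - 1)/(2-lambda) for y < 0 and lambda != 2; and -log(1-y) for y < 0 and lambda = 2. The sketch below (the yeo_johnson helper is illustrative, not part of either library) spells out this definition and checks it against scipy for a fixed lambda:

    import numpy as np
    from scipy import stats

    def yeo_johnson(y, lmbda):
        # Direct, piecewise implementation of the Yeo-Johnson definition (illustrative)
        y = np.asarray(y, dtype=float)
        out = np.empty_like(y)
        pos = y >= 0
        if abs(lmbda) > 1e-12:
            out[pos] = ((y[pos] + 1) ** lmbda - 1) / lmbda
        else:
            out[pos] = np.log1p(y[pos])
        if abs(lmbda - 2) > 1e-12:
            out[~pos] = -(((1 - y[~pos]) ** (2 - lmbda) - 1) / (2 - lmbda))
        else:
            out[~pos] = -np.log1p(-y[~pos])
        return out

    x = stats.loggamma.rvs(5, size=500) + 5
    # With an explicit lmbda, stats.yeojohnson returns only the transformed array
    print(np.allclose(yeo_johnson(x, 0.5), stats.yeojohnson(x, lmbda=0.5)))  # expected: True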


Solution

This was my mistake: I did not realize that Sklearn performs standard scaling after the power transform by default. Here is the modified code, which produces matching results.

    import seaborn as sns
    import sklearn.preprocessing
    from sklearn.preprocessing import PowerTransformer,StandardScaler
    from scipy import stats
    import matplotlib.pyplot as plt
    import numpy as np

    ss = StandardScaler()
    fig = plt.figure( figsize=(10,10))
    ax1 = fig.add_subplot(441)
    x = stats.loggamma.rvs(5,size=500) + 5
    prob = stats.probplot(x,dist=stats.norm,plot=ax1)
    ax1.set_xlabel('')
    ax1.set_title('Probplot')

    ax2 = fig.add_subplot(442)
    sns.distplot(x,color="skyblue")
    ax2.set_title('Distribution of Data')

    ax5 = fig.add_subplot(445)
    xt_scipy,lmbda = stats.yeojohnson(x)
    prob = stats.probplot(xt_scipy,plot=ax5)
    ax5.set_xlabel('')
    ax5.set_title('Probplot:Yeo-Johnson:Scipy')

    ax6 = fig.add_subplot(446)
    sns.distplot(xt_scipy,color="skyblue")
    ax6.set_title('Distribution of Transformed Data')

    ax7 = fig.add_subplot(447)
    xt_scipy_ss,lmbda = stats.yeojohnson(x)
    xt_scipy_ss = ss.fit_transform(xt_scipy_ss.reshape(-1,1))
    prob = stats.probplot(xt_scipy_ss.flatten(),plot=ax7)
    ax7.set_xlabel('')
    ax7.set_title('Probplot:Yeo-Johnson + Stand Scal:Scipy')

    ax8 = fig.add_subplot(448)
    sns.distplot(xt_scipy_ss,color="skyblue")
    ax8.set_title('Distribution of Transformed Data')

    ax9 = fig.add_subplot(449)
    pt = PowerTransformer(method = 'yeo-johnson',standardize = False)
    xt_sklearn = pt.fit_transform(x.reshape(-1,1))
    prob = stats.probplot(xt_sklearn.flatten(),plot=ax9)
    ax9.set_xlabel('')
    ax9.set_title('Probplot:Yeo-Johnson:Sklearn')

    ax10 = fig.add_subplot(4,4,10)
    sns.distplot(xt_sklearn,color="skyblue")
    ax10.set_title('Distribution of Transformed Data')

    ax11 = fig.add_subplot(4,4,11)
    pt = PowerTransformer(method='yeo-johnson',standardize=True)
    xt_sklearn_ss = pt.fit_transform(x.reshape(-1,1))
    prob = stats.probplot(xt_sklearn_ss.flatten(),plot=ax11)
    ax11.set_xlabel('')
    ax11.set_title('Probplot:Yeo-Johnson:Sklearn with Stand Scal')

    ax12 = fig.add_subplot(4,4,12)
    sns.distplot(xt_sklearn_ss,color="skyblue")
    ax12.set_title('Distribution of Transformed Data')
    plt.tight_layout(h_pad=0.9,w_pad=0.9)
    plt.show()
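
A short check along these lines (reusing the same recipe for x) confirms the two libraries agree once the default standardization is accounted for; the estimated lambdas may differ by tiny amounts due to the optimizers' tolerances:

    import numpy as np
    from scipy import stats
    from sklearn.preprocessing import PowerTransformer, StandardScaler

    x = stats.loggamma.rvs(5, size=500) + 5

    # Scipy: transformed data plus the maximum-likelihood lambda
    xt_scipy, lmbda = stats.yeojohnson(x)

    # Sklearn without its default standardization step
    pt_raw = PowerTransformer(method='yeo-johnson', standardize=False).fit(x.reshape(-1, 1))
    xt_sklearn_raw = pt_raw.transform(x.reshape(-1, 1)).flatten()

    # The fitted lambdas and the raw transforms should agree (up to optimizer tolerance)
    print(lmbda, pt_raw.lambdas_[0])
    print(np.allclose(xt_scipy, xt_sklearn_raw))

    # Standardizing scipy's output reproduces sklearn's default (standardize=True) result
    xt_scipy_ss = StandardScaler().fit_transform(xt_scipy.reshape(-1, 1)).flatten()
    pt_std = PowerTransformer(method='yeo-johnson', standardize=True).fit(x.reshape(-1, 1))
    xt_sklearn_ss = pt_std.transform(x.reshape(-1, 1)).flatten()
    print(np.allclose(xt_scipy_ss, xt_sklearn_ss))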
