在前馈的输出中,我总是有一个超大的数字,但其他数字接近零

如何解决在前馈的输出中,我总是有一个超大的数字,但其他数字接近零

我有一个具有 10 个输出感知器(cnn 的全连接层)。这些输出总是有一两个大的,而其他的接近于零。我使用 ReLU 和 softmax 来计算输出概率。这些奇怪输出的一些例子:


  1. 0.03676021
  2. 0.1569262
  3. 99.48537
  4. 0.03676021
  5. 0.03676021
  6. 0.03676021
  7. 0.03676021
  8. 0.10039
  9. 0.03676021
  10. 0.03676021

  1. 99.9898
  2. 5.67829E-11
  3. 3.183056E-11
  4. 9.487049E-12
  5. 0.004471419
  6. 4.597222E-11
  7. 0.005729798
  8. 5.412427E-11
  9. 1.847427E-12
  10. 6.115809E-09

  1. 0.06430105
  2. 0.06430105
  3. 0.06430105
  4. 98.25629
  5. 0.06430105
  6. 0.9340076
  7. 0.06430105
  8. 0.06430105
  9. 0.3595946
  10. 0.06430105

您还可以看到值非常相似。这是我的感知器代码

    biasesWeights = new float[3];
    for (int i = 0; i < 3; i++)
    {
        biasesWeights[i] = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f,0.5f) * 100) / 100;
    }   
    neurons = new float[4][];
    neurons = InitializationJaggedArr(neurons,4,512,256,32,10);
    weights = new float[3][,];
    weights = InitializationJaggedArr(weights,3,10);
}
private float[][] InitializationJaggedArr(float[][] arr,int length,params int[] x )
{
    for (int i = 0; i < length; i++)
    {
        arr[i] = new float[x[i]];
    }
    return arr;
}
private float[][,] InitializationJaggedArr(float[][,] arr,params int[] x)
{
    int p = 0;
    for (int i = 0; i < length; i++)
    {
        arr[i] = new float[x[p],x[++p]];
    }
    return arr;
}
public void RandomInitializationOfWeights()
{
    for (int i = 0; i < 512; i++)
    {
        for (int j = 0; j < 256; j++)
        {
            float val = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f,0.5f) * 100) / 100;
            if(val == 0)
                val = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f,0.5f) * 100) / 100;

            weights[0][i,j] = val;
        }
    }
    for (int i = 0; i < 256; i++)
    {
        for (int j = 0; j < 32; j++)
        {
            float val = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f,0.5f) * 100) / 100;
            if (val == 0)
                val = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f,0.5f) * 100) / 100;

            weights[1][i,j] = val;
        }
    }
    for (int i = 0; i < 32; i++)
    {
        for (int j = 0; j < 10; j++)
        {
            float val = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f,0.5f) * 100) / 100;

            weights[2][i,j] = val;
        }
    }
}
private float ReLU(float val)
{
    if (val > 0)
    {
        return val;
    }
    else
    {
        return 0;
    }
}
private float[] softmax(float[] arr)
{
    float[] results = new float[10];
    float val = 0;
    for (int i = 0; i < 10; i++)
    {
        val += (float)Math.Exp(arr[i]);
    }
    for (int i = 0; i < 10; i++)
    {
        results[i] = (float)Math.Exp(arr[i]) / val; 
    }
    return results;
}
public float[] FeedForward(float[] inputArr)
{
    neurons[0] = inputArr;
    for (int j = 0; j < neurons[1].Length; j++)
    {
        for (int i = 0; i < neurons[0].Length; i++)
        {
            neurons[1][j] += neurons[0][i] * weights[0][i,j];
        }
        neurons[1][j] += 1 * biasesWeights[0];
        neurons[1][j] = ReLU(neurons[1][j]);
    }
    for (int j = 0; j < neurons[2].Length; j++)
    {
        for (int i = 0; i < neurons[1].Length; i++)
        {
            neurons[2][j] += neurons[1][i] * weights[1][i,j];
        }
        neurons[2][j] += 1 * biasesWeights[1];
        neurons[2][j] = ReLU(neurons[2][j]);
    }
    for (int j = 0; j < neurons[3].Length; j++)
    {
        for (int i = 0; i < neurons[2].Length; i++)
        {
            neurons[3][j] += neurons[2][i] * weights[2][i,j];
        }
        neurons[3][j] += 1 * biasesWeights[2];
        neurons[3][j] = ReLU(neurons[3][j]);
    }
    return softmax(neurons[3]);
}

卷积层的代码

private static float[][][,] filters;
public ConvolutinalLayer()
{
    filters = new float[2][][,];
    filters = InitiaizationJaggedMatrixArr(filters,5);
}
public void RandomSetFilters()
{
    for (int i = 0; i < 2; i++)
    {
        for (int j = 0; j < 32; j++)
        {
            for (int o = 0; o < 5; o++)
            {
                for (int h = 0; h < 5; h++)
                {
                    filters[i][j][o,h] = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f,0.5f) * 100) / 100; 
                }
            }
        }
    }
}
public float[] FeedForward(Digitimage image)
{
    float[][,] arrImage = new float[1][,];
    arrImage = InitiaizationJaggedMatrixArr(arrImage,28);
    for (int i = 0; i < 28; i++)
    {
        for (int j = 0; j < 28; j++)
        {
            arrImage[0][i,j] = (image.pixels[i][j]*2/255)+-1;
        }
    }


    float[][,] conv1 = ConvolutionalLayer(arrImage,24,0);
    float[][,] active1 = ActivationLayer(conv1,24);
    float[][,] pool1 = PoolingLayer(active1,12);
    float[][,] conv2 = ConvolutionalLayer(pool1,8,1);
    float[][,] active2 = ActivationLayer(conv2,8);
    float[][,] pool2 = PoolingLayer(active2,4);
    return FlattingLayer(pool2,4);
}
private float[][,] InitiaizationJaggedMatrixArr(float[][,int scale)
{
    for (int i = 0; i < arr.Length; i++)
    {
        arr[i] = new float[scale,scale];
    }
    return arr;
}
private float[][][,] InitiaizationJaggedMatrixArr(float[][][,int scale)
{
    arr[0] = new float[32][,];
    arr[1] = new float[32][,];

    for (int i = 0; i < 2; i++)
    {
        for (int j = 0; j < 32; j++)
        {
            arr[i][j] = new float[scale,scale];
        }
    }
    return arr;
}
private float[][,] ConvolutionalLayer(float[][,] layer,int depthOutput,int scale,int numFilter)
{
    float[][,] arr = new float[depthOutput][,];
    arr = InitiaizationJaggedMatrixArr(arr,scale);
    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                float val = 0;
                for (int o = 0; o < 5; o++)
                {
                    for (int e = 0; e < 5; e++)
                    {
                        if (numFilter == 0)
                        {
                            val += layer[0][i + o,j + e] * filters[numFilter][h][o,e];
                        }
                        else
                        {
                            val += layer[h][i + o,e];
                        }
                    }
                }
                arr[h][i,j] = val;
            }
        }
    }

    return arr;
}
private float[][,int numFilter,int zeroPadding)
{
    float[][,scale);
    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                float val = 0;
                for (int o = 0; o < filters.GetLength(2); o++)
                {
                    for (int e = 0; e < filters.GetLength(2); e++)
                    {
                        if (numFilter == 0)
                        {
                            val += layer[0][i + o,] ActivationLayer(float[][,int scale)
{
    float[][,scale);
    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                arr[h][i,j] = ReLU(layer[h][i,j]);
            }
        }
    }
    return arr;
}
private float ReLU(float val)
{
    if (val > 0)
    {
        return val;
    }
    else
    {
        return 0;
    }
}
private float[][,] PoolingLayer(float[][,scale);

    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i = i + 2)
        {
            for (int j = 0; j < scale; j = j + 2)
            {
                float val = 0;
                for (int o = 0; o < 2; o++)
                {
                    for (int e = 0; e < 2; e++)
                    {
                        if (val < layer[h][i + o,j + e])
                        {
                            val = layer[h][i + o,j + e];
                        }
                    }
                }
                arr[h][i/2,j/2] = val;
            }
        }
    }
    return arr;
}
private float[] FlattingLayer(float[][,int depthInput,int scale)
{
    float[] arr = new float[512];
    int p = 0;
    for (int h = 0; h < depthInput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                arr[p] = layer[h][i,j];
                p++;
            }
        }
    }
    return arr;
}

那么它实际上是一个问题,如果是如何解决它 附注我还没有训练过cnn

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐


Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其他元素将获得点击?
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。)
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbcDriver发生异常。为什么?
这是用Java进行XML解析的最佳库。
Java的PriorityQueue的内置迭代器不会以任何特定顺序遍历数据结构。为什么?
如何在Java中聆听按键时移动图像。
Java“Program to an interface”。这是什么意思?
Java在半透明框架/面板/组件上重新绘画。
Java“ Class.forName()”和“ Class.forName()。newInstance()”之间有什么区别?
在此环境中不提供编译器。也许是在JRE而不是JDK上运行?
Java用相同的方法在一个类中实现两个接口。哪种接口方法被覆盖?
Java 什么是Runtime.getRuntime()。totalMemory()和freeMemory()?
java.library.path中的java.lang.UnsatisfiedLinkError否*****。dll
JavaFX“位置是必需的。” 即使在同一包装中
Java 导入两个具有相同名称的类。怎么处理?
Java 是否应该在HttpServletResponse.getOutputStream()/。getWriter()上调用.close()?
Java RegEx元字符(。)和普通点?