
Is my backpropagation implementation correct, even though gradient checking returns different gradients for specific inputs?

It has been a while since I last asked a question here, so if anything is wrong or missing, please point it out and I will try to fix it! :)

In short: for a specific edge case, the gradients computed by my backpropagation algorithm are not nearly equal to the numerically computed gradients.

More information

Below you can find my entire implementation of the neural network and the backpropagation. The functions that are probably wrong are called calcDeltas2D and calcDeltasAndAdjustments (I am still struggling with the naming).

calcDeltas2D only calculates the deltas of the backpropagation; calcDeltasAndAdjustments then uses the result of that function to calculate the final adjustments (without the learning rate).
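
For reference, with the squared-error loss E = 0.5 * sum (expected - activation)^2 and a sigmoid activation, these two functions are meant to compute (written with the same names that appear in the code):

    delta_j = (o_j - expected_j) * o_j * (1 - o_j)                for the output layer
    delta_j = (sum over l of delta_l * w_jl) * o_j * (1 - o_j)    for hidden layers
    adjustment_ij = delta_j * o_i                                 i.e. dE/dw_ij without the learning rate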

The code includes tests that use Expecto as the test library. In each test, the result of running backpropagation is compared against the numerical gradients. The first three tests use random inputs and random weights across various combinations of network architectures (trying to cover all the cases); they succeed most of the time (even with an epsilon below 1e-1 :) ). The last test case uses constant values for all neurons together with random weights, and it fails most of the time (the random weights seem to have an influence on the result here).
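
To make the comparison concrete, here is a minimal standalone sketch of the same kind of check for a single sigmoid neuron with one weight (it is not part of the implementation below, and the helper names are made up for this snippet):

let sigma x = 1.0 / (1.0 + exp (-x))

// Compare the analytic gradient of E = 0.5 * (t - o)^2 with a central-difference estimate.
let gradientCheckSingleNeuron (x: float) (w: float) (t: float) =
    // loss as a function of the weight alone
    let loss w' =
        let o = sigma (w' * x)
        0.5 * (t - o) * (t - o)
    let o = sigma (w * x)
    // backprop gradient: dE/dw = (o - t) * o * (1 - o) * x
    let analytic = (o - t) * o * (1.0 - o) * x
    // numerical gradient: (E(w + eps) - E(w - eps)) / (2 * eps)
    let eps = 1e-5
    let numeric = (loss (w + eps) - loss (w - eps)) / (2.0 * eps)
    // relative distance, the same criterion the tests below use with a 1e-1 tolerance
    let dist = abs (analytic - numeric) / (abs analytic + abs numeric)
    analytic, numeric, dist

For instance, gradientCheckSingleNeuron 0.3 0.7 1.0 should give a relative distance far below 1e-1.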

Code

This runs fine on .NET 5 via dotnet fsi PathToThisCodeFile.fsx.

#r "nuget: MathNet.Numerics"
#r "nuget: MathNet.Numerics.FSharp"
#r "nuget: Expecto"

open MathNet.Numerics

let randomDoubles =
    Random.Random.doubles
    >> Array.map (fun v -> 1.0 - v * 2.0)

let randomDecimals = randomDoubles >> Array.map decimal

module Array2D =

    let inline zip arr1 arr2 =
        arr1
        |> Array2D.mapi (fun i j e1 -> (e1,Array2D.get arr2 i j))

    let inline toSeq (arr: 'a [,]) =
        seq {
            for i = 0 to Array2D.length1 arr - 1 do
                for j = 0 to Array2D.length2 arr - 1 do
                    yield arr.[i,j]
        }

// Logistic sigmoid activation; this float version is shadowed in the tests by a decimal-returning wrapper
let sigmaFun value = 1.0 / (1.0 + (exp (-float value)))

[<AutoOpen>]
module NN2D =
    type Node = decimal [,]

    type Layer = Node [,]

    type Network2D = { layers: Layer [] }

    type LayerResult =
        { activation: decimal [,]
          intervalues: decimal [,] list }

    type PositionNetwork = { layer: int; node: int * int }

    type CNN =
        { filterMatrices: decimal [,] []
          network: Network2D }

    // Forward pass: each node's activation is sigmaFun applied to the sum of the
    // elementwise products of the previous layer's activation and the node's weights;
    // intervalues accumulates the activation matrix of every layer, in order.
    let getValues2D (inputX: decimal [,]) (network: Network2D) sigmaFun =
        network.layers
        |> Array.fold
            (fun (state: LayerResult) nodes ->
                let activation = state.activation
                let ls = state.intervalues

                let res: decimal [,] =
                    nodes
                    |> Array2D.map
                        (fun weights ->
                            Array2D.zip activation weights
                            |> Array2D.map (fun (a,b) -> a * b)
                            |> Array2D.toSeq
                            |> Seq.sort
                            |> Seq.sum
                            |> sigmaFun)

                { LayerResult.activation = res
                  LayerResult.intervalues = List.append ls [ res ] })
            { LayerResult.activation = inputX
              LayerResult.intervalues = [] }

    // Backward pass: computes one delta per node. The output-layer delta comes from the
    // squared-error derivative; hidden-layer deltas propagate the weighted deltas of the layer above.
    let calcDeltas2D (inputX: decimal [,]) network sigmaFun (expected: decimal [,]): decimal [,] [] * LayerResult =
        let layerResult = getValues2D inputX network sigmaFun
        let lastLayerIndex = network.layers.Length - 1

        let deltas: decimal [,] [] =
            [| for layer in network.layers -> Array2D.zeroCreate (Array2D.length1 layer) (Array2D.length2 layer) |]

        layerResult.intervalues.[lastLayerIndex]
        |> Array2D.iteri
            (fun j1 j2 o_j -> deltas.[lastLayerIndex].[j1,j2] <- (o_j - expected.[j1,j2]) * (o_j * (1.0m - o_j)))

        for layer in lastLayerIndex - 1 .. -1 .. 0 do
            layerResult.intervalues.[layer]
            |> Array2D.iteri
                (fun j1 j2 o_j ->
                    let deltaSum =
                        let upperLayer = layer + 1

                        layerResult.intervalues.[upperLayer]
                        |> Array2D.mapi
                            (fun l1 l2 _ ->
                                let delta_l = deltas.[upperLayer].[l1,l2]

                                let w_jl =
                                    network.layers.[upperLayer].[l1,l2].[j1,j2]

                                delta_l * w_jl)
                        |> Array2D.toSeq
                        |> Seq.sort
                        |> Seq.sum

                    let delta_j = deltaSum * (o_j * (1.0m - o_j))
                    deltas.[layer].[j1,j2] <- delta_j)

        deltas,layerResult

    // Turns the deltas into per-weight adjustments: the node's delta multiplied by the activation
    // feeding into that weight (the raw input for the first layer). No learning rate is applied.
    let calcDeltasAndAdjustments (input: decimal [,]) (network) sigmaFun (expected: decimal [,]) =
        let deltasBeforePoolingLayer,layerResult =
            calcDeltas2D input network sigmaFun expected

        let intervalues =
            input :: layerResult.intervalues |> List.toArray

        network.layers
        |> Array.take (Array.length intervalues - 1)
        |> Array.mapi
            (fun layerNumber nodes ->
                nodes
                |> Array2D.mapi
                    (fun j1 j2 weights ->
                        weights
                        |> Array2D.mapi
                            (fun i1 i2 _ ->
                                deltasBeforePoolingLayer.[layerNumber].[j1,j2]
                                * intervalues.[(layerNumber - 1) + 1].[i1,i2]))),layerResult

    open Expecto
    open Expecto.Logging
    open Expecto.Logging.Message

    let private _tests =
        let sigmaFun (value: decimal) =
            1.0 / (1.0 + (exp (-float value))) |> decimal

        let logger = Log.create "2D NN Tests"

        testList
            "2-Dimensional NN functionality tests"
            // For every weight: nudge it by +/- epsilon, recompute the loss 0.5 * sum (expected - activation)^2,
            // estimate the gradient with a central difference and compare it with the backprop adjustment
            // via a relative distance; an entry is true when that distance is within the 1e-1 tolerance.
            [ let compareGradients
                network
                input
                (expectedValues: decimal [,])
                (adjustments: decimal [,] [,] [])
                logging
                =
                  network.layers
                  |> Array.mapi
                      (fun layerNumber nodes ->
                          nodes
                          |> Array2D.mapi
                              (fun j1 j2 weights ->
                                  Array2D.zip adjustments.[layerNumber].[j1,j2] weights
                                  |> Array2D.mapi
                                      (fun i1 i2 (adjustment,weight) ->
                                          let calcNetworkWithWeightAdjustment epsilon =
                                              let weights = network.layers.[layerNumber].[j1,j2]
                                              let oldWeight = weight
                                              let res1 = getValues2D input network sigmaFun
                                              weights.[i1,i2] <- oldWeight + epsilon
                                              let res = getValues2D input network sigmaFun

                                              if logging then
                                                  eventX $"weight: %A{(i1,i2)},value: {oldWeight},res: %A{res.activation},{res1.activation = res.activation}"
                                                  |> logger.info

                                              weights.[i1,i2] <- oldWeight

                                              let acc =
                                                  res.activation
                                                  |> Array2D.mapi
                                                      (fun g h activation ->
                                                          (expectedValues.[g,h] - activation)
                                                          * (expectedValues.[g,h] - activation))
                                                  |> Array2D.toSeq
                                                  |> Seq.sort
                                                  |> Seq.sum

                                              0.5m * acc

                                          let numericalGradient =
                                              let epsilons = [| 99399e-5m |]
                                              if logging then printfn "%A" epsilons
                                              let avgEpsilon = epsilons |> Array.average

                                              let calcNetworkWithShiftedWeightMultipleEpsilons epsilons negate =
                                                  let transform = if negate then fun e -> -e else id

                                                  epsilons
                                                  |> Array.averageBy
                                                      (fun e ->
                                                          let res =
                                                              transform e |> calcNetworkWithWeightAdjustment

                                                          res)

                                              let res1,res2 =
                                                  calcNetworkWithShiftedWeightMultipleEpsilons epsilons false,calcNetworkWithShiftedWeightMultipleEpsilons epsilons true

                                              let a =
                                                  match (res1 - res2) with
                                                  | 0.m -> 1e-20m
                                                  | value -> value

                                              (a) / (2.0m * avgEpsilon)

                                          let dist =
                                              abs (adjustment - numericalGradient)
                                              / (abs adjustment + abs numericalGradient)
                                          if logging && dist > 1.0e-1m then
                                              eventX $"calculated gradient: {adjustment},numerical gradient: {numericalGradient},dist: {dist}"
                                              |> logger.info

                                          dist <= 1.0e-1m)))

              test "Check that Calculated gradients are nearly equal to the numerical gradients | Single Node in all Layers" {
                  let input =
                      randomDecimals 1 |> Array.singleton |> array2D

                  let network =
                      { Network2D.layers =
                            [| array2D [| [| array2D [| randomDecimals input.Length |] |] |]
                               array2D [| [| array2D [| randomDecimals 1 |> Array.map decimal |] |] |] |] }

                  let expectedValues =
                      randomDecimals 1 |> Array.singleton |> array2D

                  let adjustments,_ =
                      calcDeltasAndAdjustments input network sigmaFun expectedValues

                  let expected =
                      network.layers
                      |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))

                  let actual =
                      compareGradients network input expectedValues adjustments false

                  Expect.sequenceEqual actual expected "gradients"
              }

              test "Check that Calculated gradients are nearly equal to the numerical gradients | Single Node only in last layer" {
                  let input = array2D [| randomDecimals 10 |]
                  let innerLayerSize = 3

                  let network =
                      { Network2D.layers =
                            [| Array2D.init
                                innerLayerSize
                                innerLayerSize
                                (fun _ _ -> array2D [| randomDecimals <| Array2D.length2 input |])
                               array2D [| [| array2D [| for _ in 0 .. innerLayerSize - 1 ->
                                                            randomDecimals innerLayerSize |] |] |] |] }

                  let expectedValues =
                      randomDecimals 1 |> Array.singleton |> array2D

                  let adjustments,_ =
                      calcDeltasAndAdjustments input network sigmaFun expectedValues

                  let expected =
                      network.layers
                      |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))

                  let actual =
                      compareGradients network input expectedValues adjustments false

                  Expect.sequenceEqual actual expected "gradients"
              }

              test "Check that Calculated gradients are nearly equal to the numerical gradients | Multi Node in all Layers" {
                  let input = array2D [| randomDecimals 10 |]
                  let innerLayerSize = 3

                  let network =
                      { Network2D.layers =
                            [| Array2D.init
                                innerLayerSize
                                innerLayerSize
                                (fun _ _ -> array2D [| randomDecimals <| Array2D.length2 input |])
                               array2D [| [| for _ in 0 .. innerLayerSize - 1 ->
                                                 array2D [| for _ in 0 .. innerLayerSize - 1 ->
                                                                randomDecimals innerLayerSize |] |] |] |] }

                  let expectedValues =
                      randomDecimals innerLayerSize
                      |> Array.singleton
                      |> array2D

                  let adjustments,_ =
                      calcDeltasAndAdjustments input network sigmaFun expectedValues

                  let expected =
                      network.layers
                      |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))

                  let actual =
                      compareGradients network input expectedValues adjustments false

                  Expect.sequenceEqual actual expected "gradients"
              }

              test "regression 1" {
                  let input = Array2D.create 1 5 -0.2m
                  let expectedValues = array2D [ [ 1.0m ] ]

                  let network =
                      let layer1X = 5
                      let layer1Y = 1

                      let layers =
                          [| Array2D.create layer1X layer1Y
                             <| array2D (Array.init 1 (fun _ -> randomDecimals 5))
                             Array2D.create 1 1
                             <| array2D (Array.init layer1X (fun _ -> randomDecimals layer1Y)) |]

                      { layers = layers }

                  let adjustments,_ =
                      calcDeltasAndAdjustments input network sigmaFun expectedValues

                  let expected =
                      network.layers
                      |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))

                  let actual =
                      compareGradients network input expectedValues adjustments true

                  Expect.sequenceEqual actual expected "gradients"
              } ]
        |> runTestsWithCLIArgs [] [||]
