我的反向传播实现是否正确?为什么梯度检查对特定输入返回不同的梯度,又该如何解决?
自从我上次在这里提问以来已经有一段时间了,所以如果有任何错误或遗漏,请指出,我会尽力解决它! :)
简而言之,我的问题是:对于特定的边缘情况,我的反向传播算法计算出的梯度与数值计算得到的梯度并不近似相等。
更多信息
在文末可以找到我的神经网络和反向传播的完整实现,其中可能有问题的函数是 calcDeltas2D 和 calcDeltasAndAdjustments(函数命名我还在斟酌)。
在 calcDeltas2D 中只计算反向传播的增量,然后在 calcDeltasAndAdjustments 中,该函数的结果用于计算最终调整(没有学习率)。
代码包括使用 Expecto 作为测试库的测试。在每个测试中,运行反向传播的结果与数值梯度进行比较。 前 3 个测试在各种网络架构组合中使用随机输入和随机权重(试图涵盖所有情况)。这些测试大部分时间都会成功(即使 epsilon 低于 1e-1 :) )。 最后一个测试用例是对所有神经元和随机权重使用常数。这个测试大部分时间都失败了(似乎随机权重对这里的结果有影响)。
代码:
这可以通过 dotnet fsi PathToThisCodeFile.fsx
在 .NET 5 中顺利运行。
#r "nuget: MathNet.Numerics"
#r "nuget: MathNet.Numerics.FSharp"
#r "nuget: Expecto"
open MathNet.Numerics
/// Produces `count` pseudo-random floats remapped from [0, 1) into (-1, 1].
let randomDoubles count =
    Random.Random.doubles count
    |> Array.map (fun v -> 1.0 - v * 2.0)

/// Produces `count` pseudo-random decimals in (-1, 1] (exact-arithmetic variant).
let randomDecimals count = randomDoubles count |> Array.map decimal
/// Extensions to the standard Array2D module used throughout this script.
module Array2D =
    /// Pairs corresponding elements of two 2D arrays. Iterates over arr1's
    /// extents, so arr2 must be at least as large in both dimensions.
    let inline zip arr1 arr2 =
        arr1 |> Array2D.mapi (fun i j e1 -> e1, Array2D.get arr2 i j)

    /// Enumerates a 2D array as a flat sequence in row-major order.
    let inline toSeq (arr: 'a [,]) =
        seq {
            for i in 0 .. Array2D.length1 arr - 1 do
                for j in 0 .. Array2D.length2 arr - 1 do
                    yield arr.[i, j]
        }
/// Logistic sigmoid 1 / (1 + e^(-x)); squashes any input into (0, 1).
let sigmaFun value =
    let x = float value
    1.0 / (1.0 + exp (-x))
[<AutoOpen>]
module NN2D =
    /// A single neuron: a matrix of incoming weights, shaped like the
    /// previous layer's activation matrix (one weight per input element).
    type Node = decimal [,]
    /// A layer is a 2D grid of neurons.
    type Layer = Node [,]
    /// A feed-forward network as an ordered array of layers.
    type Network2D = { layers: Layer [] }
    /// Result of a forward pass through the network.
    type LayerResult =
        { activation: decimal [,]         // output of the last layer evaluated so far
          intervalues: decimal [,] list } // every layer's activation, in forward order
    /// Address of one neuron within the network.
    type PositionNetwork = { layer: int; node: int * int }
    /// Convolutional wrapper: filter matrices plus a dense network.
    /// NOTE(review): not referenced anywhere in this file's visible code.
    type CNN =
        { filterMatrices: decimal [,] []
          network: Network2D }
/// Forward pass: threads `inputX` through each layer of `network`, applying
/// `sigmaFun` to every neuron's weighted input sum. Returns the final
/// activation together with every layer's activation (needed by back-prop).
let getValues2D (inputX: decimal [,]) (network: Network2D) sigmaFun =
    network.layers
    |> Array.fold
        (fun (state: LayerResult) nodes ->
            let activation = state.activation
            let res: decimal [,] =
                nodes
                |> Array2D.map
                    (fun weights ->
                        // Weighted sum of the previous activation. Decimal
                        // addition is exact, so the original sort-before-sum
                        // (a float summation-accuracy trick) was pure
                        // O(n log n) overhead and has been dropped.
                        Array2D.zip activation weights
                        |> Array2D.map (fun (a, b) -> a * b)
                        |> Array2D.toSeq
                        |> Seq.sum
                        |> sigmaFun)
            { LayerResult.activation = res
              LayerResult.intervalues = List.append state.intervalues [ res ] })
        { LayerResult.activation = inputX
          LayerResult.intervalues = [] }
/// Back-propagation: computes the error delta of every neuron.
/// Returns the per-layer delta matrices plus the forward-pass result.
/// The sigmoid derivative o * (1 - o) is hard-coded, so `sigmaFun` is
/// assumed to be the logistic sigmoid.
let calcDeltas2D (inputX: decimal [,]) network sigmaFun (expected: decimal [,]): decimal [,] [] * LayerResult =
    let layerResult = getValues2D inputX network sigmaFun
    let lastLayerIndex = network.layers.Length - 1
    // One zero-initialised delta matrix per layer, shaped like the layer.
    let deltas: decimal [,] [] =
        [| for layer in network.layers -> Array2D.zeroCreate (Array2D.length1 layer) (Array2D.length2 layer) |]
    // Output layer: delta = (o - t) * o * (1 - o)  (squared error x sigmoid derivative).
    layerResult.intervalues.[lastLayerIndex]
    |> Array2D.iteri
        (fun j1 j2 o_j -> deltas.[lastLayerIndex].[j1,j2] <- (o_j - expected.[j1,j2]) * (o_j * (1.0m - o_j)))
    // Hidden layers, walked backwards: delta_j = (sum_l delta_l * w_jl) * o_j * (1 - o_j).
    for layer in lastLayerIndex - 1 .. -1 .. 0 do
        layerResult.intervalues.[layer]
        |> Array2D.iteri
            (fun j1 j2 o_j ->
                let deltaSum =
                    let upperLayer = layer + 1
                    layerResult.intervalues.[upperLayer]
                    |> Array2D.mapi
                        (fun l1 l2 _ ->
                            let delta_l = deltas.[upperLayer].[l1,l2]
                            // w_jl: weight connecting node (j1,j2) to upper-layer node (l1,l2).
                            let w_jl =
                                network.layers.[upperLayer].[l1,l2].[j1,j2]
                            delta_l * w_jl)
                    |> Array2D.toSeq
                    // Decimal sums are exact: the former Seq.sort before the
                    // sum added nothing but O(n log n) cost and was removed.
                    |> Seq.sum
                let delta_j = deltaSum * (o_j * (1.0m - o_j))
                deltas.[layer].[j1,j2] <- delta_j)
    deltas, layerResult
/// Combines forward pass and deltas into per-weight gradient matrices
/// (the adjustments, before any learning rate is applied).
let calcDeltasAndAdjustments (input: decimal [,]) (network) sigmaFun (expected: decimal [,]) =
    let deltasBeforePoolingLayer, layerResult =
        calcDeltas2D input network sigmaFun expected
    // Prepend the input so index n holds the activation feeding layer n.
    let intervalues =
        input :: layerResult.intervalues |> List.toArray
    let adjustments =
        network.layers
        |> Array.take (Array.length intervalues - 1)
        |> Array.mapi
            (fun layerNumber nodes ->
                nodes
                |> Array2D.mapi
                    (fun j1 j2 weights ->
                        weights
                        |> Array2D.mapi
                            (fun i1 i2 _ ->
                                // dE/dw = delta_j * o_i, where o_i is the
                                // activation feeding this layer.
                                deltasBeforePoolingLayer.[layerNumber].[j1,j2]
                                * intervalues.[layerNumber].[i1,i2])))
    adjustments, layerResult
open Expecto
open Expecto.Logging
open Expecto.Logging.Message
/// Gradient-check test suite: compares analytic (back-prop) gradients with
/// numerical central-difference gradients over several network shapes.
let private _tests =
    // Decimal-valued sigmoid used by the tests (routed through float for exp).
    let sigmaFun (value: decimal) =
        1.0 / (1.0 + (exp (-float value))) |> decimal
    let logger = Log.create "2D NN Tests"
    testList
        "2-Dimensional NN functionality tests"
        [ // For every weight, yields true when the symmetric relative distance
          // between analytic and numerical gradient is <= 0.1.
          let compareGradients
              network
              input
              (expectedValues: decimal [,])
              (adjustments: decimal [,] [,] [])
              logging
              =
              network.layers
              |> Array.mapi
                  (fun layerNumber nodes ->
                      nodes
                      |> Array2D.mapi
                          (fun j1 j2 weights ->
                              Array2D.zip adjustments.[layerNumber].[j1,j2] weights
                              |> Array2D.mapi
                                  (fun i1 i2 (adjustment,weight) ->
                                      // Loss (0.5 * SSE) of the network after shifting this one
                                      // weight by epsilon; mutates the weight in place and
                                      // restores it before returning.
                                      let calcNetworkWithWeightAdjustment epsilon =
                                          let weights = network.layers.[layerNumber].[j1,j2]
                                          let oldWeight = weight
                                          // NOTE(review): this unperturbed run is only used for logging.
                                          let res1 = getValues2D input network sigmaFun
                                          weights.[i1,i2] <- oldWeight + epsilon
                                          let res = getValues2D input network sigmaFun
                                          if logging then
                                              eventX
                                                  $"weight: %A{(i1,i2)},value: {oldWeight},res: %A{res.activation},{
                                                  res1.activation = res.activation
                                                  }"
                                              |> logger.info
                                          weights.[i1,i2] <- oldWeight
                                          let acc =
                                              res.activation
                                              |> Array2D.mapi
                                                  (fun g h activation ->
                                                      (expectedValues.[g,h] - activation)
                                                      * (expectedValues.[g,h] - activation))
                                              |> Array2D.toSeq
                                              |> Seq.sort
                                              |> Seq.sum
                                          0.5m * acc
                                      let numericalGradient =
                                          // NOTE(review): 99399e-5 ~ 0.994 is a very large
                                          // finite-difference step; a step this size alone can
                                          // make the gradient check fail — confirm.
                                          let epsilons = [| 99399e-5m |]
                                          if logging then printfn "%A" epsilons
                                          let avgEpsilon = epsilons |> Array.average
                                          // Average loss over the epsilons, optionally negated to
                                          // obtain the backward point of the central difference.
                                          let calcNetworkWithShiftedWeightMultipleEpsilons epsilons negate =
                                              let transform = if negate then fun e -> -e else id
                                              epsilons
                                              |> Array.averageBy
                                                  (fun e ->
                                                      let res =
                                                          transform e |> calcNetworkWithWeightAdjustment
                                                      res)
                                          let res1,res2 =
                                              calcNetworkWithShiftedWeightMultipleEpsilons epsilons false,calcNetworkWithShiftedWeightMultipleEpsilons epsilons true
                                          // Avoid an exactly-zero numerator (would make dist degenerate).
                                          let a =
                                              match (res1 - res2) with
                                              | 0.m -> 1e-20m
                                              | value -> value
                                          // Central difference: (f(w+e) - f(w-e)) / (2e).
                                          (a) / (2.0m * avgEpsilon)
                                      // Symmetric relative distance between the two gradients.
                                      let dist =
                                          abs (adjustment - numericalGradient)
                                          / (abs adjustment + abs numericalGradient)
                                      if logging && dist > 1.0e-1m then
                                          eventX
                                              $"calculated gradient: {adjustment},numerical gradient: {
                                              numericalGradient
                                              },dist: {dist}"
                                          |> logger.info
                                      dist <= 1.0e-1m)))
          // 1x1 input, two layers of a single node each.
          test "Check that Calculated gradients are nearly equal to the numerical gradients | Single Node in all Layers" {
              let input =
                  randomDecimals 1 |> Array.singleton |> array2D
              let network =
                  { Network2D.layers =
                        [| array2D [| [| array2D [| randomDecimals input.Length |] |] |]
                           array2D [| [| array2D [| randomDecimals 1 |> Array.map decimal |] |] |] |] }
              let expectedValues =
                  randomDecimals 1 |> Array.singleton |> array2D
              let adjustments,_ =
                  calcDeltasAndAdjustments input network sigmaFun expectedValues
              // Expect the gradient check to pass (true) for every weight.
              let expected =
                  network.layers
                  |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))
              let actual =
                  compareGradients network input expectedValues adjustments false
              Expect.sequenceEqual actual expected "gradients"
          }
          // 3x3 hidden layer over a 1x10 input, single output node.
          test "Check that Calculated gradients are nearly equal to the numerical gradients | Single Node only in last layer" {
              let input = array2D [| randomDecimals 10 |]
              let innerLayerSize = 3
              let network =
                  { Network2D.layers =
                        [| Array2D.init
                               innerLayerSize
                               innerLayerSize
                               (fun _ _ -> array2D [| randomDecimals <| Array2D.length2 input |])
                           array2D [| [| array2D [| for _ in 0 .. innerLayerSize - 1 ->
                                                        randomDecimals innerLayerSize |] |] |] |] }
              let expectedValues =
                  randomDecimals 1 |> Array.singleton |> array2D
              let adjustments,_ =
                  calcDeltasAndAdjustments input network sigmaFun expectedValues
              let expected =
                  network.layers
                  |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))
              let actual =
                  compareGradients network input expectedValues adjustments false
              Expect.sequenceEqual actual expected "gradients"
          }
          // 3x3 hidden layer and a 1x3 row of output nodes.
          test "Check that Calculated gradients are nearly equal to the numerical gradients | Multi Node in all Layers" {
              let input = array2D [| randomDecimals 10 |]
              let innerLayerSize = 3
              let network =
                  { Network2D.layers =
                        [| Array2D.init
                               innerLayerSize
                               innerLayerSize
                               (fun _ _ -> array2D [| randomDecimals <| Array2D.length2 input |])
                           array2D [| [| for _ in 0 .. innerLayerSize - 1 ->
                                             array2D [| for _ in 0 .. innerLayerSize - 1 ->
                                                            randomDecimals innerLayerSize |] |] |] |] }
              let expectedValues =
                  randomDecimals innerLayerSize
                  |> Array.singleton
                  |> array2D
              let adjustments,_ =
                  calcDeltasAndAdjustments input network sigmaFun expectedValues
              let expected =
                  network.layers
                  |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))
              let actual =
                  compareGradients network input expectedValues adjustments false
              Expect.sequenceEqual actual expected "gradients"
          }
          // Regression: constant input (-0.2 everywhere), random weights, logging on.
          // This is the case reported to fail most of the time.
          test "regression 1" {
              let input = Array2D.create 1 5 -0.2m
              let expectedValues = array2D [ [ 1.0m ] ]
              let network =
                  let layer1X = 5
                  let layer1Y = 1
                  // NOTE(review): Array2D.create fills every cell with the SAME value,
                  // so here all 5x1 nodes of layer 1 alias one shared weight matrix
                  // (and likewise for layer 2). The gradient check's single-weight
                  // perturbation therefore perturbs every node at once — a strong
                  // candidate for the reported mismatch. Confirm whether sharing
                  // is intended; Array2D.init would give each node its own matrix.
                  let layers =
                      [| Array2D.create layer1X layer1Y
                         <| array2D (Array.init 1 (fun _ -> randomDecimals 5))
                         Array2D.create 1 1
                         <| array2D (Array.init layer1X (fun _ -> randomDecimals layer1Y)) |]
                  { layers = layers }
              let adjustments,_ =
                  calcDeltasAndAdjustments input network sigmaFun expectedValues
              let expected =
                  network.layers
                  |> Array.map (Array2D.map (Array2D.map (fun _ -> true)))
              let actual =
                  compareGradients network input expectedValues adjustments true
              Expect.sequenceEqual actual expected "gradients"
          } ]
    |> runTestsWithCLIArgs [] [||]
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。