如何解决神经网络只在顺序给定几个模式时学习最后一个模式
This post 是关于同样的问题,但没有给出正确的答案。由于这个问题似乎很普遍,我会将我的代码隐藏在幕后。
在 this source 之后,我编写了一个网络,当我给它一个带有目标向量的训练示例时,该网络运行良好。使用梯度下降我最小化成本函数,使网络在给定相应的输入向量时提供目标向量。 但这仅适用于一个示例!
神经网络的主要目标是对不同的输入做出不同的反应,我们应该能够训练它这样做。我尝试通过为每个示例计算的 delta-weights 的平均值来更改网络权重,但失败了:训练过程卡住了输出向量,其中包含训练集中所有目标向量的平均值。没有留下任何想法,也没有找到可以解释的来源。
如何使用一组示例而不是仅使用一个输入向量来训练神经网络?
更新
对于那些想知道的人,我将在下面附上我的代码。尝试运行它,您会看到它提供了 0 1
,而不是输出 0.5 0.5
,这是减去平均增量权重的结果。
import numpy as np
from sympy import symbols,lambdify
from sympy.functions.elementary.exponential import exp
from time import sleep
x = symbols('x')
sigmoid = exp(x) / (1 + exp(x))
sigmoid_der = sigmoid.diff(x)
sigmoid = lambdify(x,sigmoid)
sigmoid_der = lambdify(x,sigmoid_der)
class Neuron:
def __init__(self,amount_of_inputs: int,hidden = True):
self.inputs = np.random.rand(amount_of_inputs) if hidden else np.array([1])
self.bias = 0.0
self._activation = 0.0
self._wsum = 0.0
@property
def activation(self) -> float:
return self._activation
@property
def wsum(self) -> float:
return self._wsum
def calculate(self,indata):
wval = self.inputs * indata + self.bias
self._wsum = wval.sum()
self._activation = sigmoid(self._wsum)
class NeuralNetwork:
def __init__(self,shape: tuple):
self.shape = shape
self.layers = len(self.shape)
self.network = [None for _ in range(self.layers)]
self.network[0] = tuple([Neuron(1,hidden = False) for _ in range(shape[0])])
for L in range(1,self.layers):
self.network[L] = tuple([Neuron(shape[L - 1]) for _ in range(shape[L])])
self.network = tuple(self.network)
y = [symbols(f'y[{i}]') for i in range(shape[self.layers - 1])]
a = [symbols(f'a[{i}]') for i in range(shape[self.layers - 1])]
self.cost_function = sum([(y[i] - a[i]) ** 2 / 2 for i in range(shape[self.layers - 1])])
self.gradient = tuple([self.cost_function.diff(a[i]) for i in range(shape[self.layers - 1])])
self.cost_function = lambdify((y,a),self.cost_function)
self.gradient = lambdify((y,self.gradient)
def getLayer(self,L):
return np.array([self.network[L][i].activation for i in range(self.shape[L])])
def getWeightedSum(self,L):
return np.array([self.network[L][i].wsum for i in range(self.shape[L])])
def getInputsMatrix(self,L):
return np.array([self.network[L][i].inputs for i in range(self.shape[L])])
def calculate(self,values):
for i in range(self.shape[0]):
self.network[0][i].calculate(values[i])
for L in range(1,self.layers):
indata = self.getLayer(L - 1)
for j in range(self.shape[L]):
self.network[L][j].calculate(indata)
def get_result(self) -> tuple:
return tuple([self.network[self.layers - 1][i].activation for i in range(self.shape[self.layers - 1])])
def teach(self,targets,examples):
if len(targets) != len(examples):
raise TypeError("The amounts of target and input vectors do not coincide")
activations = [None for _ in range(len(examples))]
delta = activations.copy()
cost_is_low_enough = False
while not cost_is_low_enough:
for x in range(len(examples)):
self.calculate(examples[x])
activations[x] = [self.getLayer(l) for l in range(self.layers)]
delta[x] = [None for _ in range(self.layers - 1)]
network_output = self.getLayer(self.layers - 1)
output_weighted = self.getWeightedSum(self.layers - 1)
gradient_vector = np.array(self.gradient(targets[x],network_output))
delta[x][-1] = gradient_vector * sigmoid_der(output_weighted)
for l in range(self.layers - 2,-1):
weight_matrix = self.getInputsMatrix(l + 1).transpose()
output_weighted = self.getWeightedSum(l)
activation = self.getLayer(l)
for j in range(self.shape[l]):
delta[x][l - 1] = (weight_matrix @ delta[x][l]) * sigmoid_der(output_weighted) * activation
dw = [None for _ in range(self.layers - 1)]
for x in range(len(examples)):
self.calculate(examples[x])
for l in range(self.layers - 1):
dw[l] = np.empty(self.shape[l + 1])
for j in range(self.shape[l + 1]):
dw[l][j] = np.mean([delta[x][l][j] for x in range(len(examples))])
for l in range(1,self.layers):
for j in range(self.shape[l]):
for k in range(self.shape[l - 1]):
self.network[l][j].inputs[k] -= 0.1 * dw[l - 1][j]
cost = 0
for x in range(len(examples)):
self.calculate(examples[x])
network_output = np.array(self.get_result())
incost = self.cost_function(targets[x],network_output)
print(network_output,incost)
cost += incost
# sleep(0.05)
cost /= len(examples)
print()
if cost < 0.001: cost_is_low_enough = True
network = NeuralNetwork((2,4,1))
examples = np.array([
[1,2],[3,4],])
targets = np.array([
[0],[1]
])
network.teach(targets,examples)
values_1 = np.array([5,10])
network.calculate(values_1)
result = network.get_result()
print(result)
'''
values_2 = np.array([3,4])
network.calculate(values_2)
result = network.get_result()
print(result)
'''
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。