使用神经网络和反向传播来进行数字相加。

huangapple go评论75阅读模式
英文:

Adding numbers using Neural network + back propagation

问题

我正在尝试使用神经网络将数字相加,但似乎无法使反向传播函数正常工作。

这是神经网络的外观,其中 W1 = x1,W2 = x2,W3 = y1,W4 = y2,W5 = z1,W6 = z2。

这是到目前为止我的代码:

from random import randint, random, uniform
import numpy as np

class Data:
    """Randomly generated training set of integer addition problems.

    ``data_dict`` maps an input pair ``(num1, num2)`` to its sum
    ``num1 + num2``. It is created per instance so that separate ``Data``
    objects do not share (and keep adding to) one common dictionary.
    """

    def __init__(self, limit):
        """Remember how many samples to draw and start with an empty data set.

        limit: number of random pairs drawn by each ``create_data`` call.
        """
        self.limit = limit
        self.data_dict = {}

    def create_data(self):
        """Draw ``limit`` random pairs from 0..100 (inclusive) and store their sums.

        Pairs are dictionary keys, so a pair that is drawn twice is stored
        only once: ``len(data_dict)`` may end up smaller than ``limit``.
        """
        for _ in range(self.limit):
            num1 = randint(0, 100)
            num2 = randint(0, 100)
            self.data_dict[(num1, num2)] = num1 + num2

'''you compare the error with every test in the data set and find weights that minimise the error'''
class Neural:
    """Small 2-2-1 feed-forward network stored as six scalar weights.

    x1/y1 are the weights from the two inputs into the first hidden unit,
    x2/y2 the weights into the second hidden unit, and z1/z2 connect the two
    hidden units to the single output. There are no bias terms.
    """

    def __init__(self, data):
        """Initialise every weight with ``uniform(-1, 1)`` and keep *data*.

        data: object whose ``data_dict`` maps ``(num1, num2)`` to the target.
        """
        self.x1 = uniform(-1, 1)
        self.x2 = uniform(-1, 1)
        self.y1 = uniform(-1, 1)
        self.y2 = uniform(-1, 1)
        self.z1 = uniform(-1, 1)
        self.z2 = uniform(-1, 1)
        self.data = data

    def relu(self, number):
        """Return *number* clamped below at 0. Not called elsewhere in this class."""
        return max(0, number)

    def sigmoid(self, number):
         """Return the logistic function 1 / (1 + e^-number)."""
         return 1 / (1 + np.exp(-number))

    '''weighted summation with activation function to compute output'''
    def compute_output(self, num1, num2):
        """Run a forward pass and return the network's output for the pair.

        Each hidden unit applies the sigmoid to its weighted input sum; the
        output is the z-weighted sum of the two hidden values with no
        activation applied on top.
        """
        hidden_layer_input1 = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
        hidden_layer_input2 = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
        return ((hidden_layer_input1 * self.z1) + (hidden_layer_input2 * self.z2))

    '''mean squared error between the actual output with the output generated by the algorithm'''
    def compare_output(self, data):
        """Return the mean squared error of the network over ``data.data_dict``.

        Uses the *data* argument, not ``self.data``. Divides by the number of
        samples, so an empty ``data_dict`` raises ZeroDivisionError.
        """
        error = 0
        for key in data.data_dict:
            error += abs(data.data_dict[key] - self.compute_output(key[0], key[1])) ** 2
        return error / len(data.data_dict)

    '''TODO function that changes the weight depending on the errors using gradient descent'''
    '''first make it random'''
    '''next perhaps change weights for each test and average out the adjustments for each weight'''
    def random_back_propagation(self):
        """Random search: redraw all six weights until the mean squared error is <= 0.1.

        Prints the error after every draw and, once the loop ends, the output
        for the pair (140, 15). Weights come from ``random()`` here, not from
        ``uniform(-1, 1)`` as in ``__init__``.
        NOTE(review): nothing bounds the number of draws - the loop may never end.
        """
        # Sentinel larger than the threshold so the loop body runs at least once.
        error = 100000
        while error > 0.1:
            self.x1 = random()
            self.x2 = random()
            self.y1 = random()
            self.y2 = random()
            self.z1 = random()
            self.z2 = random()
            error = self.compare_output(self.data)
            print(error)
        print(self.compute_output(140, 15))

    '''learning rate is the amount the weights are updated during training'''
    def back_propagation(self, learning_rate):
        """Run 1000 passes over ``self.data``, updating the weights after every sample.

        learning_rate: factor applied to every weight adjustment.
        Prints all six weights after each sample and one prediction per pass.
        """
        for _ in range(1000):
            for key in self.data.data_dict:
                num1, num2 = key
                target = self.data.data_dict[key]
                # Forward pass (same formula as compute_output); inputs are used unscaled.
                hidden_layer1_output = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
                hidden_layer2_output = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
                output = ((hidden_layer1_output * self.z1) + (hidden_layer2_output * self.z2))
                # Signed residual, not a squared loss.
                error = target - output

                # NOTE(review): output * (1 - output) is the sigmoid-derivative form,
                # but `output` above is a plain weighted sum with no sigmoid applied.
                output_unit1 = output * (1 - output) * (error)
                hidden_unit1 = hidden_layer1_output * (1 - hidden_layer1_output) * self.z1 * output_unit1
                hidden_unit2 = hidden_layer2_output * (1 - hidden_layer2_output) * self.z2 * output_unit1

                # NOTE(review): these two updates are scaled by `output`; neither
                # `error` nor `output_unit1` takes part in them.
                self.z1 += (learning_rate * output * hidden_layer1_output)
                self.z2 += (learning_rate * output * hidden_layer2_output)

                # hidden_unit1/2 were computed above with the z values from before
                # the z update.
                self.x2 += (learning_rate * hidden_unit2 * num1)
                self.x1 += (learning_rate * hidden_unit1 * num1)
                self.y1 += (learning_rate * hidden_unit1 * num2)
                self.y2 += (learning_rate * hidden_unit2 * num2)

                print(self.x1, self.x2, self.y1, self.y2, self.z1, self.z2)
            # Runs once per pass: num1/num2 still hold the last sample of the inner loop.
            print(num1, num2, self.compute_output(num1, num2))

# Driver: build a data set from 200 random draws, train with learning rate 0.01,
# then print the network's answer for the pair (15, 7).
data = Data(200)
data.create_data()
neural = Neural(data)
neural.back_propagation(0.01)
print(neural.compute_output(15, 7))
英文:

I am trying to use a neural network to add numbers together, but I can't seem to get the back-propagation function to work.

使用神经网络和反向传播来进行数字相加。

This is what the neural network looks like, where W1 = x1, W2 = x2, W3 = y1, W4 = y2, W5 = z1 and W6 = z2.
This is my code so far:

from random import randint,random ,uniform
import numpy as np 
class Data:
    """Randomly generated training set of integer addition problems.

    ``data_dict`` maps an input pair ``(num1, num2)`` to its sum
    ``num1 + num2``. It is created per instance so that separate ``Data``
    objects do not share (and keep adding to) one common dictionary.
    """

    def __init__(self, limit):
        """Remember how many samples to draw and start with an empty data set.

        limit: number of random pairs drawn by each ``create_data`` call.
        """
        self.limit = limit
        self.data_dict = {}

    def create_data(self):
        """Draw ``limit`` random pairs from 0..100 (inclusive) and store their sums.

        Pairs are dictionary keys, so a pair that is drawn twice is stored
        only once: ``len(data_dict)`` may end up smaller than ``limit``.
        """
        for _ in range(self.limit):
            num1 = randint(0, 100)
            num2 = randint(0, 100)
            self.data_dict[(num1, num2)] = num1 + num2
class Neural:
    """Small 2-2-1 feed-forward network stored as six scalar weights.

    The error is compared against every sample in the data set in order to
    find weights that minimise it. x1/y1 are the weights from the two inputs
    into the first hidden unit, x2/y2 the weights into the second hidden
    unit, and z1/z2 connect the hidden units to the single output. There are
    no bias terms.
    """

    def __init__(self, data):
        """Initialise every weight with ``uniform(-1, 1)`` and keep *data*.

        data: object whose ``data_dict`` maps ``(num1, num2)`` to the target.
        """
        self.x1 = uniform(-1, 1)
        self.x2 = uniform(-1, 1)
        self.y1 = uniform(-1, 1)
        self.y2 = uniform(-1, 1)
        self.z1 = uniform(-1, 1)
        self.z2 = uniform(-1, 1)
        self.data = data

    def relu(self, number):
        """Return *number* clamped below at 0. Not called elsewhere in this class."""
        return max(0, number)

    def sigmoid(self, number):
        """Return the logistic function 1 / (1 + e^-number)."""
        return 1 / (1 + np.exp(-number))

    def compute_output(self, num1, num2):
        """Weighted summation with activation function to compute the output.

        Each hidden unit applies the sigmoid to its weighted input sum; the
        output is the z-weighted sum of the two hidden values with no
        activation applied on top.
        """
        hidden_layer_input1 = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
        hidden_layer_input2 = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
        return (hidden_layer_input1 * self.z1) + (hidden_layer_input2 * self.z2)

    def compare_ouput(self, data):
        """Return the mean squared error of the network over ``data.data_dict``.

        The errors of all samples are added up and averaged. Uses the *data*
        argument, not ``self.data``; an empty ``data_dict`` raises
        ZeroDivisionError.
        """
        error = 0
        for key in data.data_dict:
            error += abs(data.data_dict[key] - self.compute_output(key[0], key[1])) ** 2
        return error / len(data.data_dict)

    # TODO: function that changes the weights depending on the errors using
    # gradient descent. First make it random; next perhaps change weights for
    # each test and average out the adjustments for each weight.
    def random_back_propagation(self):
        """Random search: redraw all six weights until the mean squared error is <= 0.1.

        Prints the error after every draw and, once the loop ends, the output
        for the pair (140, 15). Weights come from ``random()`` here, not from
        ``uniform(-1, 1)`` as in ``__init__``.
        NOTE(review): nothing bounds the number of draws - the loop may never end.
        """
        # Sentinel larger than the threshold so the loop body runs at least once.
        error = 100000
        while error > 0.1:
            self.x1 = random()
            self.x2 = random()
            self.y1 = random()
            self.y2 = random()
            self.z1 = random()
            self.z2 = random()
            error = self.compare_ouput(self.data)
            print(error)
        print(self.compute_output(140, 15))

    def back_propagation(self, learning_rate):
        """Run 1000 passes over ``self.data``, updating the weights after every sample.

        learning_rate: the amount the weights are updated during training.
        Prints all six weights after each sample and one prediction per pass.
        The update rule is reproduced exactly as asked in the question.
        """
        for _ in range(1000):
            for key in self.data.data_dict:
                num1, num2 = key
                target = self.data.data_dict[key]
                # Forward pass (same formula as compute_output); inputs are used unscaled.
                hidden_layer1_output = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
                hidden_layer2_output = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
                output = (hidden_layer1_output * self.z1) + (hidden_layer2_output * self.z2)
                # Signed residual, not a squared loss.
                error = target - output

                # NOTE(review): output * (1 - output) is the sigmoid-derivative form,
                # but `output` above is a plain weighted sum with no sigmoid applied.
                output_unit1 = output * (1 - output) * (error)
                hidden_unit1 = hidden_layer1_output * (1 - hidden_layer1_output) * self.z1 * output_unit1
                hidden_unit2 = hidden_layer2_output * (1 - hidden_layer2_output) * self.z2 * output_unit1

                # NOTE(review): these two updates are scaled by `output`; neither
                # `error` nor `output_unit1` takes part in them.
                self.z1 += (learning_rate * output * hidden_layer1_output)
                self.z2 += (learning_rate * output * hidden_layer2_output)

                self.x2 += (learning_rate * hidden_unit2 * num1)
                self.x1 += (learning_rate * hidden_unit1 * num1)
                self.y1 += (learning_rate * hidden_unit1 * num2)
                self.y2 += (learning_rate * hidden_unit2 * num2)

                print(self.x1, self.x2, self.y1, self.y2, self.z1, self.z2)
            # Runs once per pass: num1/num2 still hold the last sample of the inner loop.
            print(num1, num2, self.compute_output(num1, num2))
# Driver: build a data set from 200 random draws, train with learning rate 0.01,
# then print the network's answer for the pair (15, 7).
data = Data(200)
data.create_data()
neural = Neural(data)
neural.back_propagation(0.01)
# Leftover debugging prints, kept disabled.
#print(data.data_array)
#print(uniform(-1,1))
print(neural.compute_output(15,7))

I tried changing the learning rate, the number of iterations and the number of items in the data set, but I'm not sure whether the problem lies in finding the correct values to use or whether my function is simply incorrect.

答案1

得分: 0

有一些问题存在于你目前的方法中。我将尽力在接下来的部分进行详细说明。

损失函数

你目前使用的损失函数是 error = target - output。乍一看似乎可以,因为如果 target 和 output 相等,error 就是 0。但请记住,网络将尝试最小化损失函数,在这种情况下,实现这一目标的一种方式是生成一个极大的 output,使 error 为负数。

我建议使用类似 MSE(均方误差)的方法:error = (target - output) ** 2。这样,如果网络想要将其最小化,它就只能尽量使 target 和 output 相等。

梯度更新

当前你正在使用正梯度来更新参数:self.z1 += (learning_rate * gradient)。梯度指向损失函数增加最快的方向。我们不想增加损失,而是想朝着下降最快的方向前进,因此应使用负梯度:self.z1 -= (learning_rate * gradient)。

Sigmoid函数

你在一些隐藏单元中使用了Sigmoid作为激活函数。这是Sigmoid的图形:
使用神经网络和反向传播来进行数字相加。

注意当X > 6或X < -6时,Sigmoid基本上变为一条水平线。这条水平线转化为极小的梯度,几乎为0,这意味着参数更新极其缓慢,甚至根本不更新。网络饱和了。

你输入到网络的数据是比神经网络通常处理的数字更大的数字。考虑到你的一个隐藏单元的权重被随机初始化为0.5和0.3。现在你向网络输入你想要求和的数字:50和20。隐藏单元的计算将是sigmoid(50 * 0.5 + 20 * 0.3),即sigmoid(31),其导数实际上是0。

如果你仍想使用Sigmoid,我建议预处理你的输入数据,以便网络不会接收到如此大的数字。例如,你可以将每个输入除以100,因为训练数据中可能的最大整数是100。

将所有内容放在一起

下面是修复了上述问题的代码。由于它使用了不同的损失函数,所有的梯度都不同(欢迎自行验算,因为我对微积分不是很有把握)。我还添加了一条打印语句,用于跟踪每个 epoch(训练轮次)的损失。

from random import randint, random, uniform
import numpy as np

...

# (剩余代码略)
英文:

There are some problems with your current approach. I'll try to elaborate in the following sections.

Loss function

The loss function you are currently using is error = target - output. At first glance it seems to work, because if target and output are equal, error will be 0. But remember that the network will try to minimize the loss function, and in this case one of the ways to do it is by generating an extremely large output, so that error will be negative.

I suggest using something like an MSE (Mean Squared Error): error = (target - output) ** 2. That way, if the network wants to minimize it, it will have no choice but to try to make target and output equal.

Gradient updates

Currently you are updating your parameters with the positive gradient self.z1 += (learning_rate * gradient). The gradient points to the direction of greatest increase of the loss function. We don't want to increase the loss. We want to go in the direction of greatest decrease, so we use the negative gradient self.z1 -= (learning_rate * gradient).

The sigmoid function

You are using a sigmoid as the activation function for some of your hidden units. Here's the sigmoid graph:
使用神经网络和反向传播来进行数字相加。

Notice that when X > 6 or X < -6, the sigmoid basically flatlines. This flatline translates into an extremely small gradient, practically 0, and that means the parameters update extremely slowly or not at all. The network is saturated.

The data that you are feeding to the network consists of numbers that are larger than what neural networks normally work with. Consider that one of your hidden units had its weights randomly initialized to 0.5 and 0.3. Now you feed the network with the numbers you wish to sum: 50 and 20. The computation at the hidden unit will be sigmoid(50 * 0.5 + 20 * 0.3) which is sigmoid(31), and its derivative is effectively 0.

If you still wanna use the sigmoid, I recommend that you preprocess your input data so that the network won't receive such large numbers. For example, you can divide every input by 100, as the largest possible integer in the training data is 100.

Putting it all together

Below is the code with the above mentioned problems fixed. As it uses a different loss function, all of the gradients are different (feel free to double check as I'm not that confident in calculus). I also added a print to keep track of the loss across epochs.

from random import randint, random, uniform
import numpy as np


class Data:
    """Randomly generated training set of integer addition problems.

    ``data_dict`` maps an input pair ``(num1, num2)`` to its sum
    ``num1 + num2``. It is created per instance so that separate ``Data``
    objects do not share (and keep adding to) one common dictionary.
    """

    def __init__(self, limit):
        """Remember how many samples to draw and start with an empty data set.

        limit: number of random pairs drawn by each ``create_data`` call.
        """
        self.limit = limit
        self.data_dict = {}

    def create_data(self):
        """Draw ``limit`` random pairs from 0..100 (inclusive) and store their sums.

        Pairs are dictionary keys, so a pair that is drawn twice is stored
        only once: ``len(data_dict)`` may end up smaller than ``limit``.
        """
        for _ in range(self.limit):
            num1 = randint(0, 100)
            num2 = randint(0, 100)
            self.data_dict[(num1, num2)] = num1 + num2


class Neural:
    """Small 2-2-1 feed-forward network trained to add two numbers.

    The error is compared against every sample in the data set in order to
    find weights that minimise it. x1/y1 are the weights from the two inputs
    into the first hidden unit, x2/y2 the weights into the second hidden
    unit, and z1/z2 connect the hidden units to the single linear output.
    There are no bias terms.
    """

    # Inputs and targets are divided by this factor before they reach the
    # network (and predictions multiplied back) so the sigmoid units are not
    # driven into their flat, near-zero-gradient region by raw values.
    SCALE = 100

    def __init__(self, data):
        """Initialise every weight with ``uniform(-1, 1)`` and keep *data*.

        data: object whose ``data_dict`` maps ``(num1, num2)`` to the target.
        """
        self.x1 = uniform(-1, 1)
        self.x2 = uniform(-1, 1)
        self.y1 = uniform(-1, 1)
        self.y2 = uniform(-1, 1)
        self.z1 = uniform(-1, 1)
        self.z2 = uniform(-1, 1)
        self.data = data

    def relu(self, number):
        """Return *number* clamped below at 0. Not called elsewhere in this class."""
        return max(0, number)

    def sigmoid(self, number):
        """Return the logistic function 1 / (1 + e^-number)."""
        return 1 / (1 + np.exp(-number))

    def compute_output(self, num1, num2):
        """Weighted summation with activation function to compute the output.

        Takes the two numbers in their original units, rescales them by
        ``SCALE`` for the forward pass and returns the prediction scaled back
        to the original units.
        """
        num1, num2 = num1 / self.SCALE, num2 / self.SCALE
        hidden_layer_input1 = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
        hidden_layer_input2 = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
        return self.SCALE * ((hidden_layer_input1 * self.z1) + (hidden_layer_input2 * self.z2))

    def compare_ouput(self, data):
        """Return the mean squared error of the network over ``data.data_dict``.

        The errors of all samples are added up and averaged, in the original
        (unscaled) units. Uses the *data* argument, not ``self.data``; an
        empty ``data_dict`` raises ZeroDivisionError.
        """
        error = 0
        for key in data.data_dict:
            error += abs(data.data_dict[key] - self.compute_output(key[0], key[1])) ** 2
        return error / len(data.data_dict)

    # TODO: function that changes the weights depending on the errors using
    # gradient descent. First make it random; next perhaps change weights for
    # each test and average out the adjustments for each weight.
    def random_back_propagation(self):
        """Random search: redraw all six weights until the mean squared error is <= 0.1.

        Prints the error after every draw and, once the loop ends, the output
        for the pair (140, 15). Weights come from ``random()`` here, not from
        ``uniform(-1, 1)`` as in ``__init__``.
        NOTE(review): nothing bounds the number of draws - the loop may never end.
        """
        # Sentinel larger than the threshold so the loop body runs at least once.
        error = 100000
        while error > 0.1:
            self.x1 = random()
            self.x2 = random()
            self.y1 = random()
            self.y2 = random()
            self.z1 = random()
            self.z2 = random()
            error = self.compare_ouput(self.data)
            print(error)
        print(self.compute_output(140, 15))

    def back_propagation(self, learning_rate, epochs=1000):
        """Train with per-sample gradient descent on the squared error.

        learning_rate: the amount the weights are updated during training.
        epochs: number of full passes over ``self.data`` (default 1000).

        Prints the mean squared error (in rescaled units) after every epoch.
        """
        for _ in range(epochs):
            errors = []
            for (num1, num2), target in self.data.data_dict.items():
                # Rescale inputs and target so the sigmoids stay out of saturation.
                num1, num2 = num1 / self.SCALE, num2 / self.SCALE
                target = target / self.SCALE

                hidden_layer1_output = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
                hidden_layer2_output = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
                output = (hidden_layer1_output * self.z1) + (hidden_layer2_output * self.z2)

                # Loss for this sample: error = (target - output) ** 2
                errors.append((target - output) ** 2)

                # d(error)/d(output) = -2 * (target - output)
                d_output = -2 * (target - output)

                # Chain rule through the output weight and the sigmoid:
                #   d(output)/d(hl1) = z1
                #   d(hl1)/d(pre-activation) = hl1 * (1 - hl1)
                # Both are evaluated with the current z values, before z is updated.
                d_hidden1 = d_output * self.z1 * hidden_layer1_output * (1 - hidden_layer1_output)
                d_hidden2 = d_output * self.z2 * hidden_layer2_output * (1 - hidden_layer2_output)

                # Step against the gradient (hence "-="): d(pre-activation)/d(x) = num1,
                # d(pre-activation)/d(y) = num2.
                self.x1 -= learning_rate * d_hidden1 * num1
                self.y1 -= learning_rate * d_hidden1 * num2
                self.x2 -= learning_rate * d_hidden2 * num1
                self.y2 -= learning_rate * d_hidden2 * num2

                # d(error)/d(z1) = d(error)/d(output) * hidden_layer1_output
                self.z1 -= learning_rate * d_output * hidden_layer1_output
                self.z2 -= learning_rate * d_output * hidden_layer2_output
            print(f"Mean error: {np.mean(errors)}")


# Driver: build a data set from 2000 random draws, train with learning rate 0.1,
# then print the network's answers for a few sample additions.
data = Data(2000)
data.create_data()

neural = Neural(data)
neural.back_propagation(0.1)
print("#################################PREDICTIONS############################################")
print(f"15 + 7 = {neural.compute_output(15, 7)}")
print(f"3 + 2 = {neural.compute_output(3, 2)}")
print(f"50 + 70 = {neural.compute_output(50, 70)}")

huangapple
  • 本文由 发表于 2023年5月17日 20:37:14
  • 转载请务必保留本文链接:https://go.coder-hub.com/76272182.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定