Adding numbers using Neural network + back propagation
Question
I am trying to use neural networks to add numbers together, but I can't seem to get the back propagation function working.
This is what the neural network looks like, where W1 = x1, W2 = x2, W3 = y1, W4 = y2, W5 = z1 and W6 = z2:
This is my code so far:
from random import randint, random, uniform
import numpy as np


class Data:
    data_dict = {}

    def __init__(self, limit):
        self.limit = limit

    '''creates data but beware that the limit may not be the same as the size of the dictionary'''
    def create_data(self):
        for i in range(self.limit):
            num1 = randint(0, 100)
            num2 = randint(0, 100)
            self.data_dict[(num1, num2)] = num1 + num2


'''you compare the error with every test in the data set and find weights that minimise the error'''
class Neural:
    def __init__(self, data):
        self.x1 = uniform(-1, 1)
        self.x2 = uniform(-1, 1)
        self.y1 = uniform(-1, 1)
        self.y2 = uniform(-1, 1)
        self.z1 = uniform(-1, 1)
        self.z2 = uniform(-1, 1)
        self.data = data

    def relu(self, number):
        return max(0, number)

    def sigmoid(self, number):
        return 1 / (1 + np.exp(-number))

    '''weighted summation with activation function to compute output'''
    def compute_output(self, num1, num2):
        hidden_layer_input1 = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
        hidden_layer_input2 = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
        return (hidden_layer_input1 * self.z1) + (hidden_layer_input2 * self.z2)

    '''mean squared error between the actual output and the output generated by the algorithm'''
    def compare_ouput(self, data):
        '''actually, better to find error between all tests. add all the errors up'''
        error = 0
        for key in data.data_dict:
            error += abs(data.data_dict[key] - self.compute_output(key[0], key[1])) ** 2
        return error / len(data.data_dict)
        # return abs(actual - self.compute_output(num1, num2))

    '''TODO function that changes the weight depending on the errors using gradient descent'''
    '''first make it random'''
    '''next perhaps change weights for each test and average out the adjustments for each weight'''
    def random_back_propagation(self):
        error = 100000
        while error > 0.1:
            self.x1 = random()
            self.x2 = random()
            self.y1 = random()
            self.y2 = random()
            self.z1 = random()
            self.z2 = random()
            error = self.compare_ouput(self.data)
            print(error)
        print(self.compute_output(140, 15))

    '''learning rate is the amount the weights are updated during training'''
    def back_propagation(self, learning_rate):
        for _ in range(1000):
            for key in self.data.data_dict:
                num1, num2 = key
                target = self.data.data_dict[key]
                hidden_layer1_output = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
                hidden_layer2_output = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
                output = (hidden_layer1_output * self.z1) + (hidden_layer2_output * self.z2)
                error = target - output
                # check if you are happy with the error
                output_unit1 = output * (1 - output) * error
                hidden_unit1 = hidden_layer1_output * (1 - hidden_layer1_output) * self.z1 * output_unit1
                hidden_unit2 = hidden_layer2_output * (1 - hidden_layer2_output) * self.z2 * output_unit1
                self.z1 += (learning_rate * output * hidden_layer1_output)
                self.z2 += (learning_rate * output * hidden_layer2_output)
                self.x2 += (learning_rate * hidden_unit2 * num1)
                self.x1 += (learning_rate * hidden_unit1 * num1)
                self.y1 += (learning_rate * hidden_unit1 * num2)
                self.y2 += (learning_rate * hidden_unit2 * num2)
                print(self.x1, self.x2, self.y1, self.y2, self.z1, self.z2)
                print(num1, num2, self.compute_output(num1, num2))


data = Data(200)
data.create_data()
neural = Neural(data)
neural.back_propagation(0.01)
# print(data.data_array)
# print(uniform(-1, 1))
print(neural.compute_output(15, 7))
I tried changing the learning rate, the number of iterations, and the number of items in the data set, but I'm not sure whether the problem is with finding the right values to use or whether my function is simply incorrect.
Answer 1
Score: 0
There are some problems with your current approach. I'll try to elaborate in the following sections.
Loss function
The loss function you are currently using is error = target - output. At first glance it seems to work, because if target and output are equal, error will be 0. But remember that the network will try to minimize the loss function, and in this case one of the ways to do it is by generating an extremely large output, so that error will be negative.
I suggest using something like an MSE (Mean Squared Error): error = (target - output) ** 2. That way, if the network wants to minimize it, it will have no choice but to try to make target and output equal.
Gradient updates
Currently you are updating your parameters with the positive gradient: self.z1 += (learning_rate * gradient). The gradient points in the direction of greatest increase of the loss function. We don't want to increase the loss; we want to go in the direction of greatest decrease, so we use the negative gradient: self.z1 -= (learning_rate * gradient).
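As a minimal, self-contained sketch of why the sign matters (a toy single-weight example, not the network above): fitting one weight w so that w * x matches a target under the squared error only converges when we step against the gradient.

# toy example: one weight w, loss (target - w * x) ** 2, so the optimum is w == target / x == 5
x, target = 2.0, 10.0
w = 0.0
learning_rate = 0.05
for step in range(100):
    output = w * x
    gradient = -2 * (target - output) * x   # d(loss)/d(w)
    w -= learning_rate * gradient           # minus sign: move against the gradient
print(w)  # converges to roughly 5.0; with "+=" the same loop diverges instead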
The sigmoid function
You are using a sigmoid as the activation function for some of your hidden units. Here's the sigmoid graph:
Notice that when X > 6 or X < -6, the sigmoid basically flatlines. This flatline translates into an extremely small gradient, practically 0, and that means the parameters update extremely slowly or not at all. The network is saturated.
The data that you are feeding to the network consists of numbers that are larger than what neural networks normally work with. Consider that one of your hidden units has its weights randomly initialized to 0.5 and 0.3. Now you feed the network the numbers you wish to sum: 50 and 20. The computation at that hidden unit will be sigmoid(50 * 0.5 + 20 * 0.3), which is sigmoid(31), and its derivative is effectively 0.
If you still want to use the sigmoid, I recommend that you preprocess your input data so that the network won't receive such large numbers. For example, you can divide every input by 100, as the largest possible integer in the training data is 100.
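For intuition, here is a minimal sketch (reusing the numbers from the example above) of how flat the sigmoid is at a pre-activation of 31, and how dividing the inputs by 100 brings the gradient back to a usable size.

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

raw = 50 * 0.5 + 20 * 0.3                     # 31: deep in the flat region of the sigmoid
scaled = (50 / 100) * 0.5 + (20 / 100) * 0.3  # 0.31 after dividing the inputs by 100
print(sigmoid_derivative(raw))     # ~3e-14, effectively zero, so the weights barely move
print(sigmoid_derivative(scaled))  # ~0.24, a gradient the network can actually learn from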
Putting it all together
Below is the code with the above-mentioned problems fixed. Since it uses a different loss function, all of the gradients are different (feel free to double-check, as I'm not that confident in calculus). I also added a print to keep track of the loss across epochs.
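For reference, the chain-rule derivation behind the updates in the code below can be written compactly (a sketch you can check the in-code comments against), with t the target, o the output, and h_1 = sigmoid(x1 * num1 + y1 * num2):

E = (t - o)^2, \qquad o = z_1 h_1 + z_2 h_2
\frac{\partial E}{\partial z_1} = -2\,(t - o)\,h_1
\frac{\partial E}{\partial x_1} = \frac{\partial E}{\partial o}\,\frac{\partial o}{\partial h_1}\,\frac{\partial h_1}{\partial x_1} = -2\,(t - o)\,z_1\,h_1(1 - h_1)\,\mathrm{num}_1

and symmetrically for z_2, x_2, y_1 and y_2.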
from random import randint, random, uniform
import numpy as np


class Data:
    data_dict = {}

    def __init__(self, limit):
        self.limit = limit

    """creates data but beware that the limit may not be the same as the size of the dictionary"""
    def create_data(self):
        for i in range(self.limit):
            num1 = randint(0, 100)
            num2 = randint(0, 100)
            self.data_dict[(num1, num2)] = num1 + num2


"""you compare the error with every test in the data set and find weights that minimise the error"""
class Neural:
    def __init__(self, data):
        self.x1 = uniform(-1, 1)
        self.x2 = uniform(-1, 1)
        self.y1 = uniform(-1, 1)
        self.y2 = uniform(-1, 1)
        self.z1 = uniform(-1, 1)
        self.z2 = uniform(-1, 1)
        self.data = data

    def relu(self, number):
        return max(0, number)

    def sigmoid(self, number):
        return 1 / (1 + np.exp(-number))

    """weighted summation with activation function to compute output"""
    def compute_output(self, num1, num2):
        num1, num2 = num1 / 100, num2 / 100
        hidden_layer_input1 = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
        hidden_layer_input2 = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
        return 100 * ((hidden_layer_input1 * self.z1) + (hidden_layer_input2 * self.z2))

    """mean squared error between the actual output and the output generated by the algorithm"""
    def compare_ouput(self, data):
        """actually, better to find error between all tests. add all the errors up"""
        error = 0
        for key in data.data_dict:
            error += abs(data.data_dict[key] - self.compute_output(key[0], key[1])) ** 2
        return error / len(data.data_dict)
        # return abs(actual - self.compute_output(num1, num2))

    """TODO function that changes the weight depending on the errors using gradient descent"""
    """first make it random"""
    """next perhaps change weights for each test and average out the adjustments for each weight"""
    def random_back_propagation(self):
        error = 100000
        while error > 0.1:
            self.x1 = random()
            self.x2 = random()
            self.y1 = random()
            self.y2 = random()
            self.z1 = random()
            self.z2 = random()
            error = self.compare_ouput(self.data)
            print(error)
        print(self.compute_output(140, 15))

    """learning rate is the amount the weights are updated during training"""
    def back_propagation(self, learning_rate):
        for epoch in range(1000):
            errors = []
            for key in self.data.data_dict:
                num1, num2 = key
                target = self.data.data_dict[key]

                # Rescaling everything
                num1, num2 = num1 / 100, num2 / 100
                target = target / 100

                hidden_layer1_output = self.sigmoid((num1 * self.x1) + (num2 * self.y1))
                hidden_layer2_output = self.sigmoid((num1 * self.x2) + (num2 * self.y2))
                output = (hidden_layer1_output * self.z1) + (hidden_layer2_output * self.z2)

                # loss
                error = (target - output) ** 2
                # check if you are happy with the error

                # derivative of error with respect to output
                # d(error)/d(output) = -2 * (target - output)

                # derivative of error with respect to x1
                # d(error)/d(x1) = d(error)/d(output) * d(output)/d(x1)
                #                                       /-> hidden_layer1_output
                # d(error)/d(x1) = d(error)/d(output) * ( d(output)/d(hl1) * d(hl1)/d(x1) )
                # derivative of output with respect to hl1
                # d(output)/d(hl1) = z1
                # derivative of hl1 with respect to x1
                # d(hl1)/d(x1) = hidden_layer1_output * (1 - hidden_layer1_output) * num1
                # d(error)/d(x1) = -2 * (target - output) * z1 * hidden_layer1_output * (1 - hidden_layer1_output) * num1
                self.x1 -= learning_rate * -2 * (target - output) * self.z1 * hidden_layer1_output * (1 - hidden_layer1_output) * num1
                self.y1 -= learning_rate * -2 * (target - output) * self.z1 * hidden_layer1_output * (1 - hidden_layer1_output) * num2
                self.x2 -= learning_rate * -2 * (target - output) * self.z2 * hidden_layer2_output * (1 - hidden_layer2_output) * num1
                self.y2 -= learning_rate * -2 * (target - output) * self.z2 * hidden_layer2_output * (1 - hidden_layer2_output) * num2

                # derivative of error with respect to z1
                # d(error)/d(z1) = d(error)/d(output) * d(output)/d(z1)
                # derivative of output with respect to z1
                # d(output)/d(z1) = hidden_layer1_output
                # d(error)/d(z1) = -2 * (target - output) * hidden_layer1_output
                self.z1 -= learning_rate * -2 * (target - output) * hidden_layer1_output
                self.z2 -= learning_rate * -2 * (target - output) * hidden_layer2_output

                # print(self.x1, self.x2, self.y1, self.y2, self.z1, self.z2)
                errors.append(error)
            print(f"Mean error: {np.mean(errors)}")


data = Data(2000)
data.create_data()
neural = Neural(data)
neural.back_propagation(0.1)

print("#################################PREDICTIONS############################################")
print(f"15 + 7 = {neural.compute_output(15, 7)}")
print(f"3 + 2 = {neural.compute_output(3, 2)}")
print(f"50 + 70 = {neural.compute_output(50, 70)}")