My Neural Network (from scratch) training leaves it further away from target

Question

This is my first time creating a neural network, and I've decided to write it in Go, which is not a language typically used for this; however, I wanted to build a solid understanding of how neural networks work from scratch, using only basic libraries.

The goal of the program is to train a neural network to add two numbers (1-10) together. To do this I have created a neural network class called RawAI (the best name I could come up with) and given it one input layer (an array of size 2), one hidden layer (an array of size 2) and one output layer (an array of size 1).

There are two 2D arrays for weights: IH (input to hidden), sized [2][2], and HO (hidden to output), sized [2][1].
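
In matrix terms (writing $x$ for the input layer, $h$ for the hidden layer and $y$ for the output layer), the forward pass implemented below is just two matrix-vector products:

$$
h_i = \sum_{j} x_j \, W^{IH}_{ji}, \qquad y_k = \sum_{j} h_j \, W^{HO}_{jk}
$$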

Below is the code that initializes the AI, trains it, and tests it. You will see a couple of the debugging statements I used; any function that is not native to Go or its standard library appears in the RawAI code further down. This is called by my main function:

func AdditionNeuralNetworkTest() {
	nn := NewRawAI(2, 2, 1, 1/math.Pow(10, 15))
	fmt.Printf("Weights IH Before: %v\n\nWeights HO After: %v\n", nn.WeightsIH, nn.WeightsHO)
	//Train Neural Network
	//
	for epoch := 0; epoch < 10000000; epoch++ {
		for i := 0; i <= 10; i++ {
			for j := 0; j <= 10; j++ {
				inputs := make([]float64, 2)
				targets := make([]float64, 1)
				inputs[0] = float64(i)
				inputs[1] = float64(j)
				targets[0] = float64(i) + float64(j)
				nn.Train(inputs, targets)
				if epoch%20000 == 0 && i == 5 && j == 5 {
					fmt.Printf("[TRAINING] [EPOCH %d] %f + %f = %f TARGETS[%f]\n", epoch, inputs[0], inputs[1], nn.OutputLayer[0], targets[0])
				}

			}

		}
	}
	// Test neural network
	a := rand.Intn(10) + 1
	b := rand.Intn(10) + 1
	inputs := make([]float64, 2)
	inputs[0] = float64(a)
	inputs[1] = float64(b)
	prediction := nn.FeedForward(inputs)[0]
	fmt.Printf("%d + %d = %f\n", a, b, prediction)
	fmt.Printf("Weights IH: %v\n\nWeights HO: %v\n", nn.WeightsIH, nn.WeightsHO)

}

Below is all of the code in the RawAI file:

package main

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"math"
)

type RawAI struct {
	InputLayer   []float64   `json:"input_layer"`
	HiddenLayer  []float64   `json:"hidden_layer"`
	OutputLayer  []float64   `json:"output_layer"`
	WeightsIH    [][]float64 `json:"weights_ih"`
	WeightsHO    [][]float64 `json:"weights_ho"`
	LearningRate float64     `json:"learning_rate"`
}

func NewRawAI(inputSize, hiddenSize, outputSize int, learningRate float64) *RawAI {
	nn := RawAI{
		InputLayer:   make([]float64, inputSize),
		HiddenLayer:  make([]float64, hiddenSize),
		OutputLayer:  make([]float64, outputSize),
		WeightsIH:    randomMatrix(inputSize, hiddenSize),
		WeightsHO:    randomMatrix(hiddenSize, outputSize),
		LearningRate: learningRate,
	}
	return &nn
}
func (nn *RawAI) FeedForward(inputs []float64) []float64 {
	// Set input layer
	for i := 0; i < len(inputs); i++ {
		nn.InputLayer[i] = inputs[i]
	}

	// Compute hidden layer
	for i := 0; i < len(nn.HiddenLayer); i++ {
		sum := 0.0
		for j := 0; j < len(nn.InputLayer); j++ {
			sum += nn.InputLayer[j] * nn.WeightsIH[j][i]
		}
		nn.HiddenLayer[i] = sum
		if math.IsNaN(sum) {
			panic(fmt.Sprintf("Sum is NaN on Hidden Layer:\nInput Layer: %v\nHidden Layer: %v\nWeights IH: %v\n", nn.InputLayer, nn.HiddenLayer, nn.WeightsIH))
		}

	}

	// Compute output layer
	for k := 0; k < len(nn.OutputLayer); k++ {
		sum := 0.0
		for j := 0; j < len(nn.HiddenLayer); j++ {
			sum += nn.HiddenLayer[j] * nn.WeightsHO[j][k]
		}
		nn.OutputLayer[k] = sum
		if math.IsNaN(sum) {
			panic(fmt.Sprintf("Sum is NaN on Output Layer:\n Model: %v\n", nn))
		}

	}

	return nn.OutputLayer
}
func (nn *RawAI) Train(inputs []float64, targets []float64) {
	nn.FeedForward(inputs)

	// Compute output layer error
	outputErrors := make([]float64, len(targets))
	for k := 0; k < len(targets); k++ {
		outputErrors[k] = targets[k] - nn.OutputLayer[k]
	}

	// Compute hidden layer error
	hiddenErrors := make([]float64, len(nn.HiddenLayer))
	for j := 0; j < len(nn.HiddenLayer); j++ {
		errorSum := 0.0
		for k := 0; k < len(nn.OutputLayer); k++ {
			errorSum += outputErrors[k] * nn.WeightsHO[j][k]
		}
		hiddenErrors[j] = errorSum * sigmoidDerivative(nn.HiddenLayer[j])
		if math.IsInf(math.Abs(hiddenErrors[j]), 1) {
			//Find out why
			fmt.Printf("Hidden Error is Infinite:\nTargets:%v\nOutputLayer:%v\n\n", targets, nn.OutputLayer)
		}
	}

	// Update weights
	for j := 0; j < len(nn.HiddenLayer); j++ {
		for k := 0; k < len(nn.OutputLayer); k++ {
			delta := nn.LearningRate * outputErrors[k] * nn.HiddenLayer[j]
			nn.WeightsHO[j][k] += delta
		}
	}
	for i := 0; i < len(nn.InputLayer); i++ {
		for j := 0; j < len(nn.HiddenLayer); j++ {
			delta := nn.LearningRate * hiddenErrors[j] * nn.InputLayer[i]
			nn.WeightsIH[i][j] += delta
			if math.IsNaN(delta) {
				fmt.Printf("Delta is NaN.\nLearning Rate: %f\nHidden Errors: %f\nInput: %f\n", nn.LearningRate, hiddenErrors[j], nn.InputLayer[i])
			}
			if math.IsNaN(nn.WeightsIH[i][j]) {
				fmt.Printf("Weight is NaN.\nLearning Rate: %f\nHidden Errors: %f\nInput: %f\n", nn.LearningRate, hiddenErrors[j], nn.InputLayer[i])
			}
		}
	}

}
func (nn *RawAI) ExportWeights(filename string) error {
	weightsJson, err := json.Marshal(nn)
	if err != nil {
		return err
	}
	err = ioutil.WriteFile(filename, weightsJson, 0644)
	if err != nil {
		return err
	}
	return nil
}
func (nn *RawAI) ImportWeights(filename string) error {
	weightsJson, err := ioutil.ReadFile(filename)
	if err != nil {
		return err
	}
	err = json.Unmarshal(weightsJson, nn)
	if err != nil {
		return err
	}
	return nil
}

// RawAI Tools:

// randomMatrix builds a rows x cols matrix; every entry is initialized to 1.0.
func randomMatrix(rows, cols int) [][]float64 {
	matrix := make([][]float64, rows)
	for i := 0; i < rows; i++ {
		matrix[i] = make([]float64, cols)
		for j := 0; j < cols; j++ {
			matrix[i][j] = 1.0
		}
	}
	return matrix
}

// sigmoid is the logistic function 1 / (1 + e^-x), built on the local exp below.
func sigmoid(x float64) float64 {
	return 1.0 / (1.0 + exp(-x))
}

// sigmoidDerivative computes s * (1 - s), the derivative of the sigmoid
// expressed in terms of the sigmoid's output s.
func sigmoidDerivative(x float64) float64 {
	return x * (1.0 - x)
}

// exp approximates e^x with a truncated (4th-order) Taylor series around 0.
func exp(x float64) float64 {
	return 1.0 + x + (x*x)/2.0 + (x*x*x)/6.0 + (x*x*x*x)/24.0
}

An example of the output is this: [training-log screenshot not reproduced here]
As you can see, it slowly moves further from the target and continues to do so.
After asking around, googling, and searching through this website, I could not find my error, so I decided to ask this question.


Answer 1

Score: 3

I think you are using mean-squared error and forgot the minus sign that comes out of the differentiation.

So change:

outputErrors[k] = (targets[k] - nn.OutputLayer[k])

to:

outputErrors[k] = -(targets[k] - nn.OutputLayer[k])
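
For reference, the sign in question comes straight from differentiating the squared-error loss; writing $t_k$ for the target and $y_k$ for the network output,

$$
E = \tfrac{1}{2}\,(t_k - y_k)^2
\qquad\Longrightarrow\qquad
\frac{\partial E}{\partial y_k} = -(t_k - y_k)
$$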

Answer 2

Score: 0

This is kind of off-target, since you wanted to do it from scratch. But if you are interested in machine learning for Go, it's worth checking out github.com/gomlx/gomlx, which is something like JAX/TensorFlow for Go.

While the matrix multiplication is done in the accelerator (XLA), you can find there clean (and well-documented) implementations of the other parts you will probably want to learn about, such as the various optimizers (Adam in particular), normalization (layer and batch), dropout, losses, metrics and related things. These play a large role in the success of neural networks.

There is also a tutorial and some examples.
