
huangapple go评论100阅读模式

My Neural Network (from scratch) training, leaves it further away from target






func AdditionNeuralNetworkTest() {
	nn := NewRawAI(2, 2, 1, 1/math.Pow(10, 15))
	fmt.Printf("Weights IH Before: %v\n\nWeights HO After: %v\n", nn.WeightsIH, nn.WeightsHO)
	//Train Neural Network
	for epoch := 0; epoch < 10000000; epoch++ {
		for i := 0; i <= 10; i++ {
			for j := 0; j <= 10; j++ {
				inputs := make([]float64, 2)
				targets := make([]float64, 1)
				inputs[0] = float64(i)
				inputs[1] = float64(j)
				targets[0] = float64(i) + float64(j)
				nn.Train(inputs, targets)
				if epoch%20000 == 0 && i == 5 && j == 5 {
					fmt.Printf("[TRAINING] [EPOCH %d] %f + %f = %f TARGETS[%f]\n", epoch, inputs[0], inputs[1], nn.OutputLayer[0], targets[0])


	// Test neural network
	a := rand.Intn(10) + 1
	b := rand.Intn(10) + 1
	inputs := make([]float64, 2)
	inputs[0] = float64(a)
	inputs[1] = float64(b)
	prediction := nn.FeedForward(inputs)[0]
	fmt.Printf("%d + %d = %f\n", a, b, prediction)
	fmt.Printf("Weights IH: %v\n\nWeights HO: %v\n", nn.WeightsIH, nn.WeightsHO)



type RawAI struct {
	InputLayer   []float64   `json:"input_layer"`
	HiddenLayer  []float64   `json:"hidden_layer"`
	OutputLayer  []float64   `json:"output_layer"`
	WeightsIH    [][]float64 `json:"weights_ih"`
	WeightsHO    [][]float64 `json:"weights_ho"`
	LearningRate float64     `json:"learning_rate"`

func NewRawAI(inputSize, hiddenSize, outputSize int, learningRate float64) *RawAI {
	nn := RawAI{
		InputLayer:   make([]float64, inputSize),
		HiddenLayer:  make([]float64, hiddenSize),
		OutputLayer:  make([]float64, outputSize),
		WeightsIH:    randomMatrix(inputSize, hiddenSize),
		WeightsHO:    randomMatrix(hiddenSize, outputSize),
		LearningRate: learningRate,
	return &nn
func (nn *RawAI) FeedForward(inputs []float64) []float64 {
	// Set input layer
	for i := 0; i < len(inputs); i++ {
		nn.InputLayer[i] = inputs[i]

	// Compute hidden layer
	for i := 0; i < len(nn.HiddenLayer); i++ {
		sum := 0.0
		for j := 0; j < len(nn.InputLayer); j++ {
			sum += nn.InputLayer[j] * nn.WeightsIH[j][i]
		nn.HiddenLayer[i] = sum
		if math.IsNaN(sum) {
			panic(fmt.Sprintf("Sum is NaN on Hidden Layer:\nInput Layer: %v\nHidden Layer: %v\nWeights IH: %v\n", nn.InputLayer, nn.HiddenLayer, nn.WeightsIH))


	// Compute output layer
	for k := 0; k < len(nn.OutputLayer); k++ {
		sum := 0.0
		for j := 0; j < len(nn.HiddenLayer); j++ {
			sum += nn.HiddenLayer[j] * nn.WeightsHO[j][k]
		nn.OutputLayer[k] = sum
		if math.IsNaN(sum) {
			panic(fmt.Sprintf("Sum is NaN on Output Layer:\n Model: %v\n", nn))


	return nn.OutputLayer
func (nn *RawAI) Train(inputs []float64, targets []float64) {

	// Compute output layer error
	outputErrors := make([]float64, len(targets))
	for k := 0; k < len(targets); k++ {
		outputErrors[k] = targets[k] - nn.OutputLayer[k]

	// Compute hidden layer error
	hiddenErrors := make([]float64, len(nn.HiddenLayer))
	for j := 0; j < len(nn.HiddenLayer); j++ {
		errorSum := 0.0
		for k := 0; k < len(nn.OutputLayer); k++ {
			errorSum += outputErrors[k] * nn.WeightsHO[j][k]
		hiddenErrors[j] = errorSum * sigmoidDerivative(nn.HiddenLayer[j])
		if math.IsInf(math.Abs(hiddenErrors[j]), 1) {
			//Find out why
			fmt.Printf("Hidden Error is Infinite:\nTargets:%v\nOutputLayer:%v\n\n", targets, nn.OutputLayer)

	// Update weights
	for j := 0; j < len(nn.HiddenLayer); j++ {
		for k := 0; k < len(nn.OutputLayer); k++ {
			delta := nn.LearningRate * outputErrors[k] * nn.HiddenLayer[j]
			nn.WeightsHO[j][k] += delta
	for i := 0; i < len(nn.InputLayer); i++ {
		for j := 0; j < len(nn.HiddenLayer); j++ {
			delta := nn.LearningRate * hiddenErrors[j] * nn.InputLayer[i]
			nn.WeightsIH[i][j] += delta
			if math.IsNaN(delta) {
				fmt.Print(fmt.Sprintf("Delta is NaN.\n Learning Rate: %f\nHidden Errors: %f\nInput: %f\n", nn.LearningRate, hiddenErrors[j], nn.InputLayer[i]))
			if math.IsNaN(nn.WeightsIH[i][j]) {
				fmt.Print(fmt.Sprintf("Delta is NaN.\n Learning Rate: %f\nHidden Errors: %f\nInput: %f\n", nn.LearningRate, hiddenErrors[j], nn.InputLayer[i]))

func (nn *RawAI) ExportWeights(filename string) error {
	weightsJson, err := json.Marshal(nn)
	if err != nil {
		return err
	err = ioutil.WriteFile(filename, weightsJson, 0644)
	if err != nil {
		return err
	return nil
func (nn *RawAI) ImportWeights(filename string) error {
	weightsJson, err := ioutil.ReadFile(filename)
	if err != nil {
		return err
	err = json.Unmarshal(weightsJson, nn)
	if err != nil {
		return err
	return nil

//RawAI Tools:
func randomMatrix(rows, cols int) [][]float64 {
	matrix := make([][]float64, rows)
	for i := 0; i < rows; i++ {
		matrix[i] = make([]float64, cols)
		for j := 0; j < cols; j++ {
			matrix[i][j] = 1.0
	return matrix
func sigmoid(x float64) float64 {
	return 1.0 / (1.0 + exp(-x))
func sigmoidDerivative(x float64) float64 {
	return x * (1.0 - x)

func exp(x float64) float64 {
	return 1.0 + x + (x*x)/2.0 + (x*x*x)/6.0 + (x*x*x*x)/24.0



This is my first time ever creating a neural network, and I've decided to create it in golang, which is not typically a language used for this, however I wanted to create a good understanding of how they work from scratch with only basic libraries.

The goal of the program is to train a Neural Network to be able to add two numbers, 1-10, together. To do this I have created a Neural Network class called RawAI (Best name I could come up with), and given it a 1 input layer (array of size 2), 1 hidden layer (array of size 2) and 1 output layer (array of size 1).

There are 2 2D arrays for weights, one is IH (input to Hidden) [2,2] and one is HO, [2,1].

Below is the code which initiates the AI, trains it, and tests it. You will see a couple of the debugging statements I have used, and any other function which is not native to golang or its package will be shown in the following code for my RawAI class. This is called by my main function.:

func AdditionNeuralNetworkTest() {
nn := NewRawAI(2, 2, 1, 1/math.Pow(10, 15))
fmt.Printf(&quot;Weights IH Before: %v\n\nWeights HO After: %v\n&quot;, nn.WeightsIH, nn.WeightsHO)
//Train Neural Network
for epoch := 0; epoch &lt; 10000000; epoch++ {
for i := 0; i &lt;= 10; i++ {
for j := 0; j &lt;= 10; j++ {
inputs := make([]float64, 2)
targets := make([]float64, 1)
inputs[0] = float64(i)
inputs[1] = float64(j)
targets[0] = float64(i) + float64(j)
nn.Train(inputs, targets)
if epoch%20000 == 0 &amp;&amp; i == 5 &amp;&amp; j == 5 {
fmt.Printf(&quot;[TRAINING] [EPOCH %d] %f + %f = %f TARGETS[%f]\n&quot;, epoch, inputs[0], inputs[1], nn.OutputLayer[0], targets[0])
// Test neural network
a := rand.Intn(10) + 1
b := rand.Intn(10) + 1
inputs := make([]float64, 2)
inputs[0] = float64(a)
inputs[1] = float64(b)
prediction := nn.FeedForward(inputs)[0]
fmt.Printf(&quot;%d + %d = %f\n&quot;, a, b, prediction)
fmt.Printf(&quot;Weights IH: %v\n\nWeights HO: %v\n&quot;, nn.WeightsIH, nn.WeightsHO)

Below is all of the code in the RawAI File:

type RawAI struct {
InputLayer   []float64   `json:&quot;input_layer&quot;`
HiddenLayer  []float64   `json:&quot;hidden_layer&quot;`
OutputLayer  []float64   `json:&quot;output_layer&quot;`
WeightsIH    [][]float64 `json:&quot;weights_ih&quot;`
WeightsHO    [][]float64 `json:&quot;weights_ho&quot;`
LearningRate float64     `json:&quot;learning_rate&quot;`
func NewRawAI(inputSize, hiddenSize, outputSize int, learningRate float64) *RawAI {
nn := RawAI{
InputLayer:   make([]float64, inputSize),
HiddenLayer:  make([]float64, hiddenSize),
OutputLayer:  make([]float64, outputSize),
WeightsIH:    randomMatrix(inputSize, hiddenSize),
WeightsHO:    randomMatrix(hiddenSize, outputSize),
LearningRate: learningRate,
return &amp;nn
func (nn *RawAI) FeedForward(inputs []float64) []float64 {
// Set input layer
for i := 0; i &lt; len(inputs); i++ {
nn.InputLayer[i] = inputs[i]
// Compute hidden layer
for i := 0; i &lt; len(nn.HiddenLayer); i++ {
sum := 0.0
for j := 0; j &lt; len(nn.InputLayer); j++ {
sum += nn.InputLayer[j] * nn.WeightsIH[j][i]
nn.HiddenLayer[i] = sum
if math.IsNaN(sum) {
panic(fmt.Sprintf(&quot;Sum is NaN on Hidden Layer:\nInput Layer: %v\nHidden Layer: %v\nWeights IH: %v\n&quot;, nn.InputLayer, nn.HiddenLayer, nn.WeightsIH))
// Compute output layer
for k := 0; k &lt; len(nn.OutputLayer); k++ {
sum := 0.0
for j := 0; j &lt; len(nn.HiddenLayer); j++ {
sum += nn.HiddenLayer[j] * nn.WeightsHO[j][k]
nn.OutputLayer[k] = sum
if math.IsNaN(sum) {
panic(fmt.Sprintf(&quot;Sum is NaN on Output Layer:\n Model: %v\n&quot;, nn))
return nn.OutputLayer
func (nn *RawAI) Train(inputs []float64, targets []float64) {
// Compute output layer error
outputErrors := make([]float64, len(targets))
for k := 0; k &lt; len(targets); k++ {
outputErrors[k] = targets[k] - nn.OutputLayer[k]
// Compute hidden layer error
hiddenErrors := make([]float64, len(nn.HiddenLayer))
for j := 0; j &lt; len(nn.HiddenLayer); j++ {
errorSum := 0.0
for k := 0; k &lt; len(nn.OutputLayer); k++ {
errorSum += outputErrors[k] * nn.WeightsHO[j][k]
hiddenErrors[j] = errorSum * sigmoidDerivative(nn.HiddenLayer[j])
if math.IsInf(math.Abs(hiddenErrors[j]), 1) {
//Find out why
fmt.Printf(&quot;Hidden Error is Infinite:\nTargets:%v\nOutputLayer:%v\n\n&quot;, targets, nn.OutputLayer)
// Update weights
for j := 0; j &lt; len(nn.HiddenLayer); j++ {
for k := 0; k &lt; len(nn.OutputLayer); k++ {
delta := nn.LearningRate * outputErrors[k] * nn.HiddenLayer[j]
nn.WeightsHO[j][k] += delta
for i := 0; i &lt; len(nn.InputLayer); i++ {
for j := 0; j &lt; len(nn.HiddenLayer); j++ {
delta := nn.LearningRate * hiddenErrors[j] * nn.InputLayer[i]
nn.WeightsIH[i][j] += delta
if math.IsNaN(delta) {
fmt.Print(fmt.Sprintf(&quot;Delta is NaN.\n Learning Rate: %f\nHidden Errors: %f\nInput: %f\n&quot;, nn.LearningRate, hiddenErrors[j], nn.InputLayer[i]))
if math.IsNaN(nn.WeightsIH[i][j]) {
fmt.Print(fmt.Sprintf(&quot;Delta is NaN.\n Learning Rate: %f\nHidden Errors: %f\nInput: %f\n&quot;, nn.LearningRate, hiddenErrors[j], nn.InputLayer[i]))
func (nn *RawAI) ExportWeights(filename string) error {
weightsJson, err := json.Marshal(nn)
if err != nil {
return err
err = ioutil.WriteFile(filename, weightsJson, 0644)
if err != nil {
return err
return nil
func (nn *RawAI) ImportWeights(filename string) error {
weightsJson, err := ioutil.ReadFile(filename)
if err != nil {
return err
err = json.Unmarshal(weightsJson, nn)
if err != nil {
return err
return nil
//RawAI Tools:
func randomMatrix(rows, cols int) [][]float64 {
matrix := make([][]float64, rows)
for i := 0; i &lt; rows; i++ {
matrix[i] = make([]float64, cols)
for j := 0; j &lt; cols; j++ {
matrix[i][j] = 1.0
return matrix
func sigmoid(x float64) float64 {
return 1.0 / (1.0 + exp(-x))
func sigmoidDerivative(x float64) float64 {
return x * (1.0 - x)
func exp(x float64) float64 {
return 1.0 + x + (x*x)/2.0 + (x*x*x)/6.0 + (x*x*x*x)/24.0

The example of the output is this: 我的神经网络(从头开始)训练,使其与目标之间的距离进一步增大。
As you can see it slowly moves further from target and continues to do so.
After asking around, googling, and searching through this website I could not find what seemed to be my error, so I decided to ask this question.


得分: 3



outputErrors[k] = -(targets[k] - nn.OutputLayer[k])

I think you are using mean-square error and forget - after the differential.

So change:

outputErrors[k] =  (targets[k] - nn.OutputLayer[k])


outputErrors[k] = -(targets[k] - nn.OutputLayer[k])


得分: 0





This is kind of off-target, since you wanted to do it from scratch. But if you are interested in machine learning for Go, it's worth check out github.com/gomlx/gomlx, kind of like Jax/TensorFlow for Go.

While the matrix multiplication is done in the accelerator (XLA), you can check out there a clean (and well documented) the implementation of the other parts you probably will want to learn about, like the various optimizers (Adam in particular), normalization (layer and batch), dropout, losses, metrics and related things. They play a large role in the success of NN.

There is also a tutorial and some examples.

  • 本文由 发表于 2023年3月29日 11:28:39
  • 转载请务必保留本文链接:https://go.coder-hub.com/75872880.html



:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:
