Debugging a Backprop Neural Network: Issues & Solutions

In summary: a from-scratch n-layer, m-width backprop neural network trained on XOR converges to roughly the mean of the targets (about 0.48) for every input, and the thread works through the poster's feedforward and backprop code to find out why.
  • #1
Superposed_Cat
I am trying, for the first time in 5 years, to make an n-layer, m-width backprop neural network from scratch. My issue: I've tried training it on XOR, and it returns 0.48 for every input instead of 1 for half of them and 0 for the other half. If I give it a dataset where the outputs range from 0.6 to 0.99 with an average of 0.7, it returns 0.7 for everything. I feel like my loop structure and math are correct, but I'm failing to see my problem.

full code: https://www.mediafire.com/file/f58ia4kj4hmhz99/ConsoleApp3.rar/file

output:

[screenshot of program output omitted]
backprop:
C#:
public void bp(double[] x)
        {
            for (int i = 0; i < outputs.Length; i++)
            {
                outputs[i].e = -(outputs[i].a - x[i]) *
                    sig_dx(outputs[i].a);

                for (int j = 0; j < width; j++)
                {
                    outputs[i].w[j] += outputs[i].e * n[n.GetLength(0) - 1, j].a;
                }
            }

            for (int j = 0; j < width; j++)
            {
                double sum = 0;
                for (int k = 0; k < outputs.Length; k++)
                {
                    sum += outputs[k].e * sig_dx(n[n.GetLength(0)-1,j].a) *
                        outputs[k].w[j];
                }
                n[n.GetLength(0)-1, j].e = sum;
            }
            
            /*for (int i = layers - 1; i > 0; i--)
            {
                for (int j = 0; j < width; j++)
                {
                    for (int k = 0; k < width; k++)
                    {
                        n[i, j].w[k] += n[i, j].e //* sig_dx(n[i, j].a)
                            * n[i - 1, k].a;
                    }
                }
            }*/
            for (int i = layers - 2; i >= 0; i--)
            {
                for (int j = 0; j < width; j++)
                {
                    double sum = 0;
                    for (int k = 0; k < width; k++)
                    {
                        sum += n[i + 1, k].e * sig_dx(n[i, j].a) *
                            n[i + 1, k].w[j];
                    }
                    n[i, j].e = sum;
                }
            }
            
            //

            for (int j = 0; j < width; j++)
            {
                double sum = 0;
                for (int k = 0; k < width; k++)
                {
                    sum += n[1, k].e * sig_dx(n[0, j].a) *
                        n[1, k].w[j];
                }
                n[0, j].e = sum;
            }

            for (int j = 0; j < width; j++)
            {
                for (int k = 0; k < inputs.Length; k++)
                {
                    n[0, j].w[k] += n[0, j].e //* sig_dx(n[i, j].a)
                        * inputs[k];
                }
            }
        }
feedforward:
C#:
public void ff(double[] x)
        {
            inputs = x;
            for (int j = 0; j < width; j++)
            {
                double sum = 0;
                for (int k = 0; k < x.Length; k++)
                {
                    sum += n[0, j].w[k] * x[k];
                }
                n[0, j].a = sig(sum);
            }
            for (int i = 1; i < layers; i++)
            {
                for(int j = 0; j < width; j++)
                {
                    double sum = 0;
                    for(int k = 0; k < width; k++)
                    {
                        sum += n[i, j].w[k] * n[i - 1, k].a;
                    }
                    n[i, j].a = sig(sum);
                }
            }
            for (int j = 0; j < outputs.Length; j++)
            {
                double sum = 0;
                for (int k = 0; k < width; k++)
                {
                    sum += n[n.GetLength(0)-1, k].a * outputs[j].w[k];
                }
                outputs[j].a = sig(sum);
            }
        }
training:
C#:
var data2 =
                new double[][][] {
                new double[][]{new double[] { 0, 0 }, new double[] { 0 } },
            new double[][]{new double[] { 1, 0 }, new double[] { 1 } },
            new double[][]{new double[] { 0, 1 }, new double[] { 1 } },
            new double[][]{new double[] { 1, 1 }, new double[] { 0 } }
            };
            net n = new net(2, 1, 4, 3);
            for (int t = 0; t < 1000; t++)
            {
                for (int i = 0; i < data2.Length; i++)
                {
                    n.ff(data2[i][0]);
                    n.bp(data2[i][1]);
                }
            }
            Console.WriteLine("done");
            for (int i = 0; i < data2.Length; i++)
            {
                n.ff(data2[i][0]);//new double[] { d,1 });
                Console.WriteLine(n.outputs[0].a);
            }
initialization:
C#:
public class node
    {
        public double a;
        public double e;
        public double[] w;
        Random r = new Random();
        public node(int pl)
        {
            a = 0;
            e = 10;
            w = new double[pl];
            for(int i = 0; i < pl; i++)
            {
                w[i] = r.NextDouble();
            }
        }
    }
    public class net
    {
        public node[,] n;
        public node[] outputs;
        double[] inputs;
        int layers;
        int width;
        public net(int inp,int outp,int layers,int width)
        {
            this.width = width;
            this.layers= layers;
            outputs = new node[outp];
            for(int i = 0; i < outp; i++)
            {
                outputs[i] = new node(width);
            }
            n = new node[layers,width];
            for (int j = 0; j < width; j++)
            {
                n[0, j] = new node(inp);
            }
            for (int i = 1; i < layers; i++)
            {
                for(int j = 0; j < width; j++)
                {
                    n[i, j] = new node(width);
                }
            }
        }
        double sig(double x)
        {
            return 1.0 / (1.0 + Math.Exp(-x));
        }
        double sig_dx(double x)
        {
            return x * (1.0 - x);
        }
Any help appreciated.
 
  • #2
I see you haven't had a response yet on this. Rather than just posting your code to be debugged, try walking us through what each section of code is supposed to do; you may get more responses.
 
  • #3
Superposed_Cat said:
My issue: I've tried training it on XOR, and it returns 0.48 for every input instead of 1 for half of them and 0 for the other half. ...
For an XOR gate with independent input probabilities a and b, the output probability is P = a*(1-b) + b*(1-a). Can your network learn to evaluate that equation? Probabilities of 0 and 1 are the only certainties; any input approaching 0.5 will produce an output closer to 0.5.

I believe you must train an XOR gate with inputs ranging between 0 and 1, including values in the vicinity of 0.5; maybe you are starving it of information. Trained on only one side of the range it will settle into a linear relationship, but the actual XOR relationship is non-linear and symmetric about 0.5. A sketch of such a training set is below.
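Here is a minimal sketch of how such a training set could be built, reusing the double[][][] layout from post #1; the sample count and the fixed random seed are arbitrary choices, not anything from the original code:
C#:
var rng = new Random(0);
var data = new double[200][][];                 // 200 samples is an arbitrary choice
for (int s = 0; s < data.Length; s++)
{
    double a = rng.NextDouble();                // input probability a in [0, 1]
    double b = rng.NextDouble();                // input probability b in [0, 1]
    double p = a * (1 - b) + b * (1 - a);       // XOR output probability P
    data[s] = new double[][] { new double[] { a, b }, new double[] { p } };
}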
 
  • #4
I haven't looked at the code in detail, but it looks like you are training with 1,000 epochs? I can't pick out what learning rate you are using (some comments in the code would help).

Anyway, if I remember rightly, training for XOR takes a surprising number of epochs: the mean loss stays high and stable for a long time, then begins to fall, slowly at first, before settling close to zero. Have you tried plotting the mean loss over, say, 5,000 epochs? A rough sketch of how to log it is below.
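As a minimal sketch, assuming the net class and the data2 array from post #1 (nothing here comes from the original code beyond those names):
C#:
// Log the mean squared error once per epoch so the loss curve can be plotted.
net n = new net(2, 1, 4, 3);
for (int epoch = 0; epoch < 5000; epoch++)
{
    double loss = 0;
    for (int i = 0; i < data2.Length; i++)
    {
        n.ff(data2[i][0]);
        double err = n.outputs[0].a - data2[i][1][0];   // output minus target
        loss += err * err;
        n.bp(data2[i][1]);
    }
    if (epoch % 100 == 0)
        Console.WriteLine($"epoch {epoch}: mean loss {loss / data2.Length:F4}");
}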
 

1. What is backpropagation and why is it important in neural networks?

Backpropagation is a commonly used algorithm for training neural networks. It involves calculating the gradient of the error function with respect to the weights of the network, and using this gradient to update the weights in order to minimize the error. This process is important because it allows the network to learn from its mistakes and improve its performance.
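As a hedged illustration only, here is what one gradient-descent update could look like for the weights of a single sigmoid neuron; the function name, parameters, and learning rate are placeholders rather than part of any particular library:
C#:
// One gradient-descent step for a single sigmoid neuron.
// E = 0.5 * (target - a)^2, a = sigmoid(sum_j w[j] * x[j]), and sigmoid'(a) = a * (1 - a).
static void UpdateWeights(double[] w, double[] x, double a, double target, double lr)
{
    double delta = (target - a) * a * (1.0 - a);  // -dE/d(weighted sum)
    for (int j = 0; j < w.Length; j++)
        w[j] += lr * delta * x[j];                // move each weight down the error gradient
}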

2. What are some common issues that can occur during backpropagation in a neural network?

Some common issues that can occur during backpropagation include vanishing gradients, exploding gradients, and overfitting. These issues can hinder the training process and result in poor performance of the network.

3. How can vanishing gradients be addressed in backpropagation?

Vanishing gradients occur when the gradient of the error function becomes extremely small, making it difficult for the weights to be updated effectively. This can be addressed by using activation functions that have a non-zero derivative, such as ReLU, and by initializing the weights to appropriate values.
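For example, a ReLU activation and its derivative can be written as follows (a small sketch, not tied to the thread's code):
C#:
// ReLU keeps a gradient of 1 for positive inputs, so it does not squash
// gradients the way sigmoid does for large |x|.
static double Relu(double x)   => x > 0 ? x : 0.0;
static double ReluDx(double x) => x > 0 ? 1.0 : 0.0;   // derivative w.r.t. the pre-activation x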

4. How can exploding gradients be prevented in backpropagation?

Exploding gradients occur when the gradient of the error function becomes extremely large, causing the weights to be updated too drastically. This can be prevented by clipping the gradients to a certain threshold or by reducing the learning rate.
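A minimal sketch of clipping a single gradient value to a threshold (the threshold of 1.0 is an arbitrary choice):
C#:
// Clamp a gradient component so one step cannot blow up the weights.
static double ClipGradient(double g, double threshold = 1.0)
{
    if (g > threshold) return threshold;
    if (g < -threshold) return -threshold;
    return g;
}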

5. What are some techniques for avoiding overfitting in a backprop neural network?

Overfitting occurs when the neural network becomes too specialized on the training data and does not generalize well to new data. Techniques to avoid overfitting include using dropout layers, adding regularization terms to the error function, and using early stopping to prevent the network from overtraining on the data.
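As a rough sketch of early stopping, with trainOneEpoch and validationLoss standing in as hypothetical placeholders for whatever training and evaluation code already exists:
C#:
// Stop training once the validation loss has not improved for `patience` epochs.
static void TrainWithEarlyStopping(Action trainOneEpoch, Func<double> validationLoss,
                                   int maxEpochs = 10000, int patience = 20)
{
    double best = double.MaxValue;
    int sinceImprovement = 0;
    for (int epoch = 0; epoch < maxEpochs; epoch++)
    {
        trainOneEpoch();                        // hypothetical: one pass over the training set
        double valLoss = validationLoss();      // hypothetical: loss on a held-out set
        if (valLoss < best) { best = valLoss; sinceImprovement = 0; }
        else if (++sinceImprovement >= patience) break;   // no recent improvement: stop
    }
}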
