Simple neural network in C++

Code Review · Asked by Sharif Hasan on October 27, 2021

I have implemented a neural network in C++, but I'm not sure whether my implementation is correct. My implementation of the neural network is given below.
As an inexperienced programmer, I welcome any and all insights to improve my skill.

#include "csv.h"
using namespace rapidcsv;
using namespace std;


class Neuron;
struct connection{
    connection(int i){
        weight=a_weight=0;
        id=i;
    }
    void weight_val(double w){
        weight=w;
    }
    void weight_acc(double a){
        a_weight+=a;
    }
    void reset(){
        a_weight=0.0;
    };
    void move(double m,double alpha,double lambda){
        weight=weight-alpha*a_weight/m-lambda*weight;
    }
    double weight,a_weight;
    int id=0;
};
typedef vector <Neuron> layer;
class Neuron{
public:
    Neuron(int idx,int nxt_layer_size){
        n_id=idx;
        for(int i=0;i<nxt_layer_size;i++){
            n_con.push_back(connection(i));
            n_con[i].weight_val(rand()/double(RAND_MAX));
        }
        set_val(0.0);
        is_output_neuron=false;
    }

    void hypothesis(layer &prev_layer){
        double sm=0;
        for(int i=0;i<prev_layer.size();i++){
            sm+=prev_layer[i].get_val()*prev_layer[i].get_con(n_id).weight;
        }
        set_val(sigmoid(sm));
        if(is_output_neuron){
            cost+=target*log(get_val())+(1-target)*log(1-get_val());
        }
    }

    void calc_delta(layer next_layer={}){
        if(is_output_neuron||next_layer.size()==0){
            delta=get_val()-target;
        }else{
            double sm=0;
            delta=delta_dot(next_layer)*sigmoid_prime(get_val());
        }
    }
    void calc_grad(layer &nxt_layer){
        for(int i=0;i<nxt_layer.size()-1;i++){
            n_con[i].weight_acc(get_val()*nxt_layer[i].get_delta());
        }
    }

    double flush_cost(){
        double tmp=cost;
        cost=0;
        return tmp;
    }

    double get_delta(){
        return delta;
    }
    void set_target(double x){
        target=x;
        is_output_neuron=true;
    }
    double get_val(){
        return a;
    }
    void set_val(double x){
        a=x;
    }

    void update_weight(double m,double alpha,double lambda){
        for(int i=0;i<n_con.size();i++){
            n_con[i].move(m,alpha,lambda);
            n_con[i].reset();
        }
    }


    connection get_con(int idx){
        return n_con[idx];
    }
private:
    int n_id;double a;
    vector <connection> n_con;
    static double sigmoid(double x){
        return 1.0/(1+exp(-x));
    }
    static double sigmoid_prime(double x){
        return x*(1-x);
    }
    double delta_dot(layer nxt_layer){
        assert(nxt_layer.size()-1==n_con.size());
        double sm=0;
        for(int i=0;i<n_con.size();i++){
            sm+=n_con[i].weight*nxt_layer[i].get_delta();
        }
        return sm;
    }
    double target,delta,cost=0;bool is_output_neuron;
};
class Network{
public:
    Network(vector <int> arch){
        srand(time(0));
        for(int i=0;i<arch.size();i++){
            int nxt_layer_size=i==arch.size()-1?0:arch[i+1];
            layer tmp;
            for(int j=0;j<=arch[i];j++){
                tmp.push_back(Neuron(j,nxt_layer_size));
            }
            tmp.back().set_val(1.0);
            n_layers.push_back(tmp);
        }
    }
    vector <double> feed_forward(vector <double> in,bool output=false){
        vector <double> ot;
        assert(in.size()==n_layers[0].size()-1);
        for(int i=0;i<in.size();i++){
            n_layers[0][i].set_val(in[i]);
        }
        for(int i=1;i<n_layers.size();i++){
            for(int j=0;j<n_layers[i].size()-1;j++){
                n_layers[i][j].hypothesis(n_layers[i-1]);
            }

        }
        if(output) {
            for(int i=0;i<n_layers.back().size()-1;i++){
                ot.push_back(n_layers.back()[i].get_val());
            }
        }
        return ot;
    }
    void feed_backward(vector <double> ot){
        assert(ot.size()==n_layers.back().size()-1);
        for(int i=0;i<ot.size();i++){
            n_layers.back()[i].set_target(ot[i]);
        }
        for(int i=0;i<n_layers.back().size()-1;i++){
            n_layers.back()[i].calc_delta();
        }

        for(int i=n_layers.size()-2;i>=0;i--){
            for(auto &a:n_layers[i]){
                a.calc_delta(n_layers[i+1]);
                a.calc_grad(n_layers[i+1]);
            }
        }

    }
    void done(double m){
        for(unsigned i=0;i<n_layers.size();i++){
            for(unsigned j=0;j<n_layers[i].size();j++){
                n_layers[i][j].update_weight(m,alpha,lambda);
            }
        }
    }

    double calc_cost(){
        for(int i=0;i<n_layers.back().size()-1;i++){
            cost_acc+=n_layers.back()[i].flush_cost();
        }
        return cost_acc;
    }
    double get_cost(double m){
        double tmp=cost_acc;
        cost_acc=0;
        return -tmp/m;
    }
    void set_hyper_params(double alpha,double lambda){
        this->alpha=alpha;
        this->lambda=lambda;
    }
private:
    vector <layer> n_layers;
    double cost_acc=0,alpha,lambda;
};

int main() {

    Network net({4,5,3});

    net.set_hyper_params(0.1,0.0);

    Document doc("../dataset.csv");
    vector <double> x1=doc.GetColumn<double>("x1");
    vector <double> x3=doc.GetColumn<double>("x3");
    vector <double> x4=doc.GetColumn<double>("x4");
    vector <double> x2=doc.GetColumn<double>("x2");
    vector <double> y=doc.GetColumn<double>("y");
    vector <double> lrc;
    for(int i=0;i<10000;i++){
        for(int j=0;j<x1.size();j++){
            net.feed_forward({x1[j],x2[j],x3[j],x4[j]});
            vector <double> ot;
            ot.push_back(y[j]==0);
            ot.push_back(y[j]==1);
            ot.push_back(y[j]==2);
            net.feed_backward(ot);
            net.calc_cost();

        }
        double cst=net.get_cost(x1.size());
        lrc.push_back(cst);
        if(i%100==0) cout<<"Cost="<<cst<<"/i="<<i<<endl;
        net.done(x1.size());
    }
    return 0;
}

Links: RapidCSV · Iris dataset

One Answer

Looks plausible. The two biggest pieces of advice I have for you are:

  • Format your code consistently and idiomatically! One easy way to do this is to use the clang-format tool on it. A more tedious, but rewarding, way is to study other people's code and try to emulate their style. For example, you should instinctively write vector<T>, not vector <T>.

  • It sounds like you're not sure if your code behaves correctly. For that, you should use unit tests. Figure out what it would mean — what it would look like — for a small part of your code to "behave correctly," and then write a small test that verifies that what you expect is actually what happens. Repeat many times. (One possible starting point is sketched right after this list.)
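
For concreteness, here is a minimal sketch of such a test, assuming sigmoid were pulled out of Neuron into a free function that a test file can call directly (the exact test framework doesn't matter; a plain assert is enough to start):

#include <cassert>
#include <cmath>

// Assumed to be extracted from Neuron so it can be tested in isolation.
double sigmoid(double x) {
    return 1.0 / (1.0 + std::exp(-x));
}

int main() {
    // sigmoid(0) must be exactly 0.5, and large inputs must saturate toward 1 and 0.
    assert(std::fabs(sigmoid(0.0) - 0.5) < 1e-12);
    assert(sigmoid(100.0) > 0.99);
    assert(sigmoid(-100.0) < 0.01);
}

The same idea scales up: build a tiny network with hand-picked weights, feed it one input, and assert that feed_forward returns the numbers you worked out on paper.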


Stylistically: Don't do using namespace std;. Every C++ programmer will tell you this. (Why not? There are reasons, but honestly the best reason is because everyone agrees that you shouldn't.)
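
In practice that just means qualifying the few names you actually use, or pulling in individual names rather than the whole namespace, for example:

#include <vector>

std::vector<double> costs;   // fully qualified
using std::vector;           // or a targeted using-declaration
vector<double> also_fine;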

Forward-declaring class Neuron; above struct connection is strange because connection doesn't actually need to use Neuron for anything.

connection(int i) is a non-explicit (converting) constructor, so the following line will compile and perform an implicit conversion:

connection conn = 42;

You don't want that. So mark this constructor explicit. (In fact, mark all constructors explicit, except for the two that you want to happen implicitly — that is, copy and move constructors. Everything else should be explicit.)
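
A sketch of that change, keeping everything else in the struct the same:

struct connection {
    explicit connection(int i) {
        weight = a_weight = 0;
        id = i;
    }
    // ... member functions unchanged ...
    double weight, a_weight;
    int id = 0;
};

int main() {
    connection good(42);     // direct initialization still works
    // connection bad = 42;  // no longer compiles: no implicit conversion from int
}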

weight_val and weight_acc look like they should be called set_weight and add_weight, respectively. Use noun phrases for things that are nouns (variables, types) and verb phrases for things that are verbs (functions). Also, avd unnec. abbr'n.

...Oooh! weight_val and weight_acc actually modify different data members! That was sneaky. Okay, from the formula in move, it looks like we've got a sort of an "alpha weight" and a "lambda weight"? I bet these have established names in the literature. So instead of weight_val(x) I would call it set_lambda_weight(x) (or whatever the established name is); instead of weight_acc(x) I would call it add_alpha_weight(x); and instead of reset I would call it set_alpha_weight(0).
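
Following that suggestion, the renamed interface might look roughly like this (lambda/alpha here are only the guessed terminology from the paragraph above; substitute whatever the literature actually calls these quantities):

struct connection {
    explicit connection(int i) : id(i) {}

    void set_lambda_weight(double w) { lambda_weight = w; }
    void add_alpha_weight(double a)  { alpha_weight += a; }
    void set_alpha_weight(double a)  { alpha_weight = a; }   // reset() becomes set_alpha_weight(0)

    double lambda_weight = 0.0;
    double alpha_weight  = 0.0;
    int id = 0;
};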

Further down, you use get_val() and set_val(x) to get and set a member whose actual name is a. Pick one name for one concept! If its proper name is a, call the methods get_a() and set_a(a). If its proper name is val, then name it val.
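
Either spelling works; the point is just that the accessors and the member agree, e.g.:

// Option 1: the member's proper name is a.
double get_a() const { return a; }
void   set_a(double x) { a = x; }

// Option 2: the member's proper name is val.
double get_val() const { return val; }
void   set_val(double x) { val = x; }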


void done(double m){
    for(unsigned i=0;i<n_layers.size();i++){
        for(unsigned j=0;j<n_layers[i].size();j++){
            n_layers[i][j].update_weight(m,alpha,lambda);
        }
    }
}

Again, the name of this method doesn't seem to indicate anything about its purpose. x.done() sounds like we're asking if x is done — it doesn't sound like a mutator method. Seems to me that the function should be called update_all_weights.

The body of this function can be written simply as

void update_all_weights(double m) {
    for (Layer& layer : n_layers) {
        for (Neuron& neuron : layer) {
            neuron.update_weight(m, alpha, lambda);
        }
    }
}

Notice that to distinguish the name of the type Layer from the name of the variable layer, I had to uppercase the former. You already uppercased Neuron, so uppercasing Layer should be a no-brainer.
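
If you haven't made that change already, it's a one-liner (shown with the newer using syntax; the old typedef spelling works just as well):

using Layer = std::vector<Neuron>;   // instead of: typedef vector <Neuron> layer;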


weight=weight-alpha*a_weight/m-lambda*weight;

This formula is impossible to read without some whitespace. Look how much clearer this is:

weight = weight - alpha*a_weight/m - lambda*weight;

And then we can rewrite it as:

weight -= ((alpha/m) * a_weight) + (lambda * weight);

I might even split that up into two subtractions, if I knew the slight change in result didn't matter (the second line then sees the already-updated weight, so this isn't exactly the same formula, just very close when alpha and lambda are small):

weight -= (alpha/m) * a_weight;
weight -= lambda * weight;

double weight,a_weight;

clang-format will probably do this for you (I hope!), but please: one declaration per line!

double weight;
double a_weight;

That should be enough nitpicking to give you something to do.

Answered by Quuxplusone on October 27, 2021
