home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Club Amiga de Montreal - CAM
/
CAM_CD_1.iso
/
files
/
618a.lha
/
NeuralNetwork
/
neural_network.h
< prev
next >
Wrap
C/C++ Source or Header
|
1992-03-02
|
16KB
|
321 lines
#ifndef _NEURAL_NETWORK_H_
#define _NEURAL_NETWORK_H_
// Steepness of the sigmoid computed by S(x).
#define SLOPE 1.0

// Logistic sigmoid: 1 / (1 + e^(-SLOPE * x)).
// The argument is evaluated exactly once.  SLOPE is parenthesized so the
// expansion stays correct if SLOPE is ever defined as a compound expression.
// Expands to a call to exp (); the including file must provide <math.h>
// because no #include is visible in this header.
#define S(x) (1.0 / (1.0 + exp (0.0 - (SLOPE) * (x))))

// Result codes of calc_forward_test, ordered CORRECT > GOOD > WRONG.
#define WRONG 0
#define GOOD 1
#define CORRECT 2
//****************************************************************************
//
// Neural_network class:
//
// This class performs all the necessary functions needed to train
// a Neural Network. The network has an input layer, two hidden
// layers, and an output layer. The size of each layer is specified
// at run time so there is no restriction on size except memory.
// This is a feed-forward network with full connections from one
// layer to the next.
//
// The network can perform straight back-propagation with no
// modifications (Rumelhart, Hinton, and Williams, 1985) which
// will find a solution but not very quickly. The network can also
// perform back-propagation with the delta-bar-delta rule developed
// by Robert A. Jacobs, University of Massachusetts
// (Neural Networks, Vol 1. pp.295-307, 1988). The basic idea of this
// rule is that every weight has its own learning rate and each
// learning rate should be continuously changed according to the
// following rules -
// - If the weight changes in the same direction as the previous update,
// then the learning rate for that weight should increase by a constant.
// - If the weight changes in the opposite direction to the previous
// update, then the learning rate for that weight should decrease
// exponentially.
//
// learning rate = e(t) for each individual weight
// The exact formula for the change in learning rate (DELTA e(t)) is
//
//
//               |  K            if DELTA_BAR(t-1)*DELTA(t) > 0
// DELTA e(t) =  |  -PHI*e(t)    if DELTA_BAR(t-1)*DELTA(t) < 0
//               |  0            otherwise
//
// where DELTA(t) = dJ(t) / dw(t) ---> Partial derivative
//
// and DELTA_BAR(t) = (1 - THETA)*DELTA(t) + THETA*DELTA_BAR(t-1).
//
// For full details of the algorithm, read the article in
// Neural Networks.
//
//
// To perform straight back-propagation, just construct a Neural_network
// with no learning parameters specified (they default to straight
// back-propagation) or set them to
// K = 0, PHI = 0, THETA = 1.0
//
// However, using the delta-bar-delta rule should increase your rate of
// convergence by a factor of 10 to 100 generally. The parameters for
// the delta-bar-delta rule I use are
// K = 0.025, PHI = 0.2, THETA = 0.8
//
// One more heuristic method has been employed in this Neural net class-
// the skip heuristic. This is something I thought of and I am sure
// other people have also. If the output activation is within
// skip_epsilon of its desired value for each output, then the calc_forward
// routine returns the skip_flag = 1. This allows you to not waste
// time trying to push already very close examples to the exact value.
// If the skip_flag comes back '1', then don't bother calculating forward
// or back-propagating the example for X number of epochs. You must
// write the routine to skip the example yourself, but the Neural_network
// will tell you when to skip the example. This heuristic also has the
// advantage of reducing memorization and increasing generalization.
// Typical values I use for this heuristic -
// skip_epsilon = 0.01 - 0.05
// number skipped = 2-10.
//
// Experiment with all the values to see which work best for your
// application.
//
//
// Comments and suggestions are welcome and can be emailed to me
// anstey@sun.soe.clarkson.edu
//
//****************************************************************************
class Neural_network {
private:
    // Storage kept by the network (every buffer is a flat array of double):
    //
    //   Activations of hidden layer 1              [num_hidden1]
    //   Activations of hidden layer 2              [num_hidden2]
    //   Activations of the output layer            [num_outputs]
    //
    //   Weights, input layer    -> hidden layer 1  [num_inputs]  [num_hidden1]
    //   Weights, hidden layer 1 -> hidden layer 2  [hidden1]     [hidden2]
    //   Weights, hidden layer 2 -> output layer    [hidden2]     [outputs]
    //
    //   3 matrices for the sum of all the deltas in an epoch - back-propagation
    //   2 matrices for the sum of deltas * weight for each neuron in hidden
    //     layers 1 and 2 - back-propagation
    //
    //   3 matrices for each weight's learning rate      - delta-bar-delta rule
    //   3 matrices for each weight's learning delta     - delta-bar-delta rule
    //   3 matrices for each weight's learning delta_bar - delta-bar-delta rule
    //
    // The get_*_weight accessors below address a weight as
    //   [destination_node * source_layer_size + source_node].
    //
    // NOTE(review): this class holds raw pointers and its destructor calls
    // deallocate_matrices (), yet no copy constructor or assignment operator
    // is declared.  A compiler-generated copy would share the same buffers
    // between two objects; if deallocate_matrices () frees them (its body is
    // not in this header - confirm), copying a network is unsafe.

    // Layer sizes.
    int num_inputs;
    int num_hidden1;
    int num_hidden2;
    int num_outputs;

    // Learning parameters (see the set_* / get_* functions below).
    double epsilon;
    double skip_epsilon;
    double learning_rate;
    double theta;
    double phi;
    double K;

    // Training counters; training_examples is what get_iterations () reports.
    long training_examples;
    long examples_since_update;

    // Node activations.
    double *hidden1_act;
    double *hidden2_act;
    double *output_act;

    // Connection weights.
    double *input_weights;
    double *hidden1_weights;
    double *hidden2_weights;

    // Per-weight state for the delta-bar-delta rule.
    double *input_learning_rate;
    double *hidden1_learning_rate;
    double *hidden2_learning_rate;
    double *input_learning_delta;
    double *hidden1_learning_delta;
    double *hidden2_learning_delta;
    double *input_learning_delta_bar;
    double *hidden1_learning_delta_bar;
    double *hidden2_learning_delta_bar;

    // Per-epoch accumulators used by back-propagation.
    double *input_weights_sum_delta;
    double *hidden1_weights_sum_delta;
    double *hidden2_weights_sum_delta;
    double *hidden1_sum_delta_weight;
    double *hidden2_sum_delta_weight;

    // Buffer management helpers; their definitions are outside this header.
    void allocate_matrices ();
    void initialize_matrices (double range);
    void deallocate_matrices ();

public:
    //-----------------------------------------------------------------------
    // Constructors:
    //   Full size specifications and learning parameters.
    //   Learning parameters are provided defaults which are set to
    //   just use the BP algorithm with no modifications.
    //
    //   Read constructor which reads in the size and all the weights from
    //   a file.  The network is resized to match the size specified
    //   by the file.  Learning parameters must be specified separately.
    //   file_error is an output parameter; its values are defined by the
    //   implementation, which is not part of this header.
    //-----------------------------------------------------------------------
    Neural_network (int number_inputs = 1, int number_hidden1 = 1,
                    int number_hidden2 = 1,
                    int number_outputs = 1, double t_epsilon = 0.1,
                    double t_skip_epsilon = 0.0, double t_learning_rate = 0.1,
                    double t_theta = 1.0, double t_phi = 0.0, double t_K = 0.0,
                    double range = 3.0);

    Neural_network (char *filename, int& file_error, double t_epsilon = 0.1,
                    double t_skip_epsilon = 0.0, double t_learning_rate = 0.1,
                    double t_theta = 1.0, double t_phi = 0.0, double t_K = 0.0);

    // Hands all buffer cleanup to deallocate_matrices ().
    ~Neural_network () { deallocate_matrices (); }

    //-----------------------------------------------------------------------
    // Weight parameter routines:
    //   save_weights : This routine saves the weights of the network
    //        to the file <filename>.
    //
    //   read_weights : This routine reads the weight values from the file
    //        <filename>.  The network is automatically resized to the
    //        size specified by the file.
    //
    //   The meaning of the int results is defined by the implementation,
    //   which is not part of this header.
    //
    //   Activation routines return the node activation after a calc_forward
    //   has been performed.
    //
    //   get_weight routines return the weight between node1 and node2.
    //
    //   None of the accessors checks its node indices; callers must pass
    //   indices that are valid for the current layer sizes.
    //-----------------------------------------------------------------------
    int save_weights (char *filename);
    int read_weights (char *filename);

    double get_hidden1_activation (int node) const
    {
        return hidden1_act [node];
    }

    double get_hidden2_activation (int node) const
    {
        return hidden2_act [node];
    }

    double get_output_activation (int node) const
    {
        return output_act [node];
    }

    // Weight from input node input_node to hidden-1 node hidden1_node.
    double get_input_weight (int input_node, int hidden1_node) const
    {
        return input_weights [hidden1_node * num_inputs + input_node];
    }

    // Weight from hidden-1 node hidden1_node to hidden-2 node hidden2_node.
    double get_hidden1_weight (int hidden1_node, int hidden2_node) const
    {
        return hidden1_weights [hidden2_node * num_hidden1 + hidden1_node];
    }

    // Weight from hidden-2 node hidden2_node to output node output_node.
    double get_hidden2_weight (int hidden2_node, int output_node) const
    {
        return hidden2_weights [output_node * num_hidden2 + hidden2_node];
    }

    //-----------------------------------------------------------------------
    // Size parameters of network.
    //   The size of the network may be changed at any time.  The weights
    //   will be copied from the old size to the new size.  If the new
    //   size is larger, then the extra weights will be randomly set
    //   between +-range.  The matrices used to hold learning updates
    //   and activations will be re-initialized (cleared).
    //-----------------------------------------------------------------------
    int get_number_of_inputs () const { return num_inputs; }
    int get_number_of_hidden1 () const { return num_hidden1; }
    int get_number_of_hidden2 () const { return num_hidden2; }
    int get_number_of_outputs () const { return num_outputs; }

    void set_size_parameters (int number_inputs, int number_hidden1,
                              int number_hidden2, int number_outputs,
                              double range = 3.0);

    //-----------------------------------------------------------------------
    // Learning parameters functions.  These parameters may be changed
    //   on the fly.  The learning rate and K may have to be reduced as
    //   more and more training is done to prevent oscillations.
    //   The setters store the value as given; no validation is performed.
    //-----------------------------------------------------------------------
    void set_epsilon (double eps) { epsilon = eps; }
    void set_skip_epsilon (double eps) { skip_epsilon = eps; }
    void set_learning_rate (double l_rate) { learning_rate = l_rate; }
    void set_theta (double t_theta) { theta = t_theta; }
    void set_phi (double t_phi) { phi = t_phi; }
    void set_K (double t_K) { K = t_K; }

    double get_epsilon () const { return epsilon; }
    double get_skip_epsilon () const { return skip_epsilon; }
    double get_learning_rate () const { return learning_rate; }
    double get_theta () const { return theta; }
    double get_phi () const { return phi; }
    double get_K () const { return K; }

    // Reports the training_examples counter.
    long get_iterations () const { return training_examples; }

    //-----------------------------------------------------------------------
    // The main neural network routines:
    //
    //   The network input is an array of doubles which has a size of
    //   number_inputs.
    //   The network desired output is an array of doubles which has a size
    //   of number_outputs.
    //
    //   back_propagation : Calculates how each weight should be changed.
    //        Assumes that calc_forward has been called just prior to
    //        this routine to calculate all of the node activations.
    //
    //   calc_forward : Calculates the output for a given input.  Finds
    //        all node activations which are needed for back_propagation
    //        to calculate weight adjustment.  Returns abs (error).
    //        The parameter skip is for use with the skip_epsilon
    //        parameter.  What it means is if the output is within
    //        skip_epsilon of the desired, then it is so close that it
    //        should be skipped from being calculated the next X times.
    //        Careful use of this parameter can significantly increase
    //        the rate of convergence and also help prevent over-learning.
    //
    //   calc_forward_test : Calculates the output for a given input.  This
    //        routine is used for testing rather than training.  It returns
    //        whether the test was CORRECT, GOOD or WRONG which is
    //        determined by the parameters correct_eps and good_eps.
    //        CORRECT > GOOD > WRONG.
    //
    //   update_weights : Actually adjusts all the weights according to
    //        the calculations of back_propagation.  This routine should
    //        be called at the end of every training epoch.  The weights
    //        can be updated by the straight BP algorithm, or by the
    //        delta-bar-delta algorithm developed by Robert A. Jacobs
    //        which increases the rate of convergence generally by at
    //        least a factor of 10.  The parameters THETA, PHI, and K
    //        determine which algorithm is used.  The default settings
    //        for these parameters cause update_weights to use the straight
    //        BP algorithm.
    //
    //   kick_weights : This routine changes all weights by a random amount
    //        within +-range.  It is useful in case the network gets
    //        'stuck' and is having trouble converging to a solution.  I
    //        use it when the number wrong has not changed for the last 200
    //        epochs.  Getting the range right will take some trial and
    //        error as it depends on the application and the weights'
    //        actual values.
    //-----------------------------------------------------------------------
    void back_propagation (double input [], double desired_output [],
                           int& done);

    double calc_forward (double input [], double desired_output [],
                         int& num_wrong, int& skip, int print_it,
                         int& actual_printed);

    int calc_forward_test (double input [], double desired_output [],
                           int print_it, double correct_eps, double good_eps);

    void update_weights ();
    void kick_weights (double range);
};
#endif