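// backprop -- train a multilayer perceptron by on-line back-propagation.
//
// The network has tanh hidden units and a linear output layer.  Its
// architecture, initial biases/weights, and training examples are read from
// standard input; the trained biases and weights are written to standard
// output in the same format.
//
// Options:   -l <learn>   inverse learning rate (eta = 1/learn; default 64)
//            -n <sigma>   std. deviation of input noise (only used if the
//                         noise() call in the training loop is uncommented)
//            -s <seed>    random number seed for example selection (default 43)
//            -t <trials>  number of training trials (default 65536)
//            -v           verbose reporting on stderr
//            -w           watch (trace) each training trial on stderr
//
// Signals:   SIGUSR1      toggle watch mode while training
//            SIGUSR2      stop training early and write the current network
//
// Built on the GNU libg++ classes MLCG, Normal, GetOpt, and a
// doubleMatrix/doubleArray matrix class (<double.Matrix.h>).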
#include <signal.h>
#include <stdlib.h>          // atof, atoi, srand48, lrand48
#include <math.h>
#include <bool.h>
#include <iostream.h>        // cin, cout, cerr
#include <stream.h>          // form()
#include <MLCG.h>
#include <Normal.h>
#include <GetOpt.h>
#include <double.Matrix.h>
#define matrix doubleMatrix
#define array doubleArray

int watch = FALSE;                       // toggle watch (trace) mode during training
void user_signal1_handler
    (int sig, int code, struct sigcontext* scp, char* addr) { watch = !watch; }

int patient = TRUE;                      // cleared by SIGUSR2 to end training early
void user_signal2_handler
    (int sig, int code, struct sigcontext* scp, char* addr) { patient = FALSE; }

static MLCG gen(13, 43);                 // random number generator
static Normal rnd(0.0, 1.0, &gen);       // standard normal deviates, N(0,1)

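// noise -- return a copy of x with zero-mean Gaussian noise of standard
// deviation sigma added to every element.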
matrix noise(double sigma, array& x)
{
    matrix result(x);
    for (int i = 0; i < x.m(); i++)
        for (int j = 0; j < x.n(); j++)
            result[i][j] += sigma*rnd();
    return result;
}
-
- int
- main (int argc, char **argv)
- {
- int trials = 65536; // number of trials
- int verbose = FALSE; // verbose reporting off
- long seedval = 43; // random number seed value
- double learn = 64.0; // 1.0/(learning rate)
- double eta = 1.0/learn; // learning rate
- double sigma = 0.0; // noise variance
- GetOpt getopt (argc, argv, "l:n:s:t:vw");
-
- signal(SIGUSR1, user_signal1_handler); // kill -30 pid
- signal(SIGUSR2, user_signal2_handler); // kill -31 pid
-
    int option;
    while ((option = getopt()) != EOF)
        switch (option)
        {
        case 'l':                        // inverse learning rate (eta = 1/learn)
            learn = atof(getopt.optarg);
            break;
        case 'n':                        // noise level (standard deviation)
            sigma = atof(getopt.optarg);
            break;
        case 's':                        // random number seed value
            seedval = atoi(getopt.optarg);
            break;
        case 't':                        // number of trials
            trials = atoi(getopt.optarg);
            break;
        case 'v':                        // verbose reporting on
            verbose = TRUE;
            break;
        case 'w':                        // watch training
            watch = TRUE;
            break;
        case '?':
            cerr << "Unrecognized option!\n";
        }

    if (verbose)
        cerr << " seedval = " << seedval
             << " trials = "  << trials
             << " sigma = "   << sigma
             << " learn = "   << learn << "\n";

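    // The network description is read from standard input:
    //   layers                         number of layers
    //   n0                             number of network inputs
    //   for each layer:  n             number of units in the layer
    //                    b w ... w     per unit: bias, then one weight per input
    //   examples                       number of training examples
    //   per example:     x ... x  y ... y    input row followed by target row
    //
    // Illustrative sample (assuming the row extractors read one whitespace-
    // separated value per element) -- a 1-2-1 network with 4 training pairs:
    //   2
    //   1
    //   2
    //    0.1  0.5
    //   -0.2  0.3
    //   1
    //    0.0  0.7 -0.4
    //   4
    //   0.00 0.00
    //   0.25 0.50
    //   0.50 1.00
    //   1.00 2.00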
    int layers; cin >> layers;           // number of layers
    matrix b[layers];                    // threshold biases
    matrix W[layers];                    // connection weights
    matrix x[layers+1];                  // layer inputs/outputs
    matrix d[layers];                    // "equivalent error"

    int outputs; cin >> outputs;         // first count read is the number of network inputs
    int inputs = outputs;                // number of inputs
    if (verbose)
        cerr << "N(" << inputs;

    int layer;
    for (layer = 0; layer < layers; layer++)
    {
        int inputs = outputs;            // inputs to this layer = units in the previous one
        cin >> outputs;
        if (verbose)
            cerr << ", " << outputs;
        x[layer].resize(inputs);
        d[layer].resize(outputs);
        b[layer].resize(outputs);
        W[layer].resize(outputs, inputs);
        for (int output = 0; output < outputs; output++)
            cin >> b[layer][0][output] >> W[layer].s(output);
    }
    x[layers].resize(outputs);

    int examples; cin >> examples;       // number of training examples
    if (verbose)
        cerr << ")\t" << examples << " examples\n";
    matrix X(examples, inputs);          // example inputs, one row per example
    matrix Y(examples, outputs);         // example targets, one row per example
    for (int example = 0; example < examples; example++)
        cin >> X.s(example) >> Y.s(example);

    if (learn > 0.0)
        eta = 1.0/learn;                 // (re)compute learning rate from -l option
    srand48(seedval);
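
    // On-line training: each trial picks a random example, feeds it forward,
    // back-propagates the error, and updates the biases and weights with
    // learning rate eta.  SIGUSR2 clears `patient' to stop early; SIGUSR1
    // toggles `watch' to trace progress on stderr.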
    for (int trial = 0; trial < trials && patient; trial++)
    {
        int example = lrand48()%examples;
        //x[0] = noise(sigma, X.s(example));   // uncomment to corrupt inputs with noise
        x[0] = X.s(example);
#define y Y.s(example)

        int layer;
        // Alternative with a tanh output layer:
        //for (layer = 0; layer < layers; layer++)                  // Feed Forward
        //    x[layer+1] = tanh(b[layer] + x[layer]%W[layer]);
        //d[layers-1] = eta*(y - x[layers])*(1.0 - x[layers]*x[layers]);

        for (layer = 0; layer < layers-1; layer++)                  // Feed Forward
            x[layer+1] = tanh(b[layer] + x[layer]%W[layer]);
        x[layers] = b[layers-1] + x[layers-1]%W[layers-1];          // linear output layer

        d[layers-1] = eta*(y - x[layers]);                          // Back Propagate
        for (layer = layers-1; layer > 0; layer--)
            d[layer-1] = d[layer]%W[layer].t()*(1.0 - x[layer]*x[layer]);

        for (layer = 0; layer < layers; layer++)                    // Update
        {
            b[layer] += d[layer];
            W[layer] += d[layer]&x[layer];
        }

        if (watch)
            cerr << "trial = " << trial
                 << "\texample = " << example << "\n";
    }
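
    // Write the trained network back to stdout in the same format it was
    // read in (layer count, input count, then each layer's unit count and
    // per-unit bias and weight row), without the training examples.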

    format("%19.12e", 4);                // presumably sets the matrix element output format

    cout << layers << "\n";
    cout << inputs << "\n";
    for (layer = 0; layer < layers; layer++)
    {
        int outputs = b[layer].n();
        cout << outputs << "\n";
        for (int output = 0; output < outputs; output++)
            cout << form("%19.12e\n", b[layer][0][output])
                 << W[layer].s(output);
    }
    cout.flush();
}