/******* * This program is used for training a self-organizing map. It receives its inputs either in * input files, or interactively, from the user. The outputs (the weights and topological * connections) are written to an output file. usage: * a) kohonen * or * b) kohonen input_file data_file output_file * * When format a) is used, the program prompts the user for the names of the data and weights * file, and then proceeds to prompt the user on the structural parameters of the * self-organizing map. *******/ #include #include #define READFILE 0 #define WRITEFILE 1 #define MAXRAND (1<<16) #define MAXWIN 2.0 struct node { double *weight; /* position of the node in input space */ int *pos; /* position of the node in the network */ int num_hits; /* #patterns for which this node was a winner */ }; struct input_s { double *input; /* input vector */ }; struct network { /* The structure */ int indim; /* # input dimensions */ int netdim; /* # network dimensions */ int totalnodes; /* Superfluous: calculated from the num_nodes array */ int *num_nodes; /* #nodes per network dimension */ struct node *nodes; /* Linear storage for the nodes. */ /* The training parameters */ int Dstart, Dend; int iter_start, iter_end; double eta_start, eta_end; int conscience; /* Is the conscience mech. active? */ /* The inputs */ int num_inputs; /* Number of inputs */ struct input_s *inputs; /* Storage for input vectors */ }; void openfile(), kohonen(), trainnetwork(), initnetwork(); void initrandom(), deallocate_network(); double get_rand(); double get_euclidean(); int find_best_match(), are_neighbors(); void update_neighborhood(), clear_hits(); struct network *readinput(), *getnetwork(), *readfromfile(), *readfromkeyboard(); void writeoutput(), write_fancy(); /* argv[1] - input structure file argv[2] - input data file argv[3] - output weights file */ void main(argc, argv) int argc; char *argv[]; { if( argc == 1 ) kohonen(NULL, NULL, NULL); else kohonen(argv[1], argv[2], argv[3]); } /* Initialize random generator with number of microseconds since January 1, 1970, which is almost a random value in itself. */ void initrandom() { struct timeval tp; struct timezone tzp; if( gettimeofday(&tp,&tzp) == -1 ) { printf("Could not initialize random number generator\n"); _exit(0); } srandom((int)tp.tv_usec); } /* Returns a double precision random number between low and high */ double get_rand(low, high) double low, high; { double x = random() % MAXRAND / (double)MAXRAND; return( x*(high-low) + low ); } /* Opens the file filename for reading or writing (depending on type). Terminates the program on error. */ void openfile( filename, fp, type) char *filename; FILE **fp; int type; { if( filename ) { if( type == READFILE ) *fp = fopen(filename, "rt"); else *fp = fopen(filename, "wt"); if( *fp == NULL ) { printf("Unable to open file %s for %s\n",filename, (type == READFILE) ? "reading" : "writing"); _exit(0); } } } /* Clear the number of hits for each node in the network */ void clear_hits(p) struct network *p; { int i; for(i=0; itotalnodes; i++) p->nodes[i].num_hits = 0; } /* Finds the euclidean distance between 2 vectors of doubles (x & y) both of size n */ double get_euclidean( x, y, n) double *x, *y; int n; { double dist = 0; int i; for( i=0; inum_inputs / (double) p->totalnodes); for( i=0; itotalnodes; i++) { if( !p->conscience || p->nodes[i].num_hits < ration ) { distance = get_euclidean(p->nodes[i].weight, in.input, p->indim); if( first || distance < mindistance ) { mindistance = distance; best = i; first = 0; } } } return(best); } /* determines whether nodes x and y are neighbors on a distance <= D. Network dimension is n. */ int are_neighbors( x, y, n, D) struct node x, y; int n, D; { int i; int distance = 0; for( i=0; i y.pos[i] ) distance += x.pos[i] - y.pos[i]; else distance += y.pos[i] - x.pos[i]; return(distance <= D); } /* Update the neighborhood of node "best", according to input "in" and learning rate "eta". Neighborhood is of size "D". */ void update_neighborhood(p, best, D, in, eta) struct network *p; int best, D; struct input_s in; double eta; { int i, j; int distance; for( i=0; itotalnodes; i++) if( are_neighbors(p->nodes[i], p->nodes[best], p->netdim, D) ) for( j=0; jindim; j++) p->nodes[i].weight[j] += eta * (in.input[j] - p->nodes[i].weight[j]); } /* The SOM training algorithm. The #iterations and learning rate (eta) are linearly decreased for each decrease of the neighborhood. For #iterations, and for each input (in that iteration), the best match is found, and the neighborhood is updated. */ void trainnetwork(p) struct network *p; { int D=p->Dstart; int iter; int iter_interval = p->iter_end - p->iter_start; double eta, eta_interval, fraction; int i, j, best; eta_interval = p->eta_end - p->eta_start; if( p->Dstart != p->Dend ) fraction = 1 / (double)(p->Dstart - p->Dend); else fraction = 0; while( D >= p->Dend ) { iter = p->iter_start + (p->Dstart - D) * fraction * iter_interval; eta = p->eta_start + (p->Dstart - D) * fraction * eta_interval; for( i=0; iconscience ) clear_hits(p); for( j=0; jnum_inputs; j++) { best = find_best_match(p, p->inputs[j]); update_neighborhood(p, best, D, p->inputs[j], eta); p->nodes[best].num_hits++; } } printf("%d %d %lf\n",D,iter,eta); D--; } } /* A recursive function for insertion of node positions in the network. Each inserted node will have the heading "heading", which is long "num_head". A few dimensions remain to be inserted, and are kept in "other_dim", with length "num_other". The insertion starts at point "start" in the linear array of nodes. */ void insert(p, heading, other_dim, num_head, num_other, start) struct network *p; int *heading, *other_dim; int num_head, num_other, start; { int i, len, size; if( num_other == 0 ) for( i=0; inodes[start].pos[i] = heading[i]; else { len = 1; for( i=1; iindim*sizeof(double)); max = (double *)malloc(p->indim*sizeof(double)); if(min == NULL || max == NULL) { deallocate_network(p); printf("Out of memory\n"); _exit(0); } insert(p, p->num_nodes, p->num_nodes, 0, p->netdim, 0); /* For each input dimension, find the min. and max. value, and assign all weights from that input node between those two values. */ for(i=0; iindim; i++) { min[i] = max[i] = p->inputs[0].input[i]; for( j=0; jnum_inputs; j++) if( p->inputs[j].input[i] > max[i] ) max[i] = p->inputs[j].input[i]; else if( p->inputs[j].input[i] < min[i] ) min[i] = p->inputs[j].input[i]; /* Otherwise, the get_rand would divide by zero */ if( min[i] == max[i] ) max[i] = min[i] * 1.1; } for( i=0; itotalnodes; i++) for( j=0; jindim; j++) p->nodes[i].weight[j] = get_rand(min[j], max[j]); } /* Allocates the structure network, initializes all pointers to NULL, and all integers to 0 */ struct network *getnetwork() { struct network *p; p = (struct network *)malloc(sizeof(struct network)); if( p == NULL ) return(NULL); p->num_nodes = NULL; p->nodes = NULL; p->inputs = NULL; p->indim = p->netdim = p->totalnodes = p->num_inputs = 0; p->Dstart = p->Dend = p->iter_start = p->iter_end = 0; p->eta_start = p->eta_end = 0.0; } /* Deallocates all already allocated arrays and structures in the network */ void deallocate_network(p) struct network *p; { int i; if( p->num_nodes ) free( p->num_nodes ); if( p->nodes ) { for( i=0; itotalnodes; i++) { if( p->nodes[i].weight ) free( p->nodes[i].weight ); if( p->nodes[i].pos ) free( p->nodes[i].pos); } free( p->nodes); } if( p->inputs ) { for( i=0; inum_inputs; i++) if( p->inputs[i].input ) free( p->inputs[i].input ); free( p->inputs); } free(p); } /* If in == NULL, the user will be interactively queried for all the values. One of the values: the weights file, will give pout its value. */ struct network *readinput(in, data, pout, fancyprint) FILE *in, *data; FILE **pout; int *fancyprint; { struct network *p = NULL; if( in ) p = readfromfile( in, data, fancyprint); else p = readfromkeyboard(pout, fancyprint); return(p); } /* Prompts user for parameters of the machine, as well as names of data and output files. Allocates the structure network and places all values of interest in it. Returns the pointer towards the structure, and opens the weights file for output and places it's handler in pout. */ struct network *readfromkeyboard(pout, fancyprint) FILE **pout; int *fancyprint; { char datafile[50], weightfile[50], fancy[50]; FILE *data; int i, j, totalnodes; struct network *p = NULL; /* Allocate the structure network */ if( (p = getnetwork()) == NULL) return(NULL); printf("Enter the name of the training data file: "); scanf("%s", datafile); printf("Enter the name of the output weight file: "); scanf("%s", weightfile); openfile( datafile, &data, READFILE); openfile( weightfile, pout, WRITEFILE); /* Get the # of dimensions, allocate storage for num_nodes */ printf("Enter the number of input dimensions: "); scanf("%d", &p->indim); printf("Enter the number of topological dimensions: "); scanf("%d", &p->netdim); if( (p->num_nodes = (int *)malloc(p->netdim * sizeof(int))) == NULL) { deallocate_network(p); return(NULL); } /* Get the # of nodes per dimension */ totalnodes = 1; for( i=0; inetdim; i++) { printf("Enter the number of nodes in dimension %d: ",i); scanf("%d", p->num_nodes + i); totalnodes *= p->num_nodes[i]; } p->totalnodes = totalnodes; /* Allocate the nodes, and their internal structures */ p->nodes = (struct node *)malloc(totalnodes * sizeof(struct node)); if( p->nodes == NULL ) { deallocate_network(p); return(NULL); } for( i=0; inodes[i].weight = NULL; p->nodes[i].pos = NULL; p->nodes[i].num_hits = 0; } for( i=0; inodes[i].weight = (double *)malloc(p->indim * sizeof(double)); p->nodes[i].pos = (int *)malloc(p->netdim * sizeof(int)); if( p->nodes[i].weight == NULL || p->nodes[i].pos == NULL ) { deallocate_network(p); return(NULL); } } /* Get the training parameters */ printf("Enter the starting size of neighborhood: "); scanf("%d",&p->Dstart); printf("Enter the number of iterations at start: "); scanf("%d",&p->iter_start); printf("Enter the learning rate at start: "); scanf("%lf",&p->eta_start); printf("Enter the final size of neighborhood: "); scanf("%d",&p->Dend); printf("Enter the number of iterations at end: "); scanf("%d",&p->iter_end); printf("Enter the learning rate at end: "); scanf("%lf",&p->eta_end); /* Get number of inputs, allocate the input storage */ printf("Enter the number of training samples: "); scanf("%d", &p->num_inputs); p->inputs = (struct input_s *)malloc(p->num_inputs*sizeof(struct input_s)); if( p->inputs == NULL ) { deallocate_network(p); return(NULL); } for( i=0; inum_inputs; i++) p->inputs[i].input = NULL; for( i=0; inum_inputs; i++ ) { p->inputs[i].input = (double *)malloc(p->indim * sizeof(double)); if( p->inputs[i].input == NULL ) { deallocate_network(p); return(NULL); } } /* Read the inputs from the data file */ for( i=0; inum_inputs; i++) { for( j=0; jindim; j++) fscanf(data, "%lf ", p->inputs[i].input + j); fscanf(data, "\n"); } printf("Print out network weights in topological order (y/n): "); scanf("%s",fancy); if(fancy[0] == 'y' || fancy[0] == 'Y') *fancyprint = 1; else *fancyprint = 0; return(p); } /* Reads the input files and initializes the structure networs. Places all values of interest in the structure. If one of the arrays in the structure cannot be allocated, it deallocates the whole structure and returns NULL */ struct network *readfromfile(in, data, fancyprint) FILE *in, *data; int *fancyprint; { struct network *p = NULL; int i, j; int totalnodes; char c; /* Allocate the structure network */ if( (p = getnetwork()) == NULL) return(NULL); /* Get the # of dimensions */ fscanf(in, "%d %d\n", &p->indim, &p->netdim); if( (p->num_nodes = (int *)malloc(p->netdim * sizeof(int))) == NULL) { deallocate_network(p); return(NULL); } /* Get the # of nodes per dimension */ totalnodes = 1; for( i=0; inetdim; i++) { fscanf(in, "%d ", p->num_nodes + i); totalnodes *= p->num_nodes[i]; } p->totalnodes = totalnodes; /* Allocate the nodes, and their internal structures */ p->nodes = (struct node *)malloc(totalnodes * sizeof(struct node)); if( p->nodes == NULL ) { deallocate_network(p); return(NULL); } for( i=0; inodes[i].weight = NULL; p->nodes[i].pos = NULL; p->nodes[i].num_hits = 0; } for( i=0; inodes[i].weight = (double *)malloc(p->indim * sizeof(double)); p->nodes[i].pos = (int *)malloc(p->netdim * sizeof(int)); if( p->nodes[i].weight == NULL || p->nodes[i].pos == NULL ) { deallocate_network(p); return(NULL); } } /* Get the training parameters */ fscanf(in, "%d %d %lf\n",&p->Dstart, &p->iter_start, &p->eta_start); fscanf(in, "%d %d %lf\n",&p->Dend, &p->iter_end, &p->eta_end); /* Get number of inputs, allocate the input storage */ fscanf(in, "%d\n", &p->num_inputs); p->inputs = (struct input_s *)malloc(p->num_inputs*sizeof(struct input_s)); if( p->inputs == NULL ) { deallocate_network(p); return(NULL); } for( i=0; inum_inputs; i++) p->inputs[i].input = NULL; for( i=0; inum_inputs; i++ ) { p->inputs[i].input = (double *)malloc(p->indim * sizeof(double)); if( p->inputs[i].input == NULL ) { deallocate_network(p); return(NULL); } } /* Read the inputs from the data file */ for( i=0; inum_inputs; i++) { for( j=0; jindim; j++) fscanf(data, "%lf ", p->inputs[i].input + j); fscanf(data, "\n"); } /* Get the conscience parameter */ fscanf(in, "%c\n", &c); p->conscience = (c == 'y' || c == 'Y'); /* Get the fancyprint parameter */ fscanf(in, "%c\n", &c); if (c == 'y' || c == 'Y') *fancyprint = 1; else *fancyprint = 0; return(p); } /* Writes out the weights in the SOM in a organized manner. Graphical representation is only for the bottom two dimensions. */ void fancy_write(p) struct network *p; { int repetitions; int n, n1, n2, i, j, k, l; int node; /* n nodes' weights will be printed out in a matrix shape */ n1 = p->num_nodes[p->netdim-1]; if (p->netdim > 1) n2 = p->num_nodes[p->netdim-2]; else n2 = 1; n = n1*n2; repetitions = p->totalnodes / n; /* Write out the matrices */ for( i=0; inetdim > 2) { printf("("); for(j=0; jnetdim-3; j++) printf("%d,",p->nodes[i*n].pos[j]); printf("%d",p->nodes[i*n].pos[p->netdim-3]); printf(")\n"); } /* Write out one matrix */ for(j=0; jindim-1; l++) printf("%l4.2f,", p->nodes[node].weight[l]); printf("%l4.2f", p->nodes[node].weight[p->indim-1]); printf(") "); } printf("\n"); } printf("\n"); } } /* Writes all the outputs to the file "out". First the structural parameters of the machine are written, followed by all the nodes in format (position, weight) */ void writeoutput(p, out, fancyprint) struct network *p; FILE *out; int fancyprint; { int i,j; /* Print the # of dimensions */ fprintf(out, "%d %d\n", p->indim, p->netdim); /* Write the # of nodes per dimension */ for( i=0; inetdim; i++) fprintf(out, "%d ", p->num_nodes[i]); fprintf(out, "\n"); /* For all nodes, write their position, followed by their weight */ for( i=0; itotalnodes; i++) { for( j=0; jnetdim; j++) fprintf(out, "%d ", p->nodes[i].pos[j]); for( j=0; jindim; j++) fprintf(out, "%lf ", p->nodes[i].weight[j]); fprintf(out, "\n"); } if (fancyprint) fancy_write(p); } /* The kohonen SOM. Opens all necessary files, reads inputs, trains the network, and writes the derived weights. */ void kohonen(inputfile, datafile, weightfile) char *inputfile, *datafile, *weightfile; { FILE *in, *data, *out; struct network *p; long t1, t2; int fancyprint; t1 = clock(); in = out = data = NULL; openfile( inputfile, &in, READFILE); openfile( datafile, &data, READFILE); openfile( weightfile, &out, WRITEFILE); if( (p = readinput(in, data, &out, &fancyprint)) == NULL) { printf("Not enough memory\n"); _exit(0); } initrandom(); initnetwork(p); trainnetwork(p); writeoutput(p,out, fancyprint); deallocate_network(p); t2 = clock(); printf("Elapsed time %ld milliseconds\n",(t2-t1)/1000); }