#include #include #include /* Maximum random number used for the purposes generating a double-precision floating point random number */ #define MAXRAND (1<<16) #define NOISE_OFF2ON 0.05 #define NOISE_ON2OFF 0.05 /* Storage for the machine. All data relevant for the machine are kept in this structure */ struct machine { /* Machine data */ double ***weights; /* weights[layer i][node in layer i] [node in layer i+1] */ double ***internal; /* intra-layer weights. internal[layer i][from node1 in layer i] [to node2 in layer i] The matrix is symmetrical across it's second and third dimensions! Therefore, only those weights where node1>node2 are stored. The symmetric weight is equal to that */ int *internal_exist; /* Do intralayer connections exist for each layer: internal_exist[layer] */ int *num_nodes; /* Number of nodes in each layer: num_nodes[layer] */ int layers; /* Number of layers in the machine. Should be 3 */ int **states; /* State of the machine: states[layer][node] */ int *clamped; /* Is the i-th layer clamped, i.e. fixed */ /* Input storage */ int num_inputs; /* Number of inputs */ int **inputs; /* The inputs the machine will be trained on: inputs[input#][dimension] */ int **outputs; /* The outputs that correspond to the training inputs. outputs[output#][dimension] /* Training algorithm data */ int max_iter; /* Maximum iterations */ double eta; /* learning rate */ double converge; /* Convergence criterion. */ double t0, beta, tmin; /* annealing start and end temperatures */ /* Mean field annealing data */ int mean_field; /* Is the annealing mean_field? */ double **mean_states; /* mean values of states */ double **pon, **poff; /* prob. of states being on or off */ /* Training algorithm probabilities */ /* probabilities gathered in phases 1 and 2 (p1 and p2) on the weights and the internal weights */ double ***p1weights, ***p1internal, ***p2weights, ***p2internal; }; /* common functions */ int get_1bit(), allocate_weights(); double get_rand(), energy_change(); struct machine *get_machine(); void init_random(), deallocate_machine(); void flip_state(), noisy_flip_state(), select_node(), init_state(); int get_num_changes(), accept_change(); void anneal(), anneal_1_step(); void mean_field_anneal(); double get_mean_input(); void init_exp_lookup(); double exp_lookup(); /* Common functions for training and getting output from a boltzmann machine */ #define ON (1) #define OFF (-1) /* Initialize random generator with number of microseconds since January 1, 1970, which is almost a random value in itself */ void initrandom() { struct timeval tp; struct timezone tzp; if( gettimeofday(&tp,&tzp) == -1 ) { printf("Could not initialize random number generator\n"); _exit(0); } srandom((int)tp.tv_usec); } /* Returns a double precision random number between low and high */ double get_rand(low, high) double low, high; { double x = random() % MAXRAND / (double)MAXRAND; return( x*(high-low) + low ); } /* Returns a 1-bit number */ int get_1bit() { return( random() % 2 ); } /* deallocates a weights matrix */ deallocate_weight_matrix(m,p) double ***m; struct machine *p; { int i,j; if( m ) { for( i=0; ilayers-1; i++ ) { if( m[i] ) { for( j=0; jnum_nodes[i]; j++) if( m[i][j] ) free( m[i][j] ); free(m[i]); } } free(m); } } /* deallocates an internal weights matrix */ deallocate_internal_weight_matrix(m,p) double ***m; struct machine *p; { int i,j; if( m ) { for( i=0; ilayers; i++ ) { if( m[i] ) { for( j=0; jnum_nodes[i]; j++) if( m[i][j] ) free( m[i][j] ); free(m[i]); } } free(m); } } /* deallocates one of the structures associated with mean field annealing */ deallocate_mean_field( m, p) double **m; struct machine *p; { int i; if( m ) { for( i=0; ilayers; i++ ) if( m[i] ) free( m[i] ); free(m); } } /* deallocates the structure and all the allocated arrays within */ void deallocate_machine(p) struct machine *p; { int i,j; if( p->num_nodes ) free(p->num_nodes); deallocate_weight_matrix(p->weights, p); if( p->internal_exist ) free( p->internal_exist ); deallocate_internal_weight_matrix(p->internal, p); if( p->states ) { for( i=0; ilayers; i++ ) if( p->states[i] ) free( p->states[i] ); free(p->states); } if( p->clamped ) free( p->clamped ); if( p->inputs ) { for( i=0; inum_inputs; i++ ) if( p->inputs[i] ) free(p->inputs[i]); free(p->inputs); } if( p->outputs ) { for( i=0; inum_inputs; i++ ) if( p->outputs[i] ) free(p->outputs[i]); free(p->outputs); } deallocate_weight_matrix(p->p1weights, p); deallocate_weight_matrix(p->p2weights, p); deallocate_internal_weight_matrix(p->p1internal, p); deallocate_internal_weight_matrix(p->p2internal, p); deallocate_mean_field( p->mean_states, p ); deallocate_mean_field( p->pon, p ); deallocate_mean_field( p->poff, p); free(p); } /* Allocates a structure for the machine, and initializes all dynamic strutures to NULL */ struct machine *get_machine() { struct machine *p; p = (struct machine *)malloc(sizeof(struct machine)); p->weights = p->internal = NULL; p->internal_exist = NULL; p->num_nodes = NULL; p->clamped = NULL; p->inputs = p->outputs = NULL; p->p1weights = p->p2weights = p->p1internal = p->p2internal = NULL; p->mean_states = p->pon = p->poff = NULL; return(p); } /* Allocate weight matrix (according to data in *p), and return 1 if successfull. If not, return 0. This function will be used for allocating the weight matrix, as well as the 2 probability matrices associated with the weight matrix */ allocate_weight_matrix(pw,p) double ****pw; struct machine *p; { double ***w; int i,j; /* Allocate weights matrix */ *pw = (double ***)malloc((p->layers-1)*sizeof(double **)); if( *pw == NULL ) return(0); w = *pw; for( i=0; ilayers-1; i++ ) w[i] = NULL; /* So that deallocation of partially allocated weights is possible */ for( i=0; ilayers-1; i++ ) { w[i] = (double **)malloc(p->num_nodes[i]*sizeof(double *)); if( w[i] == NULL ) return(0); for( j=0; jnum_nodes[i]; j++ ) w[i][j] = NULL; for( j=0; jnum_nodes[i]; j++ ) { w[i][j] = (double *)malloc(p->num_nodes[i+1]*sizeof(double)); if( w[i][j] == NULL ) return(0); } } /* Success */ return(1); } /* Allocate internal weight matrix (according to data in *p), and return 1 if successfull. If not, return 0. This function will be used for allocating the internal matrix, as well as the 2 probability matrices associated with the internal matrix */ allocate_internal_weight_matrix(pw,p) double ****pw; struct machine *p; { double ***w; int i,j; /* Allocate internal weights matrix */ *pw = (double ***)malloc(p->layers*sizeof(double **)); if( *pw == NULL ) return(0); w = *pw; for( i=0; ilayers; i++ ) w[i] = NULL; /* So that deallocation of partially allocated weights is possible */ for( i=0; ilayers; i++ ) if( p->internal_exist[i] ) /* allocated only if needed */ { w[i] = (double **)malloc(p->num_nodes[i]*sizeof(double *)); if( w[i] == NULL ) return(0); for( j=0; jnum_nodes[i]; j++ ) w[i][j] = NULL; for( j=0; jnum_nodes[i]; j++ ) { w[i][j] = (double *)malloc(p->num_nodes[i]*sizeof(double)); if( w[i][j] == NULL ) return(0); } } /* Success */ return(1); } /* Allocates the 3 arrays associated with the mean field annealing */ allocate_mean_field(p) struct machine *p; { int i; /* Allocate the mean states matrix */ p->mean_states = (double **)malloc(p->layers*sizeof(double *)); if( p->mean_states == NULL ) return(0); for( i=0; ilayers; i++ ) p->mean_states[i] = NULL; for( i=0; ilayers; i++ ) { p->mean_states[i] = (double *)malloc(p->num_nodes[i]*sizeof(double)); if( p->mean_states[i] == NULL ) return(0); } /* Allocate the pon matrix */ p->pon = (double **)malloc(p->layers*sizeof(double *)); if( p->pon == NULL ) return(0); for( i=0; ilayers; i++ ) p->pon[i] = NULL; for( i=0; ilayers; i++ ) { p->pon[i] = (double *)malloc(p->num_nodes[i]*sizeof(double)); if( p->pon[i] == NULL ) return(0); } /* Allocate the poff matrix */ p->poff = (double **)malloc(p->layers*sizeof(double *)); if( p->poff == NULL ) return(0); for( i=0; ilayers; i++ ) p->poff[i] = NULL; for( i=0; ilayers; i++ ) { p->poff[i] = (double *)malloc(p->num_nodes[i]*sizeof(double)); if( p->poff[i] == NULL ) return(0); } /* Success */ return(1); } /* Initializes the arrays in the structure 'machine'. All it's parameters are already in the structure, such as: #layers, #nodes in each layer and the internal_exist array. If any allocation does not succeed, returns 0 (false) */ int allocate_weights(p) struct machine *p; { int i,j,k; /* Allocate the weight matrix */ if( allocate_weight_matrix(&(p->weights), p) == 0) return(0); /* Allocate the internal weights matrix */ if( allocate_internal_weight_matrix(&(p->internal), p) == 0) return(0); /* Allocate the states matrix */ p->states = (int **)malloc(p->layers*sizeof(int *)); if( p->states == NULL ) return(0); for( i=0; ilayers; i++ ) p->states[i] = NULL; for( i=0; ilayers; i++ ) { p->states[i] = (int *)malloc(p->num_nodes[i]*sizeof(int)); if( p->states[i] == NULL ) return(0); } /* Allocate the 'clamped' array */ p->clamped = (int *)malloc(p->layers * sizeof(int)); if( p->clamped == NULL ) return(0); /* Allocate the inputs matrix */ p->inputs = (int **)malloc(p->num_inputs*sizeof(int *)); if( p->inputs == NULL ) return(0); for( i=0; inum_inputs; i++ ) p->inputs[i] = NULL; for( i=0; inum_inputs; i++ ) { p->inputs[i] = (int *)malloc(p->num_nodes[0]*sizeof(int)); if( p->inputs[i] == NULL ) return(0); } /* Allocate the outputs matrix. There are as many outputs as there are inputs in the machine */ p->outputs = (int **)malloc(p->num_inputs*sizeof(int *)); if( p->outputs == NULL ) return(0); for( i=0; inum_inputs; i++ ) p->outputs[i] = NULL; for( i=0; inum_inputs; i++ ) { p->outputs[i] = (int *)malloc(p->num_nodes[p->layers-1]*sizeof(int)); if( p->outputs[i] == NULL ) return(0); } /* Allocate probability matrices */ if( allocate_weight_matrix(&(p->p1weights),p) == 0 ) return(0); if( allocate_weight_matrix(&(p->p2weights),p) == 0 ) return(0); if( allocate_internal_weight_matrix(&(p->p1internal),p) == 0 ) return(0); if( allocate_internal_weight_matrix(&(p->p2internal),p) == 0 ) return(0); /* Success! */ return(1); } /* flips the state of node j in layer i */ void flip_state(p,i,j) struct machine *p; int i,j; { p->states[i][j] = (p->states[i][j] == ON) ? OFF : ON; } /* Flips the state of p->states[i][j] with a certain probability. probabilities defined in the header file */ void noisy_flip_state(p,i,j) struct machine *p; int i,j; { if( p->states[i][j] == ON && get_rand(0.0, 1.0) < NOISE_ON2OFF ) p->states[i][j] = OFF; else if( p->states[i][j] == OFF && get_rand(0.0, 1.0) < NOISE_OFF2ON ) p->states[i][j] = ON; } /* randomly selects one node. Selections should be based on clamped inputs, but that is the second stage. */ void select_node(p, layer, node) struct machine *p; int *layer, *node; { int sum=0, i; int num; int selected = 0; /* Randomly select a number between 0 and the total number of nodes, not counting the clamped nodes */ for( i=0; ilayers; i++) sum += (1-p->clamped[i])*p->num_nodes[i]; /* Keep trying until a node that is not in one of the clamped layers is selected */ num = (int) get_rand(0.0, (double)(sum) - 0.001); /* find the corresponding layer and node. Nodes are 'numbered' starting from 0 (first node in first layer), to sum-1 (last node in last layer. Only unclamped nodes are 'valid' */ for( i=0; ilayers && num >= 0 ; i++ ) if( !p->clamped[i] ) { if( num < p->num_nodes[i] ) { *layer = i; *node = num; } num -= p->num_nodes[i]; } } /* Compute the change of energy if states[layer][node] were flipped */ double energy_change(p, layer, node) struct machine *p; int layer, node; { int old,new; int i,j; double sum = 0; /* When state is OFF or ON, the change of energy is (new_state - old_state) * sum(w*x, for all weights leading into the node, except for the self-excitation weight) for an energy defined as: -sum[w(i,j)*x(i)*x(j)] */ /* Get the old state, and flip it */ old = p->states[layer][node]; flip_state(p, layer, node); new = p->states[layer][node]; /* 3 types of weights lead into node (layer,node): - weights from (layer-1) to (layer), if such exist - weights from (layer) to (layer+1), if such exist - weights from (layer) to (layer), if such exist */ /* Connections from prev. layer */ if( layer-1 >= 0 ) for( j=0; jnum_nodes[layer-1]; j++ ) sum += p->weights[layer-1][j][node] * p->states[layer-1][j]; /* Connections to next layer */ if( layer < p->layers-1 ) for( j=0; jnum_nodes[layer+1]; j++) sum += p->weights[layer][node][j] * p->states[layer+1][j]; /* Connections in the same layer. The internal connections are stored in internal[layer][a][b] iff a>b. If b>a, then they are in internal[layer][b][a]. The reason is that internal is a symmetric matrix */ if( p->internal_exist[layer] ) { for( j=0; jnum_nodes[layer]; j++ ) if( j < node ) sum += p->internal[layer][node][j] * p->states[layer][j]; else if( j > node ) sum += p->internal[layer][j][node] * p->states[layer][j]; } /* Restore old state */ p->states[layer][node] = old; /* printf("Sum=%lf\n",sum); printf("New %d Old %d\n",new,old); printf("New-old %d\n",new-old); printf("Sum of weights and states for [%d][%d] is %lf\n",layer,node,sum); printf("Returning %lf\n",(new-old)*sum); */ return((new-old)*sum); } /* Initialize the state of the machine by setting all nodes to random states */ void initstate(p) struct machine *p; { int i,j; /* All nodes in state ON */ for( i=1; ilayers; i++) if( !p->clamped[i] ) for( j=0; jnum_nodes[i]; j++) p->states[i][j] = ON; /* Randomly flip state of all nodes in layer diff. than input */ for( i=1; ilayers; i++) if( !p->clamped[i] ) for( j=0; jnum_nodes[i]; j++) if( get_1bit() ) flip_state(p,i,j); } /* Which number of energy changes does the machine perform for a given temperature */ int get_num_changes(t,p) double t; struct machine *p; { double num = p->t0 / t; int i; double total=0; for( i=0; ilayers; i++) total += (1-p->clamped[i]) * num * p->num_nodes[i]; return((int)total); } /* Is a change of dE acceptable at temperature t? */ accept_change(dE,t) double dE,t; { double prob, rand; /* Always accept changes that increase the energy */ if( dE > 0 ) return( 1 ); /* If the change decreases the energy, accept it with a certain probability */ prob = 1 / (1 + exp_lookup(-dE/t) ); rand = get_rand(0.0, 1.0); return( rand < prob ); } /* Simulated annealing over machine p. The temperature is constantly decreasing over a set of values. For each temperature a set of state changes are performed on randomly selected (non clamped!) nodes. Each state change is selected with a certain probability. If it decreases the total energy, it is selected, if not it is selected with a probability that is a function of the temperature */ void anneal(p) struct machine *p; { double temp = p->t0; int i, n; int node, layer; double dE; /* Vary the temperature */ while( temp >= p->tmin ) { /* For each temperature, perform a number of operations which is a function of the temperature of annealing. */ n = get_num_changes(temp, p); for( i=0; ibeta; } } /* Change the value for only 1 node, on temperature t. On this temperature accept the change if it increases the energy, and accept it with some probability, if it decreases it. Probability depends on t */ void anneal_1_step(p,t) struct machine *p; double t; { int node, layer; double dE; select_node(p, &layer, &node); dE = energy_change(p, layer, node); if( accept_change(dE, t) ) flip_state(p, layer, node); } /* Lookup table for values of exp(x). x starts from 0 and increases in steps of 0.02 */ double evalues[750]; /* Initialize lookup table */ void init_exp_lookup() { double n; int i; for( i=0; i<750; i++ ) { n = (double) i / 50; evalues[i] = exp(n); } } /* Lookup the value for exp(n) */ double exp_lookup(n) double n; { int i; i = n * 50; return( (i>499) ? evalues[499] : evalues[i] ); } /* Get the net input to node [layer][node]. Net input is calculated as a function of the mean states of the surrounding nodes */ double get_mean_input(p, layer, node) struct machine *p; int layer, node; { double sum = 0; int j; /* Connections from prev. layer */ if( layer-1 >= 0 ) for( j=0; jnum_nodes[layer-1]; j++ ) sum += p->weights[layer-1][j][node] * p->mean_states[layer-1][j]; /* Connections to next layer */ if( layer < p->layers-1 ) for( j=0; jnum_nodes[layer+1]; j++) sum += p->weights[layer][node][j] * p->mean_states[layer+1][j]; /* Connections in the same layer */ if( p->internal_exist[layer] ) { for( j=0; jnum_nodes[layer]; j++ ) if( j < node ) sum += p->internal[layer][node][j] * p->mean_states[layer][j]; else if( j > node ) sum += p->internal[layer][j][node] * p->mean_states[layer][j]; } return(sum); } /* Gets mean state of a node, depending on the sum and temperature. Is a mapping of the tanh(sum/temp) from the (-1,+1) to the (OFF,ON) interval. */ double get_mean_state( sum, temp) double sum, temp; { double x; x = tanh(sum/temp); /* Compute tanh */ /* x = (x+1); /* Map to (0,2) x = x * (ON-OFF) / 2.0; /* Map to (0,(ON-OFF)) x = x + OFF; /* Map to (OFF,ON) */ return(x); } /* mean field annealing algorithm. Perform mean field annealing for each temperature. for now: just one pass to determine the average values of the nodes */ void mean_field_anneal(p) struct machine *p; { double temp = p->t0; double sum; int layer, node, i, j; int repetitions=2; /* assign real values to average values */ for( i=0; ilayers; i++) for( j=0; jnum_nodes[i]; j++) p->mean_states[i][j] = p->states[i][j]; /* For all temperatures */ while( temp >= p->tmin ) { /* repeat a few times */ for( i=0; ilayers; layer++) if( !p->clamped[layer] ) for( node=0; nodenum_nodes[layer]; node++) { sum = get_mean_input(p, layer, node); p->mean_states[layer][node] = get_mean_state(sum, temp); } temp *= p->beta; } /* At the end of the "annealing", find the probabilities, and update the node states accordingly! */ for( layer=0; layerlayers; layer++) for( node=0; nodenum_nodes[layer]; node++) { p->pon[layer][node] = (p->mean_states[layer][node] - OFF); p->pon[layer][node] /= (double) (ON-OFF); p->poff[layer][node] = 1 - p->pon[layer][node]; if( p->pon[layer][node] > p->poff[layer][node] ) p->states[layer][node] = ON; else p->states[layer][node] = OFF; } } void boltzmann(), trainboltzmann(); void initrandom(), initweights(); void openfiles(), writeoutput(); struct machine *readinput(), *readfromfile(), *readfromkeyboard(); void phase1(), phase2(); void empty_stats(), noisyclamp(), update_counters(), normalize_counters(); void update_mean_anneal_counters(), mean_field_anneal(); double get_mean_input(); double modify_weights(); /* argv[1] - input structure file argv[2] - input data file argv[3] - output weights file argv[4] - (optional) input weights file */ void main(argc, argv) int argc; char *argv[]; { if( argc == 1 ) boltzmann(NULL, NULL, NULL, NULL); else { if( argv[1][0] == '-' && argv[1][1] == 'c' ) if( argc == 3) boltzmann(NULL, NULL, NULL, argv[2]); else boltzmann(argv[2],argv[3],argv[4],argv[5]); else boltzmann(argv[1],argv[2],argv[3],NULL); } } /* Initialize both the inter-layer and the intra-layer weights. The existence of the intra-layer weights is controlled by an existence matrix */ void initweights(p) struct machine *p; { int i,j,k; /* Initialize inter-layer weights */ for( i=0; ilayers-1; i++ ) for( j=0; jnum_nodes[i]; j++ ) for( k=0; knum_nodes[i+1]; k++ ) p->weights[i][j][k] = get_rand(-1.0,1.0); /* Initialize internal layer weights */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i]; j++ ) for( k=0; kinternal[i][j][k] = get_rand(-1.0,1.0); } void empty_counters(p, weights, internal) struct machine *p; double ***weights, ***internal; { int i,j,k; /* The weights */ for( i=0; ilayers-1; i++) for( j=0; jnum_nodes[i] ; j++ ) for( k=0; knum_nodes[i+1]; k++ ) weights[i][j][k]=0; /* The internal connections, if they exist */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i] ; j++ ) for( k=0; knum_nodes[layer]; for( i=0; istates[layer][i] = source[i]; noisy_flip_state(p,layer,i); } } /* Update the probability counters */ void update_counters(p, pweights, pinternal ) struct machine *p; double ***pweights, ***pinternal; { int i,j,k; /* First check the states on each inter-layer connection */ for( i=0; ilayers-1; i++) for( j=0; jnum_nodes[i]; j++) for( k=0; knum_nodes[i+1]; k++) if( p->states[i][j] == p->states[i+1][k] ) /* If the states are the same */ pweights[i][j][k]++; /* Check the states on each intra-layer connection. */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i]; j++ ) for( k=0; kstates[i][j] == p->states[i][k] ) pinternal[i][j][k]++; } /* Normalize the probability counters, by dividing by n */ void normalize_counters(p, pweights, pinternal, n ) struct machine *p; double ***pweights, ***pinternal; int n; { int i,j,k; /* First normalize the prob. on the inter-layer connections */ for( i=0; ilayers-1; i++) for( j=0; jnum_nodes[i]; j++) for( k=0; knum_nodes[i+1]; k++) pweights[i][j][k] /= n; /* Normalize the prob. on the intra-layer connections. */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i]; j++ ) for( k=0; kpon; double **poff = p->poff; /* First find the probabilities on the inter-layer connections */ for( i=0; ilayers-1; i++) for( j=0; jnum_nodes[i]; j++) for( k=0; knum_nodes[i+1]; k++) pweights[i][j][k] += pon[i][j] * pon[i+1][k] + poff[i][j] * poff[i+1][k]; /* Find the prob. on the intra-layer connections. */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i]; j++ ) for( k=0; klayers; i++) { printf("Layer %d: ",i); for( j=0; jnum_nodes[i]; j++) printf("%2d ",p->states[i][j]); printf("\n"); } } /* Phase 1: clamp all inputs and outputs. Perform sim. annealing for input# n. Collect statistics for the input */ void phase1(p,n) struct machine *p; int n; { int i,j,k; int inlayer = 0; int outlayer = p->layers-1; int num_updates = 10; int total_updates; /* Initialize the prob. counters */ empty_counters(p, p->p1weights, p->p1internal); /* Clamp inputs and outputs */ initstate(p); p->clamped[inlayer] = p->clamped[outlayer] = 1; /* For the input pattern, put input & output pattern in machine, anneal, update probability counters */ noisyclamp(p->inputs[n], p, inlayer); noisyclamp(p->outputs[n], p, outlayer); if( p->mean_field ) { mean_field_anneal(p); update_mean_anneal_counters(p, p->p1weights, p->p1internal); } else { anneal(p); for( k=0; ktmin); update_counters(p, p->p1weights, p->p1internal); } } if( p->mean_field ) total_updates = 1; else total_updates = num_updates; normalize_counters(p, p->p1weights, p->p1internal, total_updates); /* Release ONLY outputs */ p->clamped[outlayer] = 0; } /* Phase 2: clamp all inputs. Perform sim. annealing for input# n. Collect statistics across the input */ void phase2(p,n) struct machine *p; int n; { int i,j,k; int inlayer = 0; int num_updates = 10; int total_updates; /* Initialize the probability counters */ empty_counters(p, p->p2weights, p->p2internal); /* (inputs already clamped from phase 1)*/ /* For each input pattern, put input pattern in machine (noisy), anneal and update probability counters */ initstate(p); if( p->mean_field ) { mean_field_anneal(p); update_mean_anneal_counters(p, p->p2weights, p->p2internal); } else { anneal(p); for( k=0; ktmin); update_counters(p, p->p2weights, p->p2internal); } } if( p->mean_field ) total_updates = 1; else total_updates = num_updates; normalize_counters(p, p->p2weights, p->p2internal, total_updates); /* Release the inputs */ p->clamped[inlayer] = 0; } /* Modify the weights based on the statistics */ double modify_weights(p) struct machine *p; { int i,j,k; int num = 0; double p1,p2; double sum=0; /* First modify the weights on each inter-layer connection */ for( i=0; ilayers-1; i++) for( j=0; jnum_nodes[i]; j++) for( k=0; knum_nodes[i+1]; k++) { if( (p1=p->p1weights[i][j][k]) != (p2=p->p2weights[i][j][k]) ) { p->weights[i][j][k] += p->eta * (p1 - p2); sum += (p1-p2)*(p1-p2); } num++; } /* Modify the weights on each intra-layer connection. */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i]; j++ ) for( k=0; kp1internal[i][j][k]) != (p2=p->p2internal[i][j][k]) ) { p->internal[i][j][k] += p->eta * (p1 - p2); sum += (p1-p2)*(p1-p2); } num++; } return(sum/num); } /* Training for each pattern */ double trainforpattern(p,n) struct machine *p; int n; { phase1(p,n); phase2(p,n); return( modify_weights(p) ); } /* The Boltzmann machine training algorithm */ void trainboltzmann(p) struct machine *p; { int change = 1, change2 = 1; int iter = 0; int i; double sum,oldsum; init_exp_lookup(); initstate(p); while( change+change2 && iter < p->max_iter ) { sum = 0; for( i=0; inum_inputs; i++) sum += trainforpattern(p,i); sum /= p->num_inputs; change2 = change; if( sum < p->converge ) change = 0; else change = 1; printf("Iteration number %d, maximum %d, changes %lf\n",iter+1, p->max_iter, sum); iter++; } } /* Opens the files. If an error occurs in opening the files, the program exits */ void openfiles(infile, outfile, in, out) char *infile, *outfile; FILE **in, **out; { if( infile ) if( (*in = fopen(infile,"rt")) == NULL ) { printf("Cannot open file %s for reading\n",infile); _exit(0); } if( outfile ) if( (*out = fopen(outfile,"wt")) == NULL ) { printf("Cannot open file %s for writing\n",outfile); _exit(0); } } /* If in == NULL, the user will be interactively queried for all the values. One of the values: the weights file, will give pout its value. */ struct machine *readinput(in, pout, data) FILE *in, *data; FILE **pout; { struct machine *p=NULL; if( in ) p = readfromfile(in, data); else p = readfromkeyboard(pout); return(p); } /* Prompts user for parameters of the machine, as well as names of data and output files. Allocates the structure machine and places all values of interest in it. Returns the pointer towards the structure, and opens the weights file for output and places it's handler in pout. */ struct machine *readfromkeyboard(pout) FILE **pout; { char datafile[50], weightfile[50]; FILE *data; int i, j; char c; struct machine *p; /* Initialize the machine */ if( (p=get_machine()) == NULL) return(p); printf("Enter the name of the training data file: "); scanf("%s", datafile); printf("Enter the name of the output weight file: "); scanf("%s", weightfile); openfiles(datafile, weightfile, &data, pout); /* Read parameters and allocate arrays. On unsuccesfull allocation, deallocate all and return a NULL pointer, signifying non-success */ printf("Enter the number of layers in the machine: "); scanf("%d",&p->layers); if( (p->num_nodes = (int *)malloc(p->layers * sizeof(int))) == NULL) { deallocate_machine(p); return(NULL); } /* Read number of nodes in each layer */ for( i=0; ilayers; i++) { printf("Enter number of nodes in layer %d: ",i); scanf("%d",p->num_nodes+i); } /* Read the existence matrix */ p->internal_exist = (int *)malloc(p->layers*sizeof(int)); if( p->internal_exist == NULL ) { deallocate_machine(p); return(NULL); } for( i=0; ilayers; i++ ) { printf("Is layer %d is exhaustively interconnected; 1-yes, 0-no: ", i); scanf("%d", p->internal_exist + i); } /* Read the training parameters */ printf("Enter the max. number of iterations: "); scanf("%d", &p->max_iter); printf("Enter the learning rate: "); scanf("%lf", &p->eta); printf("Enter the convergence criterion: "); scanf("%lf", &p->converge); printf("Enter the initial annealing temperature: "); scanf("%lf", &p->t0); printf("Enter the final annealing temperature: "); scanf("%lf", &p->tmin); printf("How much shall the temperature be modified between iterations: "); scanf("%lf", &p->beta); printf("Which annealing algorithm shall the machine use?\n"); printf("a - regular annealing; m - mean field annealing: "); scanf("\n%c", &c); if( c == 'm' ) p->mean_field = 1; else if( c == 'a' ) p->mean_field = 0; if( p->mean_field ) if( !allocate_mean_field(p) ) { deallocate_machine(p); return(NULL); } /* Read the # of inputs, then the inputs themselves */ printf("Enter the number of training samples: "); scanf("%d", &p->num_inputs); /* All structural parameters are known. Attempt to allocate weights in machine */ if( !allocate_weights(p) ) { deallocate_machine(p); return(NULL); } /* Now read all the inputs and the outputs*/ for( i=0; inum_inputs; i++ ) { for( j=0; jnum_nodes[0]; j++) fscanf(data, "%d ", p->inputs[i] + j); fscanf(data,"; "); for( j=0; jnum_nodes[p->layers-1]; j++) fscanf(data, "%d ", p->outputs[i] + j); fscanf(data, "\n"); } /* Mark all layers as unclamped */ for( i=0; ilayers; i++ ) p->clamped[i] = 0; return(p); } /* Reads the input file, and initializes the structure machine. Places all values of interest in the structure. If any one of the arrays in the structure cannot be allocated, it deallocates the whole structure and returns NULL */ struct machine *readfromfile(in, data) FILE *in, *data; { struct machine *p=NULL; int i,j; char c; /* Initialize the machine */ if( (p=get_machine()) == NULL) return(p); /* Read file and allocate arrays. On unsuccesfull allocation, deallocate all and return a NULL pointer, signifying non-success */ fscanf(in, "%d\n", &(p->layers)); if( (p->num_nodes = (int *)malloc(p->layers * sizeof(int))) == NULL) { deallocate_machine(p); return(NULL); } /* Read number of nodes in each layer */ for( i=0; ilayers; i++ ) fscanf(in, "%d ",p->num_nodes+i); fscanf(in,"\n"); /* Read the existence matrix */ p->internal_exist = (int *)malloc(p->layers*sizeof(int)); if( p->internal_exist == NULL ) { deallocate_machine(p); return(NULL); } for( i=0; ilayers; i++ ) fscanf(in, "%d ", p->internal_exist + i); fscanf(in,"\n"); /* Read the training parameters */ fscanf(in, "%d %lf %lf\n", &(p->max_iter), &(p->eta), &(p->converge) ); fscanf(in, "%lf %lf %lf %c\n",&(p->t0), &(p->beta), &(p->tmin),&c); if( c == 'm' ) p->mean_field = 1; else if( c == 'a' ) p->mean_field = 0; if( p->mean_field ) if( !allocate_mean_field(p) ) { deallocate_machine(p); return(NULL); } /* Read the number of inputs */ fscanf(in, "%d\n", &(p->num_inputs)); /* All structural parameters are known. Attempt to allocate weights in machine */ if( !allocate_weights(p) ) { deallocate_machine(p); return(NULL); } /* Now read all the inputs and the outputs*/ for( i=0; inum_inputs; i++ ) { for( j=0; jnum_nodes[0]; j++) fscanf(data, "%d ", p->inputs[i] + j); fscanf(data,"; "); for( j=0; jnum_nodes[p->layers-1]; j++) fscanf(data, "%d ", p->outputs[i] + j); fscanf(data, "\n"); } /* Mark all layers as unclamped */ for( i=0; ilayers; i++ ) p->clamped[i] = 0; return(p); } /* Reads the weights from the weight file, and places them into the weight matrices */ readweights(p,wf) struct machine *p; FILE *wf; { int i,j,k; int dump; /* The first few params are irrelevant when continuing with the training. */ fscanf(wf, "%d\n",&dump); for( i=0; ilayers; i++) fscanf(wf, "%d ",&dump); fscanf(wf, "\n"); /* The existence matrix */ for( i=0; ilayers; i++) fscanf(wf, "%d ", &dump); fscanf(wf, "\n"); /* The weights */ for( i=0; ilayers-1; i++) for( j=0; jnum_nodes[i] ; j++ ) for( k=0; knum_nodes[i+1]; k++ ) fscanf(wf, "%lf\n",&(p->weights[i][j][k])); /* The internal connections, if they exist */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i] ; j++ ) for( k=0; kinternal[i][j][k])); } /* Writes info on the machine's configuration, values of weights etc. in the 'out' file */ void writeoutput(p, out) struct machine *p; FILE *out; { int i,j,k; /* No. of layers */ fprintf(out, "%d\n",p->layers); /* No. of nodes in each layer */ for( i=0; ilayers; i++) fprintf(out, "%d ",p->num_nodes[i]); fprintf(out, "\n"); /* The existence matrix */ for( i=0; ilayers; i++) fprintf(out, "%d ", p->internal_exist[i]); fprintf(out, "\n"); /* The weights */ for( i=0; ilayers-1; i++) for( j=0; jnum_nodes[i] ; j++ ) for( k=0; knum_nodes[i+1]; k++ ) fprintf(out, "%lf\n",p->weights[i][j][k]); /* The internal connections, if they exist */ for( i=0; ilayers; i++) if( p->internal_exist[i] ) for( j=0; jnum_nodes[i] ; j++ ) for( k=0; kinternal[i][j][k]); } /* The boltzmann machine algorithm. Reads in the input file, (and interprets it) trains the boltzmann machine and outputs the results. This part does all the allocations and deallocations of memory connected to the storage of the weights needed for the training of the network */ void boltzmann(infile, datafile, outfile,weightfile) char *infile, *datafile, *outfile, *weightfile; { FILE *in, *out, *data, *weights; struct machine *p; long t1,t2; t1 = clock(); in = out = data = weights = NULL; openfiles(infile, outfile, &in, &out); openfiles(datafile, NULL, &data, NULL); if( weightfile ) openfiles(weightfile, NULL, &weights, NULL); if( (p = readinput(in, &out, data)) == NULL) { printf("Not enough memory\n"); _exit(0); } initrandom(); if( weightfile ) readweights(p,weights); else initweights(p); trainboltzmann(p); writeoutput(p,out); deallocate_machine(p); t2 = clock(); printf("Elapsed time %ld microseconds\n",t2-t1); }