mirror of
https://github.com/lxsang/antd-lua-plugin
synced 2025-01-01 03:58:22 +01:00
1113 lines
30 KiB
C++
1113 lines
30 KiB
C++
|
/*
|
||
|
* parallel_FANN.cpp
|
||
|
* Author: Alessandro Pietro Bardelli
|
||
|
*/
|
||
|
#ifndef DISABLE_PARALLEL_FANN
|
||
|
#include "parallel_fann.hpp"
|
||
|
#include <omp.h>
|
||
|
using namespace std;
|
||
|
namespace parallel_fann {
|
||
|
// TODO rewrite all these functions in c++ using fann_cpp interface
|
||
|
|
||
|
float train_epoch_batch_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
|
||
|
{
|
||
|
fann_reset_MSE(ann);
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
fann_run(ann_vect[j], data->input[i]);
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//parallel update of the weights
|
||
|
{
|
||
|
const unsigned int num_data=data->num_data;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
fann_type *weights = ann->weights;
|
||
|
const fann_type epsilon = ann->learning_rate / num_data;
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
weights[i] += temp_slopes*epsilon;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
float train_epoch_irpropm_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
|
||
|
{
|
||
|
|
||
|
if(ann->prev_train_slopes == NULL)
|
||
|
{
|
||
|
fann_clear_train_arrays(ann);
|
||
|
}
|
||
|
|
||
|
//#define THREADNUM 1
|
||
|
fann_reset_MSE(ann);
|
||
|
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
fann_run(ann_vect[j], data->input[i]);
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
fann_type *weights = ann->weights;
|
||
|
fann_type *prev_steps = ann->prev_steps;
|
||
|
fann_type *prev_train_slopes = ann->prev_train_slopes;
|
||
|
|
||
|
fann_type next_step;
|
||
|
|
||
|
const float increase_factor = ann->rprop_increase_factor; //1.2;
|
||
|
const float decrease_factor = ann->rprop_decrease_factor; //0.5;
|
||
|
const float delta_min = ann->rprop_delta_min; //0.0;
|
||
|
const float delta_max = ann->rprop_delta_max; //50.0;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(next_step)
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
|
||
|
const fann_type prev_step = fann_max(prev_steps[i], (fann_type) 0.0001); // prev_step may not be zero because then the training will stop
|
||
|
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
|
||
|
const fann_type prev_slope = prev_train_slopes[i];
|
||
|
|
||
|
const fann_type same_sign = prev_slope * temp_slopes;
|
||
|
|
||
|
if(same_sign >= 0.0)
|
||
|
next_step = fann_min(prev_step * increase_factor, delta_max);
|
||
|
else
|
||
|
{
|
||
|
next_step = fann_max(prev_step * decrease_factor, delta_min);
|
||
|
temp_slopes = 0;
|
||
|
}
|
||
|
|
||
|
if(temp_slopes < 0)
|
||
|
{
|
||
|
weights[i] -= next_step;
|
||
|
if(weights[i] < -1500)
|
||
|
weights[i] = -1500;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
weights[i] += next_step;
|
||
|
if(weights[i] > 1500)
|
||
|
weights[i] = 1500;
|
||
|
}
|
||
|
|
||
|
// update global data arrays
|
||
|
prev_steps[i] = next_step;
|
||
|
prev_train_slopes[i] = temp_slopes;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
float train_epoch_quickprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
|
||
|
{
|
||
|
|
||
|
if(ann->prev_train_slopes == NULL)
|
||
|
{
|
||
|
fann_clear_train_arrays(ann);
|
||
|
}
|
||
|
|
||
|
//#define THREADNUM 1
|
||
|
fann_reset_MSE(ann);
|
||
|
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
fann_run(ann_vect[j], data->input[i]);
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
fann_type *weights = ann->weights;
|
||
|
fann_type *prev_steps = ann->prev_steps;
|
||
|
fann_type *prev_train_slopes = ann->prev_train_slopes;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
|
||
|
fann_type w=0.0, next_step;
|
||
|
|
||
|
const float epsilon = ann->learning_rate / data->num_data;
|
||
|
const float decay = ann->quickprop_decay; /*-0.0001;*/
|
||
|
const float mu = ann->quickprop_mu; /*1.75; */
|
||
|
const float shrink_factor = (float) (mu / (1.0 + mu));
|
||
|
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(w, next_step)
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
|
||
|
w = weights[i];
|
||
|
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
temp_slopes+= decay * w;
|
||
|
|
||
|
const fann_type prev_step = prev_steps[i];
|
||
|
const fann_type prev_slope = prev_train_slopes[i];
|
||
|
|
||
|
next_step = 0.0;
|
||
|
|
||
|
|
||
|
/* The step must always be in direction opposite to the slope. */
|
||
|
if(prev_step > 0.001)
|
||
|
{
|
||
|
/* If last step was positive... */
|
||
|
if(temp_slopes > 0.0) /* Add in linear term if current slope is still positive. */
|
||
|
next_step += epsilon * temp_slopes;
|
||
|
|
||
|
/*If current slope is close to or larger than prev slope... */
|
||
|
if(temp_slopes > (shrink_factor * prev_slope))
|
||
|
next_step += mu * prev_step; /* Take maximum size negative step. */
|
||
|
else
|
||
|
next_step += prev_step * temp_slopes / (prev_slope - temp_slopes); /* Else, use quadratic estimate. */
|
||
|
}
|
||
|
else if(prev_step < -0.001)
|
||
|
{
|
||
|
/* If last step was negative... */
|
||
|
if(temp_slopes < 0.0) /* Add in linear term if current slope is still negative. */
|
||
|
next_step += epsilon * temp_slopes;
|
||
|
|
||
|
/* If current slope is close to or more neg than prev slope... */
|
||
|
if(temp_slopes < (shrink_factor * prev_slope))
|
||
|
next_step += mu * prev_step; /* Take maximum size negative step. */
|
||
|
else
|
||
|
next_step += prev_step * temp_slopes / (prev_slope - temp_slopes); /* Else, use quadratic estimate. */
|
||
|
}
|
||
|
else /* Last step was zero, so use only linear term. */
|
||
|
next_step += epsilon * temp_slopes;
|
||
|
|
||
|
/* update global data arrays */
|
||
|
prev_steps[i] = next_step;
|
||
|
prev_train_slopes[i] = temp_slopes;
|
||
|
|
||
|
w += next_step;
|
||
|
|
||
|
if(w > 1500)
|
||
|
weights[i] = 1500;
|
||
|
else if(w < -1500)
|
||
|
weights[i] = -1500;
|
||
|
else
|
||
|
weights[i] = w;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
float train_epoch_sarprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
|
||
|
{
|
||
|
if(ann->prev_train_slopes == NULL)
|
||
|
{
|
||
|
fann_clear_train_arrays(ann);
|
||
|
}
|
||
|
|
||
|
//#define THREADNUM 1
|
||
|
fann_reset_MSE(ann);
|
||
|
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
fann_run(ann_vect[j], data->input[i]);
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
fann_type *weights = ann->weights;
|
||
|
fann_type *prev_steps = ann->prev_steps;
|
||
|
fann_type *prev_train_slopes = ann->prev_train_slopes;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
const unsigned int epoch=ann->sarprop_epoch;
|
||
|
|
||
|
fann_type next_step;
|
||
|
|
||
|
/* These should be set from variables */
|
||
|
const float increase_factor = ann->rprop_increase_factor; /*1.2; */
|
||
|
const float decrease_factor = ann->rprop_decrease_factor; /*0.5; */
|
||
|
/* TODO: why is delta_min 0.0 in iRprop? SARPROP uses 1x10^-6 (Braun and Riedmiller, 1993) */
|
||
|
const float delta_min = 0.000001f;
|
||
|
const float delta_max = ann->rprop_delta_max; /*50.0; */
|
||
|
const float weight_decay_shift = ann->sarprop_weight_decay_shift; /* ld 0.01 = -6.644 */
|
||
|
const float step_error_threshold_factor = ann->sarprop_step_error_threshold_factor; /* 0.1 */
|
||
|
const float step_error_shift = ann->sarprop_step_error_shift; /* ld 3 = 1.585 */
|
||
|
const float T = ann->sarprop_temperature;
|
||
|
|
||
|
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
}
|
||
|
|
||
|
const float MSE = fann_get_MSE(ann);
|
||
|
const float RMSE = sqrtf(MSE);
|
||
|
|
||
|
/* for all weights; TODO: are biases included? */
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(next_step)
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
/* TODO: confirm whether 1x10^-6 == delta_min is really better */
|
||
|
const fann_type prev_step = fann_max(prev_steps[i], (fann_type) 0.000001); /* prev_step may not be zero because then the training will stop */
|
||
|
|
||
|
/* calculate SARPROP slope; TODO: better as new error function? (see SARPROP paper)*/
|
||
|
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
temp_slopes= -temp_slopes - weights[i] * (fann_type)fann_exp2(-T * epoch + weight_decay_shift);
|
||
|
|
||
|
next_step=0.0;
|
||
|
|
||
|
/* TODO: is prev_train_slopes[i] 0.0 in the beginning? */
|
||
|
const fann_type prev_slope = prev_train_slopes[i];
|
||
|
|
||
|
const fann_type same_sign = prev_slope * temp_slopes;
|
||
|
|
||
|
if(same_sign > 0.0)
|
||
|
{
|
||
|
next_step = fann_min(prev_step * increase_factor, delta_max);
|
||
|
/* TODO: are the signs inverted? see differences between SARPROP paper and iRprop */
|
||
|
if (temp_slopes < 0.0)
|
||
|
weights[i] += next_step;
|
||
|
else
|
||
|
weights[i] -= next_step;
|
||
|
}
|
||
|
else if(same_sign < 0.0)
|
||
|
{
|
||
|
#ifndef RAND_MAX
|
||
|
#define RAND_MAX 0x7fffffff
|
||
|
#endif
|
||
|
if(prev_step < step_error_threshold_factor * MSE)
|
||
|
next_step = prev_step * decrease_factor + (float)rand() / RAND_MAX * RMSE * (fann_type)fann_exp2(-T * epoch + step_error_shift);
|
||
|
else
|
||
|
next_step = fann_max(prev_step * decrease_factor, delta_min);
|
||
|
|
||
|
temp_slopes = 0.0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if(temp_slopes < 0.0)
|
||
|
weights[i] += prev_step;
|
||
|
else
|
||
|
weights[i] -= prev_step;
|
||
|
}
|
||
|
|
||
|
/* update global data arrays */
|
||
|
prev_steps[i] = next_step;
|
||
|
prev_train_slopes[i] = temp_slopes;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
++(ann->sarprop_epoch);
|
||
|
|
||
|
//already computed before
|
||
|
/*//merge of MSEs
|
||
|
for(i=0;i<threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
}*/
|
||
|
//destroy the copies of the ann
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
float train_epoch_incremental_mod(struct fann *ann, struct fann_train_data *data)
|
||
|
{
|
||
|
unsigned int i;
|
||
|
|
||
|
fann_reset_MSE(ann);
|
||
|
|
||
|
for(i = 0; i != data->num_data; i++)
|
||
|
{
|
||
|
fann_train(ann, data->input[i], data->output[i]);
|
||
|
}
|
||
|
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
//the following versions returns also the outputs via the predicted_outputs parameter
|
||
|
|
||
|
float train_epoch_batch_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb,vector< vector<fann_type> >& predicted_outputs)
|
||
|
{
|
||
|
fann_reset_MSE(ann);
|
||
|
predicted_outputs.resize(data->num_data,vector<fann_type> (data->num_output));
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
|
||
|
fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
|
||
|
for(unsigned int k=0;k<data->num_output;++k)
|
||
|
{
|
||
|
predicted_outputs[i][k]=temp_predicted_output[k];
|
||
|
}
|
||
|
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//parallel update of the weights
|
||
|
{
|
||
|
const unsigned int num_data=data->num_data;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
fann_type *weights = ann->weights;
|
||
|
const fann_type epsilon = ann->learning_rate / num_data;
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
weights[i] += temp_slopes*epsilon;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
float train_epoch_irpropm_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
|
||
|
{
|
||
|
|
||
|
if(ann->prev_train_slopes == NULL)
|
||
|
{
|
||
|
fann_clear_train_arrays(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
fann_reset_MSE(ann);
|
||
|
predicted_outputs.resize(data->num_data,vector<fann_type> (data->num_output));
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
|
||
|
fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
|
||
|
for(unsigned int k=0;k<data->num_output;++k)
|
||
|
{
|
||
|
predicted_outputs[i][k]=temp_predicted_output[k];
|
||
|
}
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
fann_type *weights = ann->weights;
|
||
|
fann_type *prev_steps = ann->prev_steps;
|
||
|
fann_type *prev_train_slopes = ann->prev_train_slopes;
|
||
|
|
||
|
fann_type next_step;
|
||
|
|
||
|
const float increase_factor = ann->rprop_increase_factor; //1.2;
|
||
|
const float decrease_factor = ann->rprop_decrease_factor; //0.5;
|
||
|
const float delta_min = ann->rprop_delta_min; //0.0;
|
||
|
const float delta_max = ann->rprop_delta_max; //50.0;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(next_step)
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
|
||
|
const fann_type prev_step = fann_max(prev_steps[i], (fann_type) 0.0001); // prev_step may not be zero because then the training will stop
|
||
|
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
|
||
|
const fann_type prev_slope = prev_train_slopes[i];
|
||
|
|
||
|
const fann_type same_sign = prev_slope * temp_slopes;
|
||
|
|
||
|
if(same_sign >= 0.0)
|
||
|
next_step = fann_min(prev_step * increase_factor, delta_max);
|
||
|
else
|
||
|
{
|
||
|
next_step = fann_max(prev_step * decrease_factor, delta_min);
|
||
|
temp_slopes = 0;
|
||
|
}
|
||
|
|
||
|
if(temp_slopes < 0)
|
||
|
{
|
||
|
weights[i] -= next_step;
|
||
|
if(weights[i] < -1500)
|
||
|
weights[i] = -1500;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
weights[i] += next_step;
|
||
|
if(weights[i] > 1500)
|
||
|
weights[i] = 1500;
|
||
|
}
|
||
|
|
||
|
// update global data arrays
|
||
|
prev_steps[i] = next_step;
|
||
|
prev_train_slopes[i] = temp_slopes;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
float train_epoch_quickprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
|
||
|
{
|
||
|
|
||
|
if(ann->prev_train_slopes == NULL)
|
||
|
{
|
||
|
fann_clear_train_arrays(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
fann_reset_MSE(ann);
|
||
|
predicted_outputs.resize(data->num_data,vector<fann_type> (data->num_output));
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
|
||
|
fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
|
||
|
for(unsigned int k=0;k<data->num_output;++k)
|
||
|
{
|
||
|
predicted_outputs[i][k]=temp_predicted_output[k];
|
||
|
}
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
fann_type *weights = ann->weights;
|
||
|
fann_type *prev_steps = ann->prev_steps;
|
||
|
fann_type *prev_train_slopes = ann->prev_train_slopes;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
|
||
|
fann_type w=0.0, next_step;
|
||
|
|
||
|
const float epsilon = ann->learning_rate / data->num_data;
|
||
|
const float decay = ann->quickprop_decay; /*-0.0001;*/
|
||
|
const float mu = ann->quickprop_mu; /*1.75; */
|
||
|
const float shrink_factor = (float) (mu / (1.0 + mu));
|
||
|
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(w, next_step)
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
|
||
|
w = weights[i];
|
||
|
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
temp_slopes+= decay * w;
|
||
|
|
||
|
const fann_type prev_step = prev_steps[i];
|
||
|
const fann_type prev_slope = prev_train_slopes[i];
|
||
|
|
||
|
next_step = 0.0;
|
||
|
|
||
|
|
||
|
/* The step must always be in direction opposite to the slope. */
|
||
|
if(prev_step > 0.001)
|
||
|
{
|
||
|
/* If last step was positive... */
|
||
|
if(temp_slopes > 0.0) /* Add in linear term if current slope is still positive. */
|
||
|
next_step += epsilon * temp_slopes;
|
||
|
|
||
|
/*If current slope is close to or larger than prev slope... */
|
||
|
if(temp_slopes > (shrink_factor * prev_slope))
|
||
|
next_step += mu * prev_step; /* Take maximum size negative step. */
|
||
|
else
|
||
|
next_step += prev_step * temp_slopes / (prev_slope - temp_slopes); /* Else, use quadratic estimate. */
|
||
|
}
|
||
|
else if(prev_step < -0.001)
|
||
|
{
|
||
|
/* If last step was negative... */
|
||
|
if(temp_slopes < 0.0) /* Add in linear term if current slope is still negative. */
|
||
|
next_step += epsilon * temp_slopes;
|
||
|
|
||
|
/* If current slope is close to or more neg than prev slope... */
|
||
|
if(temp_slopes < (shrink_factor * prev_slope))
|
||
|
next_step += mu * prev_step; /* Take maximum size negative step. */
|
||
|
else
|
||
|
next_step += prev_step * temp_slopes / (prev_slope - temp_slopes); /* Else, use quadratic estimate. */
|
||
|
}
|
||
|
else /* Last step was zero, so use only linear term. */
|
||
|
next_step += epsilon * temp_slopes;
|
||
|
|
||
|
/* update global data arrays */
|
||
|
prev_steps[i] = next_step;
|
||
|
prev_train_slopes[i] = temp_slopes;
|
||
|
|
||
|
w += next_step;
|
||
|
|
||
|
if(w > 1500)
|
||
|
weights[i] = 1500;
|
||
|
else if(w < -1500)
|
||
|
weights[i] = -1500;
|
||
|
else
|
||
|
weights[i] = w;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
float train_epoch_sarprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
|
||
|
{
|
||
|
|
||
|
if(ann->prev_train_slopes == NULL)
|
||
|
{
|
||
|
fann_clear_train_arrays(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
fann_reset_MSE(ann);
|
||
|
predicted_outputs.resize(data->num_data,vector<fann_type> (data->num_output));
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; i++)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
|
||
|
fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
|
||
|
for(unsigned int k=0;k<data->num_output;++k)
|
||
|
{
|
||
|
predicted_outputs[i][k]=temp_predicted_output[k];
|
||
|
}
|
||
|
fann_compute_MSE(ann_vect[j], data->output[i]);
|
||
|
fann_backpropagate_MSE(ann_vect[j]);
|
||
|
fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
fann_type *weights = ann->weights;
|
||
|
fann_type *prev_steps = ann->prev_steps;
|
||
|
fann_type *prev_train_slopes = ann->prev_train_slopes;
|
||
|
const unsigned int first_weight=0;
|
||
|
const unsigned int past_end=ann->total_connections;
|
||
|
const unsigned int epoch=ann->sarprop_epoch;
|
||
|
|
||
|
fann_type next_step;
|
||
|
|
||
|
/* These should be set from variables */
|
||
|
const float increase_factor = ann->rprop_increase_factor; /*1.2; */
|
||
|
const float decrease_factor = ann->rprop_decrease_factor; /*0.5; */
|
||
|
/* TODO: why is delta_min 0.0 in iRprop? SARPROP uses 1x10^-6 (Braun and Riedmiller, 1993) */
|
||
|
const float delta_min = 0.000001f;
|
||
|
const float delta_max = ann->rprop_delta_max; /*50.0; */
|
||
|
const float weight_decay_shift = ann->sarprop_weight_decay_shift; /* ld 0.01 = -6.644 */
|
||
|
const float step_error_threshold_factor = ann->sarprop_step_error_threshold_factor; /* 0.1 */
|
||
|
const float step_error_shift = ann->sarprop_step_error_shift; /* ld 3 = 1.585 */
|
||
|
const float T = ann->sarprop_temperature;
|
||
|
|
||
|
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
}
|
||
|
|
||
|
const float MSE = fann_get_MSE(ann);
|
||
|
const float RMSE = (float)sqrt(MSE);
|
||
|
|
||
|
/* for all weights; TODO: are biases included? */
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(next_step)
|
||
|
{
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=first_weight; i < (int)past_end; i++)
|
||
|
{
|
||
|
/* TODO: confirm whether 1x10^-6 == delta_min is really better */
|
||
|
const fann_type prev_step = fann_max(prev_steps[i], (fann_type) 0.000001); /* prev_step may not be zero because then the training will stop */
|
||
|
|
||
|
/* calculate SARPROP slope; TODO: better as new error function? (see SARPROP paper)*/
|
||
|
|
||
|
fann_type temp_slopes=0.0;
|
||
|
unsigned int k;
|
||
|
fann_type *train_slopes;
|
||
|
for(k=0;k<threadnumb;++k)
|
||
|
{
|
||
|
train_slopes=ann_vect[k]->train_slopes;
|
||
|
temp_slopes+= train_slopes[i];
|
||
|
train_slopes[i]=0.0;
|
||
|
}
|
||
|
temp_slopes= -temp_slopes - weights[i] * (fann_type)fann_exp2(-T * epoch + weight_decay_shift);
|
||
|
|
||
|
next_step=0.0;
|
||
|
|
||
|
/* TODO: is prev_train_slopes[i] 0.0 in the beginning? */
|
||
|
const fann_type prev_slope = prev_train_slopes[i];
|
||
|
|
||
|
const fann_type same_sign = prev_slope * temp_slopes;
|
||
|
|
||
|
if(same_sign > 0.0)
|
||
|
{
|
||
|
next_step = fann_min(prev_step * increase_factor, delta_max);
|
||
|
/* TODO: are the signs inverted? see differences between SARPROP paper and iRprop */
|
||
|
if (temp_slopes < 0.0)
|
||
|
weights[i] += next_step;
|
||
|
else
|
||
|
weights[i] -= next_step;
|
||
|
}
|
||
|
else if(same_sign < 0.0)
|
||
|
{
|
||
|
#ifndef RAND_MAX
|
||
|
#define RAND_MAX 0x7fffffff
|
||
|
#endif
|
||
|
if(prev_step < step_error_threshold_factor * MSE)
|
||
|
next_step = prev_step * decrease_factor + (float)rand() / RAND_MAX * RMSE * (fann_type)fann_exp2(-T * epoch + step_error_shift);
|
||
|
else
|
||
|
next_step = fann_max(prev_step * decrease_factor, delta_min);
|
||
|
|
||
|
temp_slopes = 0.0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if(temp_slopes < 0.0)
|
||
|
weights[i] += prev_step;
|
||
|
else
|
||
|
weights[i] -= prev_step;
|
||
|
}
|
||
|
|
||
|
/* update global data arrays */
|
||
|
prev_steps[i] = next_step;
|
||
|
prev_train_slopes[i] = temp_slopes;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
++(ann->sarprop_epoch);
|
||
|
|
||
|
//already computed before
|
||
|
/*//merge of MSEs
|
||
|
for(i=0;i<threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
}*/
|
||
|
//destroy the copies of the ann
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
|
||
|
float train_epoch_incremental_mod(struct fann *ann, struct fann_train_data *data, vector< vector<fann_type> >& predicted_outputs)
|
||
|
{
|
||
|
|
||
|
predicted_outputs.resize(data->num_data,vector<fann_type> (data->num_output));
|
||
|
fann_reset_MSE(ann);
|
||
|
|
||
|
for(unsigned int i = 0; i < data->num_data; ++i)
|
||
|
{
|
||
|
fann_type* temp_predicted_output=fann_run(ann, data->input[i]);
|
||
|
for(unsigned int k=0;k<data->num_output;++k)
|
||
|
{
|
||
|
predicted_outputs[i][k]=temp_predicted_output[k];
|
||
|
}
|
||
|
|
||
|
fann_compute_MSE(ann, data->output[i]);
|
||
|
|
||
|
fann_backpropagate_MSE(ann);
|
||
|
|
||
|
fann_update_weights(ann);
|
||
|
}
|
||
|
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
float test_data_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
|
||
|
{
|
||
|
if(fann_check_input_output_sizes(ann, data) == -1)
|
||
|
return 0;
|
||
|
|
||
|
fann_reset_MSE(ann);
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; ++i)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
fann_test(ann_vect[j], data->input[i],data->output[i]);
|
||
|
}
|
||
|
}
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
|
||
|
float test_data_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
|
||
|
{
|
||
|
if(fann_check_input_output_sizes(ann, data) == -1)
|
||
|
return 0;
|
||
|
predicted_outputs.resize(data->num_data,vector<fann_type> (data->num_output));
|
||
|
fann_reset_MSE(ann);
|
||
|
vector<struct fann *> ann_vect(threadnumb);
|
||
|
int i=0,j=0;
|
||
|
|
||
|
//generate copies of the ann
|
||
|
omp_set_dynamic(0);
|
||
|
omp_set_num_threads(threadnumb);
|
||
|
#pragma omp parallel private(j)
|
||
|
{
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i=0; i<(int)threadnumb; i++)
|
||
|
{
|
||
|
ann_vect[i]=fann_copy(ann);
|
||
|
}
|
||
|
|
||
|
//parallel computing of the updates
|
||
|
|
||
|
#pragma omp for schedule(static)
|
||
|
for(i = 0; i < (int)data->num_data; ++i)
|
||
|
{
|
||
|
j=omp_get_thread_num();
|
||
|
|
||
|
fann_type* temp_predicted_output=fann_test(ann_vect[j], data->input[i],data->output[i]);
|
||
|
for(unsigned int k=0;k<data->num_output;++k)
|
||
|
{
|
||
|
predicted_outputs[i][k]=temp_predicted_output[k];
|
||
|
}
|
||
|
|
||
|
}
|
||
|
}
|
||
|
//merge of MSEs
|
||
|
for(i=0;i<(int)threadnumb;++i)
|
||
|
{
|
||
|
ann->MSE_value+= ann_vect[i]->MSE_value;
|
||
|
ann->num_MSE+=ann_vect[i]->num_MSE;
|
||
|
fann_destroy(ann_vect[i]);
|
||
|
}
|
||
|
return fann_get_MSE(ann);
|
||
|
}
|
||
|
}
|
||
|
#endif /* DISABLE_PARALLEL_FANN */
|