00001
00005 #include <assert.h>
00006 #include "feedforwardnn.h"
00007 #include "optimize.h"
00008
00009 REGISTER_CREATOR(lemga::FeedForwardNN);
00010
00011 namespace lemga {
00012
00013 void FeedForwardNN::free_space () {
00014 for (UINT i = 1; i <= n_layer; ++i) {
00015 assert(layer[i] != NULL);
00016 delete layer[i];
00017 }
00018 layer.resize(1);
00019 n_layer = 0;
00020 }
00021
/** Default constructor: an empty network with the default training
 *  hyper-parameters (plain gradient descent, learning rate 0.01,
 *  at most 500 epochs, cost threshold 0). */
FeedForwardNN::FeedForwardNN ()
    : LearnModel(0,0), n_layer(0),
      online_learn(false), train_method(GRADIENT_DESCENT),
      learn_rate(0.01), min_cst(0), max_run(500)
{
    // layer[0] is a permanent NULL placeholder so that the real layers
    // can be indexed 1..n_layer throughout this file.
    layer.push_back(NULL);
}
00030
/** Copy constructor: copies all scalar state and the cached activation
 *  buffers, then deep-copies the layers so the two networks never share
 *  layer objects. */
FeedForwardNN::FeedForwardNN (const FeedForwardNN& nn)
    : LearnModel(nn), n_layer(nn.n_layer), _y(nn._y), _dy(nn._dy),
      online_learn(nn.online_learn), train_method(nn.train_method),
      learn_rate(nn.learn_rate), min_cst(nn.min_cst), max_run(nn.max_run)
{
    assert(n_layer+1 == nn.layer.size());
    layer.push_back(NULL);   // keep the index-0 sentinel convention
    for (UINT i = 1; i <= n_layer; ++i)
        layer.push_back(nn.layer[i]->clone());
}
00041
/** Destructor: releases all owned layer objects. */
FeedForwardNN::~FeedForwardNN () {
    free_space();
}
00045
00046 const FeedForwardNN& FeedForwardNN::operator= (const FeedForwardNN& nn) {
00047 if (&nn == this) return *this;
00048 LearnModel::operator=(nn);
00049
00050 free_space();
00051 n_layer = nn.n_layer;
00052 _y = nn._y;
00053 _dy = nn._dy;
00054 online_learn = nn.online_learn;
00055 train_method = nn.train_method;
00056 learn_rate = nn.learn_rate;
00057 min_cst = nn.min_cst;
00058 max_run = nn.max_run;
00059
00060 assert(n_layer+1 == nn.layer.size());
00061 for (UINT i = 1; i <= n_layer; ++i)
00062 layer.push_back(nn.layer[i]->clone());
00063
00064 return *this;
00065 }
00066
00067 bool FeedForwardNN::serialize (std::ostream& os, ver_list& vl) const {
00068 SERIALIZE_PARENT(LearnModel, os, vl, 1);
00069
00070 if (!(os << n_layer << '\n')) return false;
00071 if (!(os << online_learn << ' ' << learn_rate << ' '
00072 << min_cst << ' ' << max_run << '\n')) return false;
00073
00074 for (UINT i = 1; i <= n_layer; ++i)
00075 if (!(os << *layer[i])) return false;
00076 return true;
00077 }
00078
00079 bool
00080 FeedForwardNN::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
00081 if (d != id() && d != NIL_ID) return false;
00082 UNSERIALIZE_PARENT(LearnModel, is, vl, 1, v);
00083
00084 UINT tmp_layer;
00085 if (!(is >> tmp_layer) || tmp_layer == 0) return false;
00086
00087 std::vector<UINT> lsize;
00088 if (v == 0) {
00089 lsize.resize(tmp_layer+1);
00090 for (UINT i = 0; i <= tmp_layer; ++i)
00091 if (!(is >> lsize[i]) || lsize[i] == 0) return false;
00092 }
00093
00094 int online;
00095 if (!(is >> online >> learn_rate >> min_cst >> max_run))
00096 return false;
00097 if (online > 1 || learn_rate <= 0 || min_cst < 0 || max_run < 1)
00098 return false;
00099 online_learn = (online != 0);
00100
00101
00102 const UINT n_in_got = _n_in, n_out_got = _n_out;
00103 free_space();
00104 _y.clear();
00105 _dy.clear();
00106
00107 for (UINT i = 0; i < tmp_layer; ++i) {
00108 NNLayer* pl = (NNLayer*) Object::create(is);
00109 if (pl == 0) return false;
00110
00111 if (v == 0) {
00112 if (pl->n_input() != lsize[i] || pl->n_output() != lsize[i+1])
00113 return false;
00114 }
00115 else {
00116 static UINT last_output;
00117 if (i > 0 && pl->n_input() != last_output) return false;
00118 last_output = pl->n_output();
00119 }
00120
00121 add_top(*pl); delete pl;
00122 }
00123 if (v > 0)
00124 if (n_in_got != _n_in || n_out_got != _n_out) return false;
00125
00126 return true;
00127 }
00128
/** Append a clone of nl as the new top (output) layer.
 *  nl's input size must match the current network output size, unless
 *  this is the very first layer, which also fixes the network input. */
void FeedForwardNN::add_top (const NNLayer& nl) {
    assert(n_layer+1 == layer.size());
    assert(n_output() == nl.n_input() || n_layer == 0);  // sizes must chain
    if (n_layer == 0) {
        // First layer: set the network input size and create the
        // activation/derivative buffers for the input "layer 0".
        assert(_y.empty() && _dy.empty());
        _n_in = nl.n_input();
        _y.push_back(Output(_n_in));
        _dy.push_back(Output(_n_in));
    }

    n_layer++;
    _n_out = nl.n_output();       // network output tracks the top layer
    layer.push_back(nl.clone());  // owned clone; released in free_space()
    _y.push_back(Output(nl.n_output()));
    _dy.push_back(Output(nl.n_output()));
}
00145
00146 void FeedForwardNN::initialize () {
00147 for (UINT i = 1; i <= n_layer; ++i)
00148 layer[i]->initialize();
00149 }
00150
/** Train the network with the method selected by train_method.
 *  Requires training data (ptd) and sample weights (ptw) to be set.
 *  All variants drive the generic iterative_optimize() from optimize.h,
 *  using this object for cost/gradient evaluation and stop_opt() as the
 *  termination test. */
void FeedForwardNN::train () {
    assert(n_layer > 0);
    assert(ptd != NULL && ptw != NULL);

    switch (train_method) {
    case GRADIENT_DESCENT:
        // Plain gradient descent with fixed step size learn_rate.
        iterative_optimize(_gradient_descent<FeedForwardNN,WEIGHT,REAL>
                           (this, learn_rate));
        break;
    case LINE_SEARCH:
        iterative_optimize(_line_search<FeedForwardNN,WEIGHT,REAL,REAL>
                           (this, learn_rate));
        break;
    case CONJUGATE_GRADIENT:
        iterative_optimize
            (_conjugate_gradient<FeedForwardNN,WEIGHT,REAL,REAL>
             (this, learn_rate));
        break;
    case WEIGHT_DECAY:
        // 0.01 is the weight-decay coefficient (hard-coded).
        iterative_optimize(_gd_weightdecay<FeedForwardNN,WEIGHT,REAL>
                           (this, learn_rate, 0.01));
        break;
    case ADAPTIVE_LEARNING_RATE:
        // 1.15 / 0.5: presumably the rate growth and shrink factors —
        // confirm against _gd_adaptive in optimize.h.
        iterative_optimize(_gd_adaptive<FeedForwardNN,WEIGHT,REAL,REAL>
                           (this, learn_rate, 1.15, 0.5));
        break;
    default:
        assert(0);  // unknown training method
    }
}
00181
00182 void FeedForwardNN::log_cost (UINT epoch, REAL cst) {
00183 if (logf != NULL)
00184 fprintf(logf, "%lu %g %g\n", epoch, learn_rate, cst);
00185
00186 if (epoch % 20 == 1)
00187 printf("epoch %lu, cost = %g\n", epoch, cst);
00188 }
00189
/** Compute the network output for input x: run a forward pass and
 *  return the cached activation of the top layer. */
Output FeedForwardNN::operator() (const Input& x) const {
    assert(n_layer > 0);
    assert(x.size() == n_input());

    forward(x);
    return _y[n_layer];
}
00197
00198 FeedForwardNN::WEIGHT FeedForwardNN::weight () const {
00199 WEIGHT wgt;
00200 for (UINT i = 1; i <= n_layer; ++i)
00201 wgt.push_back(layer[i]->weight());
00202 return wgt;
00203 }
00204
00205 void FeedForwardNN::set_weight (const WEIGHT& wgt) {
00206 assert(wgt.size() == n_layer);
00207 for (UINT i = 1; i <= n_layer; ++i)
00208 layer[i]->set_weight(wgt[i-1]);
00209 }
00210
00211 Output FeedForwardNN::_cost_deriv (const Output& F, const Output& y) const {
00212 assert(F.size() == n_output() && y.size() == n_output());
00213
00214 Output d(_n_out);
00215 for (UINT i = 0; i < _n_out; ++i)
00216 d[i] = F[i] - y[i];
00217 return d;
00218 }
00219
/** Cost of a single training sample: compare the network output for
 *  sample idx with its target. */
REAL FeedForwardNN::cost (UINT idx) const {
    return _cost(get_output(idx), ptd->y(idx));
}
00223
00224 REAL FeedForwardNN::cost () const {
00225 assert(ptd != NULL && ptw != NULL);
00226 const UINT n = ptd->size();
00227 REAL cst = 0;
00228 for (UINT i = 0; i < n; ++i)
00229 cst += cost(i) * (*ptw)[i];
00230 return cst;
00231 }
00232
/** Gradient of the cost for a single sample idx.
 *  Forward pass on the sample, then back-propagate the cost derivative
 *  top-to-bottom; the per-layer gradients (accumulated by the layers —
 *  hence the clear_gradient() up front) are then gathered and returned. */
FeedForwardNN::WEIGHT FeedForwardNN::gradient (UINT idx) const {
    assert(ptd != NULL);
    assert(n_layer > 0);

    clear_gradient();

    // _y[0] doubles as the input buffer for the forward pass.
    forward(_y[0] = ptd->x(idx));
    _dy[n_layer] = _cost_deriv(_y[n_layer], ptd->y(idx));
    for (UINT i = n_layer; i; --i)
        layer[i]->back_propagate(_y[i-1], _dy[i], _dy[i-1]);

    WEIGHT grad;
    for (UINT i = 1; i <= n_layer; ++i)
        grad.push_back(layer[i]->gradient());
    return grad;
}
00249
/** Batch gradient over the whole training set.
 *  One forward + backward pass per sample; the layers accumulate their
 *  gradients across samples (cleared once up front), and the totals are
 *  gathered at the end. */
FeedForwardNN::WEIGHT FeedForwardNN::gradient () const {
    assert(ptd != NULL && ptw != NULL);
    assert(n_layer > 0);

    clear_gradient();

    const UINT n = ptd->size();
    for (UINT idx = 0; idx < n; idx++) {
        forward(_y[0] = ptd->x(idx));

        _dy[n_layer] = _cost_deriv(_y[n_layer], ptd->y(idx));
        assert(_dy[n_layer].size() == _n_out);
        // Scale the top derivative by the sample weight times n —
        // presumably *ptw is normalized to sum to 1, so this restores
        // per-sample scale; confirm against LearnModel's weight setup.
        const REAL w = (*ptw)[idx] * n;
        for (UINT j = 0; j < _n_out; ++j)
            _dy[n_layer][j] *= w;

        for (UINT i = n_layer; i; --i)
            layer[i]->back_propagate(_y[i-1], _dy[i], _dy[i-1]);
    }

    WEIGHT grad;
    for (UINT i = 1; i <= n_layer; ++i)
        grad.push_back(layer[i]->gradient());
    return grad;
}
00275
00276 void FeedForwardNN::clear_gradient () const {
00277 for (UINT i = 1; i <= n_layer; ++i)
00278 layer[i]->clear_gradient();
00279 }
00280
00281 bool FeedForwardNN::stop_opt (UINT step, REAL cst) {
00282 log_cost(step, cst);
00283 return (step >= max_run || cst < min_cst);
00284 }
00285
00286 }