learnmodel.cpp

Go to the documentation of this file.
00001 
#include <assert.h>
#include <cmath>
#include <sstream>
#include <stdio.h>
#include <string>
#include "learnmodel.h"
00010 
00011 namespace lemga {
00012 
00014 static DataSet*
00015 load_data (DataSet* pd, std::istream& is, UINT n, UINT in, UINT out) {
00016     for (UINT i = 0; i < n; ++i) {
00017         Input x(in);
00018         Output y(out);
00019         for (UINT j = 0; j < in; ++j)
00020             if (!(is >> x[j])) return pd;
00021         for (UINT j = 0; j < out; ++j)
00022             if (!(is >> y[j])) return pd;
00023 
00024         pd->append(x, y);
00025     }
00026     return pd;
00027 }
00028 
00037 DataSet* load_data (std::istream& is, UINT n, UINT in, UINT out) {
00038     DataSet* pd = new DataSet();
00039     return load_data(pd, is, n, in, out);
00040 }
00041 
00046 DataSet* load_data (std::istream& is, UINT n) {
00047     assert(n > 0);
00048     /* read the first line and infer the input dimension */
00049     Input x;
00050     do {
00051         char line[1024*10];
00052         is.getline(line, 1024*10);
00053         std::istringstream iss(line);
00054         REAL xi;
00055         while (iss >> xi)
00056             x.push_back(xi);
00057     } while (x.empty() && !is.eof());
00058     if (x.empty()) return 0;
00059 
00060     Output y(1, x.back());
00061     x.pop_back();
00062 
00063     DataSet* pd = new DataSet();
00064     pd->append(x, y);
00065     return load_data(pd, is, n-1, x.size(), 1);
00066 }
00067 
00070 LearnModel::LearnModel (UINT n_in, UINT n_out)
00071     : Object(), _n_in(n_in), _n_out(n_out), n_samples(0), logf(NULL)
00072 { /* empty */ }
00073 
00074 bool LearnModel::serialize (std::ostream& os,
00075                             ver_list& vl) const {
00076     SERIALIZE_PARENT(Object, os, vl, 1);
00077     return (os << _n_in << ' ' << _n_out << '\n');
00078 }
00079 
00080 bool LearnModel::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
00081     assert(d == NIL_ID);
00082     UNSERIALIZE_PARENT(Object, is, vl, 1, v);
00083     _n_in = 0; _n_out = 0;
00084     ptd = 0; ptw = 0; n_samples = 0;
00085     if (v == 0) return true;
00086     return (is >> _n_in >> _n_out);
00087 }
00088 
00094 REAL LearnModel::r_error (const Output& out, const Output& y) const {
00095     assert(out.size() == n_output());
00096     assert(y.size() == n_output());
00097 
00098     REAL err = 0;
00099     for (UINT i = 0; i < _n_out; ++i) {
00100         REAL dif = out[i] - y[i];
00101         err += dif * dif;
00102     }
00103     return err / 2;
00104 }
00105 
00112 REAL LearnModel::c_error (const Output& out, const Output& y) const {
00113     assert(n_output() == 1);
00114     assert(std::fabs(std::fabs(y[0]) - 1) < INFINITESIMAL);
00115     return (out[0]*y[0] <= 0);
00116 }
00117 
00118 REAL LearnModel::train_r_error () const {
00119     assert(ptw != 0);
00120     REAL err = 0;
00121     for (UINT i = 0; i < n_samples; ++i)
00122         err += (*ptw)[i] * r_error(get_output(i), ptd->y(i));
00123     return err;
00124 }
00125 
00126 REAL LearnModel::train_c_error () const {
00127     assert(ptw != 0);
00128     REAL err = 0;
00129     for (UINT i = 0; i < n_samples; ++i)
00130         err += (*ptw)[i] * c_error(get_output(i), ptd->y(i));
00131     return err;
00132 }
00133 
00134 REAL LearnModel::test_r_error (const pDataSet& pd) const {
00135     UINT n = pd->size();
00136     REAL err = 0;
00137     for (UINT i = 0; i < n; ++i)
00138         err += r_error((*this)(pd->x(i)), pd->y(i));
00139     return err / n;
00140 }
00141 
00142 REAL LearnModel::test_c_error (const pDataSet& pd) const {
00143     UINT n = pd->size();
00144     REAL err = 0;
00145     for (UINT i = 0; i < n; ++i)
00146         err += c_error((*this)(pd->x(i)), pd->y(i));
00147     return err / n;
00148 }
00149 
00165 void LearnModel::set_train_data (const pDataSet& pd, const pDataWgt& pw) {
00166     n_samples = pd->size();
00167     assert(n_samples > 0);
00168     assert(!pw || n_samples == pw->size());
00169     if (support_weighted_data()) {
00170         ptd = pd;
00171         ptw = (pw != 0)? pw : new DataWgt(n_samples, 1.0 / n_samples);
00172     } else {
00173         ptd = (!pw)? pd : pd->random_sample(*pw, n_samples);
00174         ptw = 0;
00175     }
00176     assert(support_weighted_data() == (ptw != 0));
00177 #ifndef NDEBUG
00178     // assert: ptw is a probability vector
00179     if (ptw != 0) {
00180         REAL wsum = 0;
00181         for (UINT i = 0; i < n_samples; i++) {
00182             assert((*ptw)[i] >= 0);
00183             wsum += (*ptw)[i];
00184         }
00185         assert(wsum-1 > -EPSILON && wsum-1 < EPSILON);
00186     }
00187 #endif
00188     if (!exact_dimensions(*pd)) {
00189         std::cerr << id() << "::set_train_data: Error: "
00190             "Wrong input/output dimensions.\n";
00191         std::exit(-1);
00192     }
00193 }
00194 
00195 void LearnModel::reset () {
00196     _n_in = _n_out = 0;
00197 }
00198 
00199 REAL LearnModel::margin_of (const Input&, const Output&) const {
00200     OBJ_FUNC_UNDEFINED("margin_of");
00201 }
00202 
00203 REAL LearnModel::min_margin () const {
00204     REAL min_m = INFINITY;
00205     for (UINT i = 0; i < n_samples; ++i) {
00206         // assume all examples count (in computing the minimum)
00207         assert((*ptw)[i] > INFINITESIMAL);
00208         REAL m = margin(i);
00209         if (min_m > m) min_m = m;
00210     }
00211     return min_m;
00212 }
00213 
00214 bool LearnModel::valid_dimensions (UINT nin, UINT nout) const {
00215     return (nin == 0 || _n_in == 0 || nin == _n_in) &&
00216         (nout == 0 || _n_out == 0 || nout == _n_out);
00217 }
00218 
00219 void LearnModel::set_dimensions (UINT nin, UINT nout) {
00220     assert(valid_dimensions(nin, nout));
00221     if (nin > 0) _n_in = nin;
00222     if (nout > 0) _n_out = nout;
00223 }
00224 
00225 } // namespace lemga

Generated on Wed Nov 8 08:15:21 2006 for LEMGA by  doxygen 1.4.6