learnmodel.cpp

Go to the documentation of this file.
00001 
#include <assert.h>
#include <cmath>
#include <sstream>
#include <stdio.h>
#include <string>
#include "learnmodel.h"
00010 
00011 namespace lemga {
00012 
00014 static DataSet*
00015 load_data (DataSet* pd, std::istream& is, UINT n, UINT in, UINT out) {
00016     for (UINT i = 0; i < n; ++i) {
00017         Input x(in);
00018         Output y(out);
00019         for (UINT j = 0; j < in; ++j)
00020             if (!(is >> x[j])) return pd;
00021         for (UINT j = 0; j < out; ++j)
00022             if (!(is >> y[j])) return pd;
00023 
00024         pd->append(x, y);
00025     }
00026     return pd;
00027 }
00028 
00037 DataSet* load_data (std::istream& is, UINT n, UINT in, UINT out) {
00038     DataSet* pd = new DataSet();
00039     return load_data(pd, is, n, in, out);
00040 }
00041 
00046 DataSet* load_data (std::istream& is, UINT n) {
00047     assert(n > 0);
00048     /* read the first line and infer the input dimension */
00049     Input x;
00050     do {
00051         char line[1024*10];
00052         is.getline(line, 1024*10);
00053         std::istringstream iss(line);
00054         REAL xi;
00055         while (iss >> xi)
00056             x.push_back(xi);
00057     } while (x.empty() && !is.eof());
00058     if (x.empty()) return 0;
00059 
00060     Output y(1, x.back());
00061     x.pop_back();
00062 
00063     DataSet* pd = new DataSet();
00064     pd->append(x, y);
00065     return load_data(pd, is, n-1, x.size(), 1);
00066 }
00067 
00070 LearnModel::LearnModel (UINT n_in, UINT n_out)
00071     : Object(), _n_in(n_in), _n_out(n_out), n_samples(0), logf(NULL)
00072 { /* empty */ }
00073 
00074 bool LearnModel::serialize (std::ostream& os,
00075                             ver_list& vl) const {
00076     SERIALIZE_PARENT(Object, os, vl, 1);
00077     return (os << _n_in << ' ' << _n_out << '\n');
00078 }
00079 
00080 bool LearnModel::unserialize (std::istream& is, ver_list& vl,
00081                               const id_t& _id) {
00082     assert(_id == empty_id);
00083     UNSERIALIZE_PARENT(Object, is, vl, 1, v);
00084     if (v == 0) return true;
00085     return (is >> _n_in >> _n_out);
00086 }
00087 
00089 LearnModel::LearnModel (const LearnModel &lm)
00090     : Object(lm), _n_in(lm._n_in), _n_out(lm._n_out),
00091       ptd(lm.ptd), ptw(lm.ptw), n_samples(lm.n_samples), logf(lm.logf) /* is this reasonable? */
00092 { /* empty */ }
00093 
00099 REAL LearnModel::r_error (const Output& out, const Output& y) const {
00100     assert(out.size() == n_output());
00101     assert(y.size() == n_output());
00102 
00103     REAL err = 0;
00104     for (UINT i = 0; i < _n_out; ++i) {
00105         REAL dif = out[i] - y[i];
00106         err += dif * dif;
00107     }
00108     return err / 2;
00109 }
00110 
00117 REAL LearnModel::c_error (const Output& out, const Output& y) const {
00118     assert(n_output() == 1);
00119     assert(std::fabs(std::fabs(y[0]) - 1) < INFINITESIMAL);
00120     return (out[0]*y[0] <= 0);
00121 }
00122 
00123 REAL LearnModel::train_r_error () const {
00124     assert(ptw != 0);
00125     REAL err = 0;
00126     for (UINT i = 0; i < n_samples; ++i)
00127         err += (*ptw)[i] * r_error(get_output(i), ptd->y(i));
00128     return err;
00129 }
00130 
00131 REAL LearnModel::train_c_error () const {
00132     assert(ptw != 0);
00133     REAL err = 0;
00134     for (UINT i = 0; i < n_samples; ++i)
00135         err += (*ptw)[i] * c_error(get_output(i), ptd->y(i));
00136     return err;
00137 }
00138 
00139 REAL LearnModel::test_r_error (const pDataSet& pd) const {
00140     UINT n = pd->size();
00141     REAL err = 0;
00142     for (UINT i = 0; i < n; ++i)
00143         err += r_error((*this)(pd->x(i)), pd->y(i));
00144     return err / n;
00145 }
00146 
00147 REAL LearnModel::test_c_error (const pDataSet& pd) const {
00148     UINT n = pd->size();
00149     REAL err = 0;
00150     for (UINT i = 0; i < n; ++i)
00151         err += c_error((*this)(pd->x(i)), pd->y(i));
00152     return err / n;
00153 }
00154 
00170 void LearnModel::set_train_data (const pDataSet& pd, const pDataWgt& pw) {
00171     n_samples = pd->size();
00172     assert(n_samples > 0);
00173     if (support_weighted_data()) {
00174         ptd = pd;
00175         ptw = (pw != 0)? pw : new DataWgt(n_samples, 1.0 / n_samples);
00176     }
00177     else {
00178         ptd = (!pw)? pd : pd->random_sample(*pw, n_samples);
00179         ptw = 0;
00180     }
00181     assert(!ptw || n_samples == ptw->size());
00182     assert(support_weighted_data() == (ptw != NULL));
00183 #ifndef NDEBUG
00184     // assert: ptw is a probability vector
00185     if (ptw != 0) {
00186         REAL wsum = 0;
00187         for (UINT i = 0; i < n_samples; i++) {
00188             assert((*ptw)[i] >= 0);
00189             wsum += (*ptw)[i];
00190         }
00191         assert(wsum-1 > -EPSILON && wsum-1 < EPSILON);
00192     }
00193 #endif
00194     UINT din = pd->x(0).size(), dout = pd->y(0).size();
00195     if (_n_in == 0)
00196         _n_in = din;
00197     else if (_n_in != din) {
00198         std::cerr << id() << "::set_train_data: Error: "
00199             "Wrong input dimension.\n";
00200         std::exit(-1);
00201     }
00202     if (_n_out == 0)
00203         _n_out = dout;
00204     else if (_n_out != dout) {
00205         std::cerr << id() << "::set_train_data: Error: "
00206             "Wrong output dimension.\n";
00207         std::exit(-1);
00208     }
00209 }
00210 
00211 REAL LearnModel::margin_of (const Input&, const Output&) const {
00212     OBJ_FUNC_UNDEFINED("margin_of");
00213 }
00214 
00215 REAL LearnModel::min_margin () const {
00216     REAL min_m = INFINITY;
00217     for (UINT i = 0; i < n_samples; ++i) {
00218         // assume all examples count (in computing the minimum)
00219         assert((*ptw)[i] > INFINITESIMAL);
00220         REAL m = margin(i);
00221         if (min_m > m) min_m = m;
00222     }
00223     return min_m;
00224 }
00225 
00226 } // namespace lemga

Generated on Mon Jan 9 23:43:24 2006 for LEMGA by  doxygen 1.4.6