00001
#include <assert.h>
#include <cmath>
#include <sstream>
#include <string>
#include <stdio.h>
#include "learnmodel.h"
00010
00011 namespace lemga {
00012
00014 static DataSet*
00015 load_data (DataSet* pd, std::istream& is, UINT n, UINT in, UINT out) {
00016 for (UINT i = 0; i < n; ++i) {
00017 Input x(in);
00018 Output y(out);
00019 for (UINT j = 0; j < in; ++j)
00020 if (!(is >> x[j])) return pd;
00021 for (UINT j = 0; j < out; ++j)
00022 if (!(is >> y[j])) return pd;
00023
00024 pd->append(x, y);
00025 }
00026 return pd;
00027 }
00028
00037 DataSet* load_data (std::istream& is, UINT n, UINT in, UINT out) {
00038 DataSet* pd = new DataSet();
00039 return load_data(pd, is, n, in, out);
00040 }
00041
00046 DataSet* load_data (std::istream& is, UINT n) {
00047 assert(n > 0);
00048
00049 Input x;
00050 do {
00051 char line[1024*10];
00052 is.getline(line, 1024*10);
00053 std::istringstream iss(line);
00054 REAL xi;
00055 while (iss >> xi)
00056 x.push_back(xi);
00057 } while (x.empty() && !is.eof());
00058 if (x.empty()) return 0;
00059
00060 Output y(1, x.back());
00061 x.pop_back();
00062
00063 DataSet* pd = new DataSet();
00064 pd->append(x, y);
00065 return load_data(pd, is, n-1, x.size(), 1);
00066 }
00067
00070 LearnModel::LearnModel (UINT n_in, UINT n_out)
00071 : Object(), _n_in(n_in), _n_out(n_out), n_samples(0), logf(NULL)
00072 { }
00073
00074 bool LearnModel::serialize (std::ostream& os,
00075 ver_list& vl) const {
00076 SERIALIZE_PARENT(Object, os, vl, 1);
00077 return (os << _n_in << ' ' << _n_out << '\n');
00078 }
00079
00080 bool LearnModel::unserialize (std::istream& is, ver_list& vl,
00081 const id_t& _id) {
00082 assert(_id == empty_id);
00083 UNSERIALIZE_PARENT(Object, is, vl, 1, v);
00084 if (v == 0) return true;
00085 return (is >> _n_in >> _n_out);
00086 }
00087
00089 LearnModel::LearnModel (const LearnModel &lm)
00090 : Object(lm), _n_in(lm._n_in), _n_out(lm._n_out),
00091 ptd(lm.ptd), ptw(lm.ptw), n_samples(lm.n_samples), logf(lm.logf)
00092 { }
00093
00099 REAL LearnModel::r_error (const Output& out, const Output& y) const {
00100 assert(out.size() == n_output());
00101 assert(y.size() == n_output());
00102
00103 REAL err = 0;
00104 for (UINT i = 0; i < _n_out; ++i) {
00105 REAL dif = out[i] - y[i];
00106 err += dif * dif;
00107 }
00108 return err / 2;
00109 }
00110
00117 REAL LearnModel::c_error (const Output& out, const Output& y) const {
00118 assert(n_output() == 1);
00119 assert(std::fabs(std::fabs(y[0]) - 1) < INFINITESIMAL);
00120 return (out[0]*y[0] <= 0);
00121 }
00122
00123 REAL LearnModel::train_r_error () const {
00124 assert(ptw != 0);
00125 REAL err = 0;
00126 for (UINT i = 0; i < n_samples; ++i)
00127 err += (*ptw)[i] * r_error(get_output(i), ptd->y(i));
00128 return err;
00129 }
00130
00131 REAL LearnModel::train_c_error () const {
00132 assert(ptw != 0);
00133 REAL err = 0;
00134 for (UINT i = 0; i < n_samples; ++i)
00135 err += (*ptw)[i] * c_error(get_output(i), ptd->y(i));
00136 return err;
00137 }
00138
00139 REAL LearnModel::test_r_error (const pDataSet& pd) const {
00140 UINT n = pd->size();
00141 REAL err = 0;
00142 for (UINT i = 0; i < n; ++i)
00143 err += r_error((*this)(pd->x(i)), pd->y(i));
00144 return err / n;
00145 }
00146
00147 REAL LearnModel::test_c_error (const pDataSet& pd) const {
00148 UINT n = pd->size();
00149 REAL err = 0;
00150 for (UINT i = 0; i < n; ++i)
00151 err += c_error((*this)(pd->x(i)), pd->y(i));
00152 return err / n;
00153 }
00154
00170 void LearnModel::set_train_data (const pDataSet& pd, const pDataWgt& pw) {
00171 n_samples = pd->size();
00172 assert(n_samples > 0);
00173 if (support_weighted_data()) {
00174 ptd = pd;
00175 ptw = (pw != 0)? pw : new DataWgt(n_samples, 1.0 / n_samples);
00176 }
00177 else {
00178 ptd = (!pw)? pd : pd->random_sample(*pw, n_samples);
00179 ptw = 0;
00180 }
00181 assert(!ptw || n_samples == ptw->size());
00182 assert(support_weighted_data() == (ptw != NULL));
00183 #ifndef NDEBUG
00184
00185 if (ptw != 0) {
00186 REAL wsum = 0;
00187 for (UINT i = 0; i < n_samples; i++) {
00188 assert((*ptw)[i] >= 0);
00189 wsum += (*ptw)[i];
00190 }
00191 assert(wsum-1 > -EPSILON && wsum-1 < EPSILON);
00192 }
00193 #endif
00194 UINT din = pd->x(0).size(), dout = pd->y(0).size();
00195 if (_n_in == 0)
00196 _n_in = din;
00197 else if (_n_in != din) {
00198 std::cerr << id() << "::set_train_data: Error: "
00199 "Wrong input dimension.\n";
00200 std::exit(-1);
00201 }
00202 if (_n_out == 0)
00203 _n_out = dout;
00204 else if (_n_out != dout) {
00205 std::cerr << id() << "::set_train_data: Error: "
00206 "Wrong output dimension.\n";
00207 std::exit(-1);
00208 }
00209 }
00210
00211 REAL LearnModel::margin_of (const Input&, const Output&) const {
00212 OBJ_FUNC_UNDEFINED("margin_of");
00213 }
00214
00215 REAL LearnModel::min_margin () const {
00216 REAL min_m = INFINITY;
00217 for (UINT i = 0; i < n_samples; ++i) {
00218
00219 assert((*ptw)[i] > INFINITESIMAL);
00220 REAL m = margin(i);
00221 if (min_m > m) min_m = m;
00222 }
00223 return min_m;
00224 }
00225
00226 }