00001
#include <assert.h>
#include <stdio.h>
#include <cmath>
#include <sstream>
#include <string>
#include "learnmodel.h"
00010
00011 namespace lemga {
00012
00014 static DataSet*
00015 load_data (DataSet* pd, std::istream& is, UINT n, UINT in, UINT out) {
00016 for (UINT i = 0; i < n; ++i) {
00017 Input x(in);
00018 Output y(out);
00019 for (UINT j = 0; j < in; ++j)
00020 if (!(is >> x[j])) return pd;
00021 for (UINT j = 0; j < out; ++j)
00022 if (!(is >> y[j])) return pd;
00023
00024 pd->append(x, y);
00025 }
00026 return pd;
00027 }
00028
00037 DataSet* load_data (std::istream& is, UINT n, UINT in, UINT out) {
00038 DataSet* pd = new DataSet();
00039 return load_data(pd, is, n, in, out);
00040 }
00041
00046 DataSet* load_data (std::istream& is, UINT n) {
00047 assert(n > 0);
00048
00049 Input x;
00050 do {
00051 char line[1024*10];
00052 is.getline(line, 1024*10);
00053 std::istringstream iss(line);
00054 REAL xi;
00055 while (iss >> xi)
00056 x.push_back(xi);
00057 } while (x.empty() && !is.eof());
00058 if (x.empty()) return 0;
00059
00060 Output y(1, x.back());
00061 x.pop_back();
00062
00063 DataSet* pd = new DataSet();
00064 pd->append(x, y);
00065 return load_data(pd, is, n-1, x.size(), 1);
00066 }
00067
00070 LearnModel::LearnModel (UINT n_in, UINT n_out)
00071 : Object(), _n_in(n_in), _n_out(n_out), n_samples(0), logf(NULL)
00072 { }
00073
00074 bool LearnModel::serialize (std::ostream& os,
00075 ver_list& vl) const {
00076 SERIALIZE_PARENT(Object, os, vl, 1);
00077 return (os << _n_in << ' ' << _n_out << '\n');
00078 }
00079
00080 bool LearnModel::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
00081 assert(d == NIL_ID);
00082 UNSERIALIZE_PARENT(Object, is, vl, 1, v);
00083 _n_in = 0; _n_out = 0;
00084 ptd = 0; ptw = 0; n_samples = 0;
00085 if (v == 0) return true;
00086 return (is >> _n_in >> _n_out);
00087 }
00088
00094 REAL LearnModel::r_error (const Output& out, const Output& y) const {
00095 assert(out.size() == n_output());
00096 assert(y.size() == n_output());
00097
00098 REAL err = 0;
00099 for (UINT i = 0; i < _n_out; ++i) {
00100 REAL dif = out[i] - y[i];
00101 err += dif * dif;
00102 }
00103 return err / 2;
00104 }
00105
00112 REAL LearnModel::c_error (const Output& out, const Output& y) const {
00113 assert(n_output() == 1);
00114 assert(std::fabs(std::fabs(y[0]) - 1) < INFINITESIMAL);
00115 return (out[0]*y[0] <= 0);
00116 }
00117
00118 REAL LearnModel::train_r_error () const {
00119 assert(ptw != 0);
00120 REAL err = 0;
00121 for (UINT i = 0; i < n_samples; ++i)
00122 err += (*ptw)[i] * r_error(get_output(i), ptd->y(i));
00123 return err;
00124 }
00125
00126 REAL LearnModel::train_c_error () const {
00127 assert(ptw != 0);
00128 REAL err = 0;
00129 for (UINT i = 0; i < n_samples; ++i)
00130 err += (*ptw)[i] * c_error(get_output(i), ptd->y(i));
00131 return err;
00132 }
00133
00134 REAL LearnModel::test_r_error (const pDataSet& pd) const {
00135 UINT n = pd->size();
00136 REAL err = 0;
00137 for (UINT i = 0; i < n; ++i)
00138 err += r_error((*this)(pd->x(i)), pd->y(i));
00139 return err / n;
00140 }
00141
00142 REAL LearnModel::test_c_error (const pDataSet& pd) const {
00143 UINT n = pd->size();
00144 REAL err = 0;
00145 for (UINT i = 0; i < n; ++i)
00146 err += c_error((*this)(pd->x(i)), pd->y(i));
00147 return err / n;
00148 }
00149
00165 void LearnModel::set_train_data (const pDataSet& pd, const pDataWgt& pw) {
00166 n_samples = pd->size();
00167 assert(n_samples > 0);
00168 assert(!pw || n_samples == pw->size());
00169 if (support_weighted_data()) {
00170 ptd = pd;
00171 ptw = (pw != 0)? pw : new DataWgt(n_samples, 1.0 / n_samples);
00172 } else {
00173 ptd = (!pw)? pd : pd->random_sample(*pw, n_samples);
00174 ptw = 0;
00175 }
00176 assert(support_weighted_data() == (ptw != 0));
00177 #ifndef NDEBUG
00178
00179 if (ptw != 0) {
00180 REAL wsum = 0;
00181 for (UINT i = 0; i < n_samples; i++) {
00182 assert((*ptw)[i] >= 0);
00183 wsum += (*ptw)[i];
00184 }
00185 assert(wsum-1 > -EPSILON && wsum-1 < EPSILON);
00186 }
00187 #endif
00188 if (!exact_dimensions(*pd)) {
00189 std::cerr << id() << "::set_train_data: Error: "
00190 "Wrong input/output dimensions.\n";
00191 std::exit(-1);
00192 }
00193 }
00194
00195 void LearnModel::reset () {
00196 _n_in = _n_out = 0;
00197 }
00198
00199 REAL LearnModel::margin_of (const Input&, const Output&) const {
00200 OBJ_FUNC_UNDEFINED("margin_of");
00201 }
00202
00203 REAL LearnModel::min_margin () const {
00204 REAL min_m = INFINITY;
00205 for (UINT i = 0; i < n_samples; ++i) {
00206
00207 assert((*ptw)[i] > INFINITESIMAL);
00208 REAL m = margin(i);
00209 if (min_m > m) min_m = m;
00210 }
00211 return min_m;
00212 }
00213
00214 bool LearnModel::valid_dimensions (UINT nin, UINT nout) const {
00215 return (nin == 0 || _n_in == 0 || nin == _n_in) &&
00216 (nout == 0 || _n_out == 0 || nout == _n_out);
00217 }
00218
00219 void LearnModel::set_dimensions (UINT nin, UINT nout) {
00220 assert(valid_dimensions(nin, nout));
00221 if (nin > 0) _n_in = nin;
00222 if (nout > 0) _n_out = nout;
00223 }
00224
00225 }