// perceptron.cpp
// "Fixed bias" means we still use the bias term, but do not change it.
// This is to simulate a perceptron without the bias term. An alternative
// is to modify dset_extract (for RCD) and the update rules (for other
// algorithms) so that "1" is not appended to input vectors. Although that
// approach is simpler to implement, it causes extra trouble with
// saving/loading and with importing from SVM.
#define PERCEPTRON_FIXED_BIAS      0
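
// Background: inputs are augmented with a constant 1 (see dset_extract), so
// the bias is just the last weight:
//     w = (w_1, ..., w_n, b),  x' = (x_1, ..., x_n, 1),
//     <w, x'> = w_1*x_1 + ... + w_n*x_n + b.
// With PERCEPTRON_FIXED_BIAS set to 1, b keeps its initial value while the
// other components are trained, mimicking a bias-free perceptron without
// changing the augmented representation.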

// if set, update_wgt() may also consider the reversed search direction
#define STUMP_DOUBLE_DIRECTIONS    0

#include <assert.h>
#include <cmath>
#include <numeric>
#include "random.h"
#include "stump.h"
#include "perceptron.h"

REGISTER_CREATOR(lemga::Perceptron);

typedef std::vector<REAL> RVEC;
typedef std::vector<RVEC> RMAT;

#define DOTPROD(x,y) std::inner_product(x.begin(), x.end(), y.begin(), .0)
// the same dot product, but skipping the bias (last) component
#define DOTPROD_NB(x,y) std::inner_product(x.begin(),x.end()-1,y.begin(),.0)

// a random vector with entries uniform in [-1, 1)
inline RVEC randvec (UINT n) {
    RVEC x(n);
    for (UINT i = 0; i < n; ++i)
        x[i] = 2*randu() - 1;
    return x;
}

// the c-th coordinate (standard basis) vector of length n
inline RVEC coorvec (UINT n, UINT c) {
    RVEC x(n, 0);
    x[c] = 1;
    return x;
}

// scale v to unit length, but only if its squared norm exceeds thr
inline void normalize (RVEC& v, REAL thr = 0) {
    REAL s = DOTPROD(v, v);
    assert(s > 0);
    if (s <= thr) return;
    s = 1 / std::sqrt(s);
    for (RVEC::iterator p = v.begin(); p != v.end(); ++p)
        *p *= s;
}

// Generate n random directions; with conjugate=true they are made into an
// orthonormal basis. If bias_row is true, the last row is fixed to the bias
// coordinate vector and the other rows get a zero bias component.
RMAT randrot (UINT n, bool bias_row = false, bool conjugate = true) {
#if PERCEPTRON_FIXED_BIAS
    bias_row = true;
#endif
    RMAT m(n);
    if (bias_row)
        m.back() = coorvec(n, n-1);

    for (UINT i = 0; i+bias_row < n; ++i) {
        m[i] = randvec(n);
        if (bias_row) m[i][n-1] = 0;
        if (!conjugate) continue;

        // make m[i] orthogonal to the previous rows (Gram-Schmidt)
        for (UINT k = 0; k < i; ++k) {
            REAL t = DOTPROD(m[i], m[k]);
            for (UINT j = 0; j < n; ++j)
                m[i][j] -= t * m[k][j];
        }
        // normalize m[i]
        normalize(m[i]);
    }
    return m;
}
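
// Illustrative check (a sketch, not part of the library): with
// conjugate=true the rows returned by randrot() are orthonormal, e.g.,
//     RMAT m = randrot(5);
//     assert(std::fabs(DOTPROD(m[0], m[0]) - 1) < 1e-8);
//     assert(std::fabs(DOTPROD(m[0], m[1])) < 1e-8);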

// Cholesky decomposition A = L*L^T, adapted from Numerical Recipes in C.
// Reads the upper triangle of A, stores L below the diagonal of A and its
// diagonal in p; returns false if A is not positive definite.
bool Cholesky_decomp (RMAT& A, RVEC& p) {
    const UINT n = A.size();
    assert(p.size() == n);
    for (UINT i = 0; i < n; ++i) {
        for (UINT j = i; j < n; ++j) {
            REAL sum = A[i][j];
            for (UINT k = 0; k < i; ++k)
                sum -= A[i][k] * A[j][k];
            if (i == j) {
                if (sum <= 0)
                    return false;
                p[i] = std::sqrt(sum);
            } else
                A[j][i] = sum / p[i];
        }
    }
    return true;
}

// Solve A*x = b using the Cholesky decomposition computed above, adapted
// from Numerical Recipes in C: forward substitution with L, then back
// substitution with L^T.
void Cholesky_linsol (const RMAT& A, const RVEC& p, const RVEC& b, RVEC& x) {
    const UINT n = A.size();
    assert(p.size() == n && b.size() == n);
    x.resize(n);

    for (UINT i = 0; i < n; ++i) {
        REAL sum = b[i];
        for (UINT k = 0; k < i; ++k)
            sum -= A[i][k] * x[k];
        x[i] = sum / p[i];
    }
    for (UINT i = n; i--;) {
        REAL sum = x[i];
        for (UINT k = i+1; k < n; ++k)
            sum -= A[k][i] * x[k];
        x[i] = sum / p[i];
    }
}

namespace lemga {

// Solve A*x = b for symmetric positive-definite A; A is overwritten with
// its Cholesky factor. Returns false if the decomposition fails.
bool ldivide (RMAT& A, const RVEC& b, RVEC& x) {
    const UINT n = A.size();
    assert(b.size() == n);
    RVEC p(n);
    if (!Cholesky_decomp(A, p)) return false;
    Cholesky_linsol(A, p, b, x);
    return true;
}
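
// Illustrative usage (a sketch, not part of the library): solve a small
// symmetric positive-definite system A*x = b. Note that A is destroyed.
//     RMAT A(2, RVEC(2));
//     A[0][0] = 4; A[0][1] = 2;   // A = [4 2]
//     A[1][0] = 2; A[1][1] = 3;   //     [2 3]
//     RVEC b(2, 1), x;            // b = (1, 1)
//     if (ldivide(A, b, x))
//         assert(std::fabs(x[0] - 0.125) + std::fabs(x[1] - 0.25) < 1e-10);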

// Update wgt along the direction dir: project every sample onto dir and use
// a 1-D stump search to pick the step size minimizing the training error.
void update_wgt (RVEC& wgt, const RVEC& dir, const RMAT& X, const RVEC& y) {
    const UINT dim = wgt.size();
    assert(dim == dir.size());
    const UINT n_samples = X.size();
    assert(n_samples == y.size());

    RVEC xn(n_samples), yn(n_samples);
#if STUMP_DOUBLE_DIRECTIONS
    REAL bias = 0;
#endif
    for (UINT i = 0; i < n_samples; ++i) {
        assert(X[i].size() == dim);
        const REAL x_d = DOTPROD(X[i], dir);
        REAL x_new = 0, y_new = 0;
        if (x_d != 0) {
            y_new = (x_d > 0)? y[i] : -y[i];
            REAL x_w = DOTPROD(X[i], wgt);
            x_new = x_w / x_d;
        }
#if STUMP_DOUBLE_DIRECTIONS
        else
            bias += (DOTPROD(X[i], wgt) > 0)? y[i] : -y[i];
#endif
        xn[i] = x_new; yn[i] = y_new;
    }

#if STUMP_DOUBLE_DIRECTIONS
    REAL th1, th2;
    bool ir, pos;
    Stump::train_1d(xn, yn, bias, ir, pos, th1, th2);
    const REAL w_d_new = -(th1 + th2) / 2;
#else
    const REAL w_d_new = - Stump::train_1d(xn, yn);
#endif

    for (UINT j = 0; j < dim; ++j)
        wgt[j] += w_d_new * dir[j];

#if STUMP_DOUBLE_DIRECTIONS
    if (!pos)
        for (UINT j = 0; j < dim; ++j)
            wgt[j] = -wgt[j];
#endif

    // use a big threshold to reduce the number of normalizations
    normalize(wgt, 1e10);
}
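
// How the 1-D reduction above works: for a step size t, the output on
// example i becomes <w + t*dir, x_i> = <w, x_i> + t * <dir, x_i>, whose sign
// flips at t = -<w, x_i> / <dir, x_i>. Dividing by <dir, x_i> (and flipping
// the label when it is negative) turns the search for the best t into a 1-D
// threshold problem on xn[i] = <w, x_i> / <dir, x_i>, which Stump::train_1d
// solves; the negated threshold is the step size w_d_new.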

// Copy the dataset into X (with a constant 1 appended to each input) and y.
void dset_extract (const pDataSet& ptd, RMAT& X, RVEC& y) {
    UINT n = ptd->size();
    X.resize(n); y.resize(n);
    for (UINT i = 0; i < n; ++i) {
        X[i] = ptd->x(i);
        X[i].push_back(1);
        y[i] = ptd->y(i)[0];
    }
}

// scale each label by its sample weight
inline void dset_mult_wgt (const pDataWgt& ptw, RVEC& y) {
    UINT n = y.size();
    assert(ptw->size() == n);
    for (UINT i = 0; i < n; ++i)
        y[i] *= (*ptw)[i];
}

Perceptron::Perceptron (UINT n_in)
    : LearnModel(n_in, 1), wgt(n_in+1,0), resample(0),
      train_method(RCD_BIAS), learn_rate(0.002), min_cst(0),
      max_run(1000), with_fld(false)
{
}

Perceptron::Perceptron (const SVM& s)
    : LearnModel(s), wgt(_n_in+1,0), resample(0),
      train_method(RCD_BIAS), learn_rate(0.002), min_cst(0),
      max_run(1000), with_fld(false)
{
    assert(wgt.size() == _n_in+1);
    // we cannot verify here that the SVM uses a linear kernel

    const UINT nsv = s.n_support_vectors();
    for (UINT i = 0; i < nsv; ++i) {
        const Input sv = s.support_vector(i);
        const REAL coef = s.support_vector_coef(i);
        assert(sv.size() == _n_in);
        for (UINT k = 0; k < _n_in; ++k)
            wgt[k] += coef * sv[k];
    }
    wgt.back() = s.bias();
}

bool Perceptron::serialize (std::ostream& os, ver_list& vl) const {
    SERIALIZE_PARENT(LearnModel, os, vl, 1);
    for (UINT i = 0; i <= _n_in; ++i)
        if (!(os << wgt[i] << ' ')) return false;
    return (os << '\n');
}

bool Perceptron::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
    if (d != id() && d != empty_id) return false;
    UNSERIALIZE_PARENT(LearnModel, is, vl, 1, v);

    wgt.resize(_n_in+1);
    for (UINT i = 0; i <= _n_in; ++i)
        if (!(is >> wgt[i])) return false;
    return true;
}

void Perceptron::set_weight (const WEIGHT& w) {
    wgt = w;
    if (_n_in == 0)
        _n_in = w.size() - 1;
    assert(_n_in > 0 && _n_in+1 == wgt.size());
}

void Perceptron::initialize () {
    assert(_n_in > 0);
    wgt.resize(_n_in + 1);
    // with a fixed bias, the last component (the bias) is left untouched
    for (UINT i = 0; i+PERCEPTRON_FIXED_BIAS < wgt.size(); ++i)
        wgt[i] = randu() * 0.1 - 0.05; // small random numbers
}

Perceptron::WEIGHT Perceptron::fld () const {
    assert(ptd != 0 && ptw != 0);

    // get the weighted class means
    Input m1(_n_in, 0), m2(_n_in, 0);
    REAL w1 = 0, w2 = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        const Input& x = ptd->x(i);
        REAL y = ptd->y(i)[0];
        REAL w = (*ptw)[i];

        if (y > 0) {
            w1 += w;
            for (UINT k = 0; k < _n_in; ++k)
                m1[k] += w * x[k];
        } else {
            w2 += w;
            for (UINT k = 0; k < _n_in; ++k)
                m2[k] += w * x[k];
        }
    }
    assert(w1 > 0 && w2 > 0 && std::fabs(w1+w2-1) < EPSILON);
    for (UINT k = 0; k < _n_in; ++k) {
        m1[k] /= w1; m2[k] /= w2;
    }

    // get the within-class covariance
    RMAT A(_n_in, RVEC(_n_in, 0));
    RVEC diff(_n_in);
    for (UINT i = 0; i < n_samples; ++i) {
        const Input& x = ptd->x(i);
        REAL y = ptd->y(i)[0];
        REAL w = (*ptw)[i];

        if (y > 0)
            for (UINT k = 0; k < _n_in; ++k)
                diff[k] = x[k] - m1[k];
        else
            for (UINT k = 0; k < _n_in; ++k)
                diff[k] = x[k] - m2[k];
        // we only need the upper triangular part
        for (UINT j = 0; j < _n_in; ++j)
            for (UINT k = j; k < _n_in; ++k)
                A[j][k] += w * diff[j] * diff[k];
    }

    for (UINT k = 0; k < _n_in; ++k)
        diff[k] = m1[k] - m2[k];

    RVEC w;
    bool not_reg = true;
    while (!ldivide(A, diff, w)) {
        assert(not_reg); not_reg = false; // should only happen once
        REAL tr = 0;
        for (UINT j = 0; j < _n_in; ++j)
            tr += A[j][j];
        const REAL gamma = 1e-10; // see [Friedman 1989]
        const REAL adj = gamma/(1-gamma) * tr/_n_in;
        for (UINT j = 0; j < _n_in; ++j)
            A[j][j] += adj;
#if VERBOSE_OUTPUT
        std::cerr << "LDA: The class covariance matrix estimate is "
            "regularized by\n\tan eigenvalue shrinkage parameter "
                  << gamma << '\n';
#endif
    }

    w.push_back(- (DOTPROD(m1,w) * w2 + DOTPROD(m2,w) * w1));
    return w;
}
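
// Background: fld() is Fisher's linear discriminant,
//     w = S_W^{-1} (m1 - m2),
// with S_W the weighted within-class covariance and m1, m2 the class means.
// The appended bias places the boundary at the weighted midpoint, i.e.,
// <w, w2*m1 + w1*m2> + bias = 0. A singular S_W is regularized once by
// adding gamma/(1-gamma) * trace(S_W)/n to the diagonal [Friedman 1989].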

// binary search: return the index i with cdf[i] <= r < cdf[i+1]
inline UINT randcdf (REAL r, const RVEC& cdf) {
    UINT b = 0, e = cdf.size()-1;
    while (b+1 < e) {
        UINT m = (b + e) / 2;
        if (r < cdf[m]) e = m;
        else b = m;
    }
    return b;
}
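
// Background: randcdf() performs inverse-CDF sampling. With
// cdf[i] = sum of the first i sample weights (cdf[0] = 0, cdf[n] = 1) and
// r uniform in [0,1), it returns the i with cdf[i] <= r < cdf[i+1], so
// example i is drawn with probability equal to its weight.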

REAL Perceptron::train () {
    const UINT dim = _n_in+1;
    // the part being updated may be smaller (excluding a fixed bias)
    const UINT udim = dim - PERCEPTRON_FIXED_BIAS;
    assert(wgt.size() == dim);
    assert(ptd != NULL && ptw != NULL);
    assert(ptd->size() == n_samples);

    RVEC cdf;
    if (resample) {
        cdf.resize(n_samples+1);
        cdf[0] = 0;
        for (UINT i = 0; i < n_samples; ++i)
            cdf[i+1] = cdf[i] + (*ptw)[i];
        assert(cdf[n_samples]-1 > -EPSILON && cdf[n_samples]-1 < EPSILON);
    }

#if PERCEPTRON_FIXED_BIAS
    REAL bias_save = wgt.back();
#endif
    if (with_fld) wgt = fld();
#if PERCEPTRON_FIXED_BIAS
    wgt.back() = bias_save;
#endif

    RMAT X; RVEC Y;
    dset_extract(ptd, X, Y);

#define RAND_IDX() (resample? randcdf(randu(),cdf) : UINT(randu()*n_samples))
#define SAMPWGT(i) (resample? 1 : (*ptw)[i]*n_samples)
#define GET_XYO(i)         \
    const Input& x = X[i]; \
    const REAL y = Y[i];   \
    const REAL o = DOTPROD(wgt,x)
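
    // Sampling convention: with resampling, examples are drawn from the
    // weighted distribution via randcdf() and then treated uniformly
    // (SAMPWGT = 1); without it, examples are drawn uniformly and each
    // update is scaled by the sample weight times n_samples. GET_XYO also
    // computes the current (pre-threshold) output o = <wgt, x>.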

    log_error(0);
    switch (train_method) {
    case PERCEPTRON:
    case ADALINE:
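        // Stochastic updates on misclassified examples (y * o <= 0):
        //     perceptron:  w <- w + eta * y * x
        //     adaline:     w <- w + eta * (y - o) * x   (Widrow-Hoff / LMS)
        // where eta = learn_rate, further scaled by the sample weight.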
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = RAND_IDX();
                GET_XYO(idx);
                if (y * o > 0) continue;
                REAL deriv = (train_method == PERCEPTRON? y : (y - o));
                REAL adj = learn_rate * SAMPWGT(idx) * deriv;

                for (UINT k = 0; k < udim; ++k)
                    wgt[k] += adj * x[k];
            }
            log_error(i+1);
        }
        break;

    case POCKET_RATCHET:
    case POCKET: {
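        // The pocket algorithm [Gallant 1990]: run ordinary perceptron
        // updates, but keep "in the pocket" the weights with the longest
        // (weighted) run of consecutive correct classifications. The
        // ratchet variant replaces the pocket only if the training error
        // also strictly improves.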
        bool ratchet = (train_method == POCKET_RATCHET);
        RVEC best_w(wgt);
        REAL run = 0, err = train_c_error();
        bool err_valid = true;
        REAL best_run = run, best_err = err;
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = RAND_IDX();
                GET_XYO(idx);

                if (y * o > 0) {
                    run += SAMPWGT(idx);
                    if (run > best_run) {
                        if (!err_valid) err = train_c_error();
                        err_valid = true;
                        if (!ratchet || err < best_err) {
                            best_run = run;
                            best_err = err;
                            best_w = wgt;
                        }
                        if (err <= 0) break;
                    }
                } else {
                    run = 0;
                    err_valid = false;

                    const REAL adj = SAMPWGT(idx) * y;
                    for (UINT k = 0; k < udim; ++k)
                        wgt[k] += adj * x[k];
                }
            }
            wgt.swap(best_w); log_error(i+1, best_err); wgt.swap(best_w);
        }
        wgt.swap(best_w);
    }
        break;

    case AVE_PERCEPTRON_RAND:
    case AVE_PERCEPTRON: {
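        // Averaged perceptron: return the average of all intermediate weight
        // vectors, each weighted by its survival time (cf. the voted
        // perceptron of Freund & Schapire [1999]). ave_wgt accumulates
        // run * wgt lazily, just before wgt changes.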
        assert(train_method != AVE_PERCEPTRON || !resample);
        RVEC ave_wgt(dim, 0);
        REAL run = 0;
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = (train_method == AVE_PERCEPTRON)?
                    j : RAND_IDX();
                GET_XYO(idx);
                if (y * o > 0)
                    run += SAMPWGT(idx);
                else {
                    for (UINT k = 0; k < dim; ++k)
                        ave_wgt[k] += run * wgt[k];

                    const REAL adj = SAMPWGT(idx) * y;
                    for (UINT k = 0; k < udim; ++k)
                        wgt[k] += adj * x[k];
                    run = SAMPWGT(idx);
                }
            }
            RVEC tmp_wgt(ave_wgt);
            for (UINT k = 0; k < dim; ++k)
                tmp_wgt[k] += run * wgt[k];
            wgt.swap(tmp_wgt); log_error(i+1); wgt.swap(tmp_wgt);
        }
        for (UINT k = 0; k < dim; ++k)
            wgt[k] = ave_wgt[k] + run * wgt[k];
    }
        break;

    case ROMMA_AGG_RAND:
    case ROMMA_AGG:
    case ROMMA_RAND:
    case ROMMA: {
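        // ROMMA [Li & Long 2002]: on a margin violation, set
        //     w <- c*w + d*x    (x is the bias-augmented input),
        // where c and d give (roughly) the minimum-norm w that classifies x
        // with margin 1 while respecting the previous constraints. The
        // aggressive variants update whenever y*<w,x> <= 1 - EPSILON
        // instead of only on mistakes.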
        bool fixed = (train_method == ROMMA || train_method == ROMMA_AGG);
        assert(!fixed || !resample);
        REAL bnd = (train_method == ROMMA || train_method == ROMMA_RAND)?
            0 : (1-EPSILON);
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = fixed? j : RAND_IDX();
                GET_XYO(idx); const REAL& w_x = o;
                if (y * w_x > bnd) continue;

                REAL w_w = DOTPROD(wgt, wgt);
                // x already carries the appended constant 1
                REAL x_x = DOTPROD(x, x);
                REAL x2w2 = x_x * w_w;
                REAL deno = x2w2 - w_x*w_x;
                REAL c = (x2w2 - y*w_x) / deno;
                REAL d = w_w * (y - w_x) / deno;

                for (UINT k = 0; k < dim; ++k)
                    wgt[k] = c*wgt[k] + d*x[k];
            }
            log_error(i+1);
        }
    }
        break;

    case SGD_HINGE:
    case SGD_MLSE: {
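        // Stochastic (sub)gradient descent on margin violations (y*o < 1):
        //     SGD_HINGE: hinge loss max(0, 1 - y*o),  step along y*x
        //     SGD_MLSE:  squared error on violators,  step along (y - o)*x
        // with L2 weight decay via shrink = 1 - C*learn_rate (inactive here
        // since C = 0).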
        const REAL C = 0; // regularization coefficient (lambda); 0 disables shrinking

        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = RAND_IDX();
                GET_XYO(idx);

                if (y*o < 1) {
                    REAL shrink = 1 - C * learn_rate;
                    REAL deriv = (train_method == SGD_HINGE? y : (y - o));
                    REAL adj = learn_rate * SAMPWGT(idx) * deriv;
                    for (UINT k = 0; k < udim; ++k)
                        wgt[k] = shrink * wgt[k] + adj * x[k];
                }
            }
            log_error(i+1);
        }
    }
        break;

#undef RAND_IDX
#undef SAMPWGT
#define CYCLE(r)  (((r)+dim-1) % udim)
#define UPDATE_WGT(d) update_wgt(wgt, d, X, Y)

    case COORDINATE_DESCENT: {
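        // Cyclic coordinate descent: each round performs an exact line
        // search (update_wgt) along one coordinate direction; CYCLE(r)
        // visits the udim trainable coordinates in turn.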
        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            UPDATE_WGT(coorvec(dim, CYCLE(r)));
            log_error(r+1);
        }
    }
        break;

    case FIXED_RCD:
    case FIXED_RCD_CONJ:
    case FIXED_RCD_BIAS:
    case FIXED_RCD_CONJ_BIAS: {
        bool bias_row = (train_method == FIXED_RCD_BIAS ||
                         train_method == FIXED_RCD_CONJ_BIAS);
        bool conjugate = (train_method == FIXED_RCD_CONJ ||
                          train_method == FIXED_RCD_CONJ_BIAS);
        RMAT A = randrot(dim, bias_row, conjugate);

        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            UPDATE_WGT(A[CYCLE(r)]);
            log_error(r+1);
        }
    }
        break;

    case RCD:
    case RCD_CONJ:
    case RCD_BIAS:
    case RCD_CONJ_BIAS: {
        bool bias_row = (train_method == RCD_BIAS ||
                         train_method == RCD_CONJ_BIAS);
        bool conjugate = (train_method == RCD_CONJ ||
                          train_method == RCD_CONJ_BIAS);
        RMAT A;

        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            const UINT c = CYCLE(r);
            if (c == CYCLE(0))
                A = randrot(dim, bias_row, conjugate);
            UPDATE_WGT(A[c]);
            log_error(r+1);
        }
    }
        break;

    case RCD_GRAD_BATCH_RAND:
    case RCD_GRAD_BATCH:
    case RCD_GRAD_RAND:
    case RCD_GRAD: {
        bool online = (train_method == RCD_GRAD ||
                       train_method == RCD_GRAD_RAND);
        bool wrand = (train_method == RCD_GRAD_RAND ||
                      train_method == RCD_GRAD_BATCH_RAND);
        // direction = gradient of the sum of weight*y*<w,x> over all
        // unsatisfied examples
        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            RVEC dir(dim, 0);
            if (r % 5 == 0 && wrand) {
                dir = randvec(dim);
            } else if (online) {
                UINT idx, cnt = 0;
                REAL o;
                do {
                    ++cnt;
                    idx = UINT(randu() * n_samples);
                    o = DOTPROD(wgt, X[idx]);
                } while (Y[idx] * o > 0 && cnt < 2*n_samples);
                // if we've tried too many times, just use any X
                dir = X[idx];
            } else {
                bool no_err = true;
                for (UINT j = 0; j < n_samples; ++j) {
                    GET_XYO(j);
                    if (y * o > 0) continue;
                    no_err = false;
                    for (UINT k = 0; k < udim; ++k)
                        dir[k] += y * x[k];
                }
                if (no_err) break;
            }

#if PERCEPTRON_FIXED_BIAS
            dir.back() = 0;
#endif
            UPDATE_WGT(dir);
            log_error(r+1);
        }
    }
        break;

    case RCD_GRAD_MIXED_BATCH_INITRAND:
    case RCD_GRAD_MIXED_BATCH:
    case RCD_GRAD_MIXED_INITRAND:
    case RCD_GRAD_MIXED: {
        bool online = (train_method == RCD_GRAD_MIXED ||
                       train_method == RCD_GRAD_MIXED_INITRAND);
        bool init_rand = (train_method == RCD_GRAD_MIXED_INITRAND ||
                          train_method == RCD_GRAD_MIXED_BATCH_INITRAND);
        dset_mult_wgt(ptw, Y);

        for (UINT r = 0; r < max_run; ++r) {
            RVEC dir(dim, 0);
            if (init_rand)
                dir = randvec(dim);
            UINT cnt = 0;

            for (UINT j = 0; j < n_samples; ++j) {
                UINT idx = (online? UINT(randu() * n_samples) : j);
                GET_XYO(idx);
                if (y * o > 0) continue;
                ++cnt;
                REAL adj = y*n_samples * randu();
                for (UINT k = 0; k < udim; ++k)
                    dir[k] += adj * x[k];
            }
            //if (cnt == 0 && !online) break;

            if (cnt == 0 && !init_rand)
                dir = randvec(dim);
#if PERCEPTRON_FIXED_BIAS
            dir.back() = 0;
#endif
            UPDATE_WGT(dir);
            log_error(r+1);
        }
    }
        break;

    case RCD_MIXED: {
        dset_mult_wgt(ptw, Y);
        RMAT A;
        for (UINT r = 0; r < max_run; ++r) {
            UINT c = r % (2*udim);
            RVEC dir;
            if (c < udim)
                dir = coorvec(dim, CYCLE(c));
            else {
                if (c == udim) A = randrot(dim, false, false);
                dir = A[c-udim]; // CYCLE doesn't change anything here
            }
            UPDATE_WGT(dir);
            log_error(r+1);
        }
    }
        break;

    default:
        assert(false);
    }

#if PERCEPTRON_FIXED_BIAS
    assert(wgt.back() == bias_save || train_method == AVE_PERCEPTRON ||
           train_method == AVE_PERCEPTRON_RAND);
#endif
    return 0;
}

#define INPUT_SUM(w,x)  \
    std::inner_product(x.begin(), x.end(), w.begin(), w.back())
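
// Note: INPUT_SUM computes <w, x> + bias for a plain (non-augmented) input:
// inner_product pairs x with the leading weights and starts the sum at the
// bias term w.back().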

Output Perceptron::operator() (const Input& x) const {
    assert(x.size() == n_input());
    REAL sum = INPUT_SUM(wgt, x);
    return Output(1, (sum>=0)? 1 : -1);
}

REAL Perceptron::margin_of (const Input& x, const Output& y) const {
    assert(std::fabs(std::fabs(y[0]) - 1) < INFINITESIMAL);
    return INPUT_SUM(wgt, x) * y[0];
}

REAL Perceptron::w_norm () const {
    REAL s = DOTPROD_NB(wgt, wgt);
    return std::sqrt(s);
}

void Perceptron::log_error (UINT ite, REAL err) const {
    if (logf != NULL) {
        if (err < 0) err = train_c_error();
        fprintf(logf, "%g ", err);
    }
}

} // namespace lemga
