cgboost.cpp

/** @file
 *  Implementation of CGBoost, boosting viewed as conjugate-gradient
 *  descent in function space.
 */

#include <cassert>
#include <cmath>
#include "cgboost.h"
#include "vectorop.h"
#include "optimize.h"

/* If true, use the negative gradient f itself, rather than the
 * conjugate direction d, when deriving the next sample weights
 * (see linear_smpwgt() below). */
#define USE_F_FOR_D  false

REGISTER_CREATOR(lemga::CGBoost);

namespace lemga {

void CGBoost::reset () {
    Boosting::reset();
    all_wgts.clear();
}

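/* Under the gradient-descent view, every iteration may change all of
 * the aggregation weights, so all_wgts keeps a snapshot of lm_wgt
 * after each iteration. Shrinking the ensemble back to n hypotheses
 * therefore has to restore the weights recorded at iteration n,
 * instead of merely truncating the current lm_wgt. */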
bool CGBoost::set_aggregation_size (UINT n) {
    if (grad_desc_view) {
        assert(size() == all_wgts.size() || size()+1 == all_wgts.size());
        if (n > all_wgts.size()) return false;
        if (n > 0) lm_wgt = all_wgts[n-1];
#if BOOSTING_OUTPUT_CACHE
        clear_cache();
#endif
    }
    return Boosting::set_aggregation_size(n);
}

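/* Serialization format: a line holding n, the number of hypotheses
 * (0 when the gradient-descent view is off), followed by n-1 lines
 * where line i (1 <= i < n) holds the i weights recorded after
 * iteration i. The weights of the final iteration are not repeated
 * here: they equal lm_wgt, which Boosting::serialize already saves. */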
bool CGBoost::serialize (std::ostream& os, ver_list& vl) const {
    SERIALIZE_PARENT(Boosting, os, vl, 1);
    if (grad_desc_view) {
        const UINT n = size();
        assert(n == all_wgts.size() || n+1 == all_wgts.size());
        if (!(os << n << '\n')) return false;
        for (UINT i = 0; i+1 < n; ++i) {  // Boosting saved the last one
            assert(all_wgts[i].size() == i+1);
            for (UINT j = 0; j <= i; ++j)
                if (!(os << all_wgts[i][j] << ' ')) return false;
            if (!(os << '\n')) return false;
        }
        return true;
    }
    else
        return (os << 0 << '\n');
}

bool CGBoost::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
    if (d != id() && d != NIL_ID) return false;
    UNSERIALIZE_PARENT(Boosting, is, vl, 1, v);
    assert(v > 0);

    UINT n;
    if (!(is >> n)) return false;
    if (n > 0 && n != size()) return false;

    if (n == 0 && size() > 0)   // saved without the gradient-descent view
        use_gradient_descent(false);

    if (n > 0) {
        use_gradient_descent(true);
        all_wgts.clear();
        for (UINT i = 1; i < n; ++i) {
            std::vector<REAL> wgt(i);
            for (UINT j = 0; j < i; ++j)
                if (!(is >> wgt[j])) return false;
            all_wgts.push_back(wgt);
        }
        // the last snapshot equals lm_wgt, already restored by the parent
        all_wgts.push_back(lm_wgt);
    }

    return true;
}

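/* A round-trip sketch. The stream operator and file name below are
 * assumptions for illustration; serialize() is normally driven through
 * LEMGA's generic Object I/O rather than called directly:
 *
 *     lemga::CGBoost cgb;
 *     // ... set up and train ...
 *     std::ofstream ofs("cgb.lm");     // hypothetical file name
 *     ofs << cgb;                      // ends up in serialize()
 */
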
void CGBoost::train () {
    if (!grad_desc_view) {
        using namespace op;
        // ncd tracks the (negative) cost gradient at each sample; start
        // from the sample weights, scaled by n_samples so the numbers
        // do not become too small
        ncd = *ptw; ncd *= n_samples;
        cgd = ncd;  // the first conjugate direction is the gradient itself
        cur_err.resize(n_samples);
    }

    Boosting::train();

    cur_err.clear();
    ncd.clear(); cgd.clear();
}

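/* When the gradient-descent view is on, training is delegated to the
 * generic conjugate-gradient optimizer over the aggregation weights
 * (BoostWgt). The numeric argument is presumably a step-size bound for
 * the line search: 1 when the cost functional is convex, and a more
 * conservative 0.5 otherwise. */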
void CGBoost::train_gd () {
    _boost_cg bcg(this);
    iterative_optimize(_conjugate_gradient<_boost_cg,BoostWgt,REAL,REAL>
                       (&bcg, convex? 1 : 0.5));
}

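/* Compute the aggregation weight of the newly trained hypothesis under
 * the current sample distribution ncd. With weighted error
 * eps = err / (err+cor), the returned weight is the familiar AdaBoost
 * step
 *
 *     alpha = ln(cor/err) / 2 = ln((1-eps)/eps) / 2,
 *
 * e.g. eps = 0.2 gives alpha = ln(4)/2, roughly 0.69. A return value
 * of -1 signals failure (weighted error at least 1/2), and a perfect
 * hypothesis gets the cap ln(1000)/2, roughly 3.45. */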
REAL CGBoost::linear_weight (const DataWgt&, const LearnModel& l) {
    assert(exact_dimensions(l));
    assert(l.train_data() == ptd);

    REAL cor = 0, err = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        assert(ncd[i] >= 0);
        cur_err[i] = l.c_error(l.get_output(i), ptd->y(i));
        if (cur_err[i] > 0.1)   // c_error is 0/1; nonzero means a mistake
            err += ncd[i];
        else cor += ncd[i];
    }
    assert(err+cor > 0);
#if VERBOSE_OUTPUT
    std::cout << "Weighted classification error: " <<
        err/(err+cor)*100 << "%\n";
#endif

    if (err >= cor) return -1;

    REAL beta;
    if (err <= 0)       // a perfect hypothesis; cap the weight
        beta = 1000;
    else
        beta = cor / err;
    return std::log(beta) / 2;
}

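/* Derive the next sample distribution from the conjugate direction.
 * ncd is first updated to the new gradient g+ (multiply by e^alpha on
 * errors, by e^-alpha on correct samples), then the direction is
 * updated Polak-Ribiere style,
 *
 *     beta = <g+, g+ - g> / <g, g>,   d+ = g+ + beta * d,
 *
 * with beta clipped at 0, and d+ is normalized to sum to 1 to give the
 * sample weights for the next round. */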
void CGBoost::linear_smpwgt (DataWgt& sw) {
    // update ratios (e^{+-alpha}) for error and correct samples
    const REAL be = std::exp(lm_wgt[n_in_agg-1]), bc = 1 / be;
    REAL s1 = 0, s2 = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        const REAL tmp = ncd[i] * (cur_err[i]? be : bc);  // new gradient g+
        s1 += tmp * (tmp - ncd[i]);   // <g+, g+ - g>
        s2 += ncd[i] * ncd[i];        // <g, g>
        ncd[i] = tmp;
        assert(std::fabs(ncd[i] - n_samples * (*ptw)[i] *
                         std::exp(- ptd->y(i)[0]*get_output(i)[0])) < EPSILON);
    }
    assert(s2 != 0);
    REAL beta = s1 / s2;
    if (beta < 0) beta = 0;

#if USE_F_FOR_D
    /* Compute the norm ratio between d and f, which will multiply
     * f so as to keep the correct norm. */
    REAL d2_sum = 0;
    for (UINT i = 0; i < n_samples; ++i)
        d2_sum += cgd[i] * cgd[i];
    const REAL cf_ratio = std::sqrt(d2_sum / n_samples);
#if VERBOSE_OUTPUT
    std::cout << "cf_ratio = " << cf_ratio << ", ";
#endif
#endif

    REAL bw_sum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
#if USE_F_FOR_D
        /* if we want to use f instead of cgd */
        cgd[i] = (cur_err[i] > 0.1)? -cf_ratio : cf_ratio;
#endif
        cgd[i] = ncd[i] + beta * cgd[i];   // d+ = g+ + beta * d
        bw_sum += cgd[i];
    }
#if VERBOSE_OUTPUT
    std::cout << "beta = " << beta << '\n';
#endif

    assert(bw_sum != 0);
    for (UINT i = 0; i < n_samples; ++i) {
        sw[i] = cgd[i] / bw_sum;   // normalize to a distribution
        assert(sw[i] >= 0);
    }
}

} // namespace lemga
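
A minimal usage sketch. The base learner (Stump) and the setters shown
here are assumptions drawn from the rest of LEMGA, not from this file:

    #include "cgboost.h"
    #include "stump.h"

    lemga::CGBoost cgb;
    lemga::Stump st;
    cgb.set_base_model(st);          // assumed Boosting/Aggregating setter
    cgb.use_gradient_descent(true);  // optimize the weights by CG
    cgb.set_max_models(100);         // assumed setter for the ensemble size
    cgb.set_train_data(ptd);         // ptd: a pDataSet prepared elsewhere
    cgb.train();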
