cgboost.cpp

/** @file
 *  @brief Implementation of the lemga::CGBoost class. */

#include <cassert>
#include <cmath>
#include "cgboost.h"
#include "vectorop.h"
#include "optimize.h"

/* If true, linear_smpwgt() replaces the previous conjugate direction
 * with the current hypothesis's agreement signs f, rescaled to the
 * norm of the direction. */
#define USE_F_FOR_D  false

REGISTER_CREATOR(lemga::CGBoost);

namespace lemga {

void CGBoost::initialize () {
    Boosting::initialize();
    all_wgts.clear();
}

bool CGBoost::set_aggregation_size (UINT n) {
    if (grad_desc_view) {
        assert(size() == all_wgts.size() || size()+1 == all_wgts.size());
        if (n > all_wgts.size()) return false;
        if (n > 0) lm_wgt = all_wgts[n-1];
#if BOOSTING_OUTPUT_CACHE
        clear_cache();
#endif
    }
    return Boosting::set_aggregation_size(n);
}

bool CGBoost::serialize (std::ostream& os, ver_list& vl) const {
    SERIALIZE_PARENT(Boosting, os, vl, 1);
    if (grad_desc_view) {
        const UINT n = size();
        assert(n == all_wgts.size() || n+1 == all_wgts.size());
        if (!(os << n << '\n')) return false;
        for (UINT i = 0; i+1 < n; ++i) {  // the last row was already saved by Boosting
            assert(all_wgts[i].size() == i+1);
            for (UINT j = 0; j <= i; ++j)
                if (!(os << all_wgts[i][j] << ' ')) return false;
            if (!(os << '\n')) return false;
        }
        return true;
    }
    else
        return (os << 0 << '\n');
}

bool CGBoost::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
    if (d != id() && d != empty_id) return false;
    UNSERIALIZE_PARENT(Boosting, is, vl, 1, v);
    assert(v > 0);

    UINT n;
    if (!(is >> n)) return false;
    if (n > 0 && n != size()) return false;

    if (n == 0 && size() > 0)
        use_gradient_descent(false);

    if (n > 0) {
        use_gradient_descent(true);
        all_wgts.clear();
        for (UINT i = 1; i < n; ++i) {
            std::vector<REAL> wgt(i);
            for (UINT j = 0; j < i; ++j)
                if (!(is >> wgt[j])) return false;
            all_wgts.push_back(wgt);
        }
        all_wgts.push_back(lm_wgt);  // the last row was restored by Boosting
    }

    return true;
}
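
/* The weight history all_wgts is lower-triangular: row i holds the
 * i+1 aggregation weights in effect after hypothesis i+1 was added.
 * serialize() writes the row count n and only rows 0..n-2, because the
 * last row equals lm_wgt, which Boosting already saves; unserialize()
 * mirrors this.  A minimal standalone sketch of the same layout, using
 * only the standard library (an illustration of the format, not part
 * of LEMGA):
 *
 * \code
 * #include <sstream>
 * #include <vector>
 *
 * int main () {
 *     typedef double REAL;
 *     std::vector< std::vector<REAL> > all_wgts(3);
 *     all_wgts[0].assign(1, 0.9);   // row 0: 1 weight
 *     all_wgts[1].assign(2, 0.5);   // row 1: 2 weights
 *     all_wgts[2].assign(3, 0.2);   // row 2: 3 weights (= lm_wgt)
 *
 *     std::stringstream ss;         // "save": n, then rows 0..n-2
 *     const std::size_t n = all_wgts.size();
 *     ss << n << '\n';
 *     for (std::size_t i = 0; i+1 < n; ++i) {
 *         for (std::size_t j = 0; j <= i; ++j)
 *             ss << all_wgts[i][j] << ' ';
 *         ss << '\n';
 *     }
 *
 *     std::size_t m; ss >> m;       // "load": the first m-1 rows
 *     std::vector< std::vector<REAL> > restored;
 *     for (std::size_t i = 1; i < m; ++i) {
 *         std::vector<REAL> row(i);
 *         for (std::size_t j = 0; j < i; ++j) ss >> row[j];
 *         restored.push_back(row);
 *     }
 *     // the last row would come from lm_wgt, as in unserialize()
 *     return 0;
 * }
 * \endcode
 */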

REAL CGBoost::train () {
    assert(n_output() == 1);
    if (grad_desc_view) return train_gd();

    using namespace op;
    ncd = *ptw; ncd *= n_samples;  // scale by n_samples so the numbers are not too small (optional)
    cgd = ncd;
    cur_err.resize(n_samples);

    const REAL err = Boosting::train();

    cur_err.clear();
    ncd.clear(); cgd.clear();
    return err;
}
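
/* Throughout training, ncd holds (n_samples times) the per-sample
 * magnitude of the gradient of the exponential cost
 * \f$\sum_i w_i e^{-y_i F(x_i)}\f$ with respect to the ensemble output
 * F -- cf. the assertion in linear_smpwgt():
 *
 *     \f[ \mathrm{ncd}_i = n\, w_i\, e^{-y_i F(x_i)}, \f]
 *
 * while cgd accumulates the conjugate search direction built from
 * successive gradients. */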

REAL CGBoost::train_gd () {
    _boost_cg bcg(this);
    iterative_optimize(_conjugate_gradient<_boost_cg,BoostWgt,REAL,REAL>
                       (&bcg, convex? 1 : 0.5));
    return cost();
}
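
/* In the gradient-descent view, the aggregation weights (a BoostWgt)
 * are optimized directly.  As a sketch, a conjugate-gradient optimizer
 * of this kind iterates
 *
 *     \f[ d_t = -\nabla C(w_t) + \beta_t d_{t-1}, \qquad
 *         w_{t+1} = w_t + \alpha_t d_t, \f]
 *
 * with some choice of \f$\beta_t\f$ and a line search for
 * \f$\alpha_t\f$; the exact rules used here live in optimize.h. */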

/** Compute the aggregation weight of the newly trained hypothesis
 *  @a l, using the conjugate-direction weights ncd in place of the
 *  plain sample weights. */
REAL CGBoost::linear_weight (const DataWgt&, const LearnModel& l) {
    assert(n_output() == l.n_output());

    REAL cor = 0, err = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        assert(ncd[i] >= 0);
        cur_err[i] = l.c_error(l.get_output(i), ptd->y(i));
        if (cur_err[i] > 0.1)   // nonzero c_error counts as an error
            err += ncd[i];
        else cor += ncd[i];
    }
    assert(err+cor > 0);
#if VERBOSE_OUTPUT
    std::cout << "Weighted classification error: " <<
        err/(err+cor)*100 << "%\n";
#endif

    if (err >= cor) return -1;  // no better than random guessing

    REAL beta;
    if (err <= 0)
        beta = 1000;  // cap the weight of a perfect hypothesis
    else
        beta = cor / err;
    return std::log(beta) / 2;
}
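
/* The returned weight is AdaBoost's confidence value.  With
 * \f$\epsilon = \mathrm{err}/(\mathrm{err}+\mathrm{cor})\f$, the
 * weighted training error under ncd,
 *
 *     \f[ \alpha = \frac{1}{2}\ln\frac{\mathrm{cor}}{\mathrm{err}}
 *                = \frac{1}{2}\ln\frac{1-\epsilon}{\epsilon}. \f]
 */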

/** Update the gradient weights ncd and the conjugate direction cgd
 *  after the new hypothesis has received its aggregation weight, and
 *  store the normalized direction in @a sw. */
void CGBoost::linear_smpwgt (DataWgt& sw) {
    // update ratio (\beta) for erroneous and correct samples
    const REAL be = std::exp(lm_wgt[n_in_agg-1]), bc = 1 / be;
    REAL s1 = 0, s2 = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        const REAL tmp = ncd[i] * (cur_err[i]? be : bc);
        s1 += tmp * (tmp - ncd[i]);
        s2 += ncd[i] * ncd[i];
        ncd[i] = tmp;
        assert(std::fabs(ncd[i] - n_samples * (*ptw)[i] *
                         std::exp(- ptd->y(i)[0]*get_output(i)[0])) < EPSILON);
    }
    assert(s2 != 0);
    REAL beta = s1 / s2;
    if (beta < 0) beta = 0;

#if USE_F_FOR_D
    /* Compute the norm ratio between d and f, which will multiply
     * f so as to keep the correct norm. */
    REAL d2_sum = 0;
    for (UINT i = 0; i < n_samples; ++i)
        d2_sum += cgd[i] * cgd[i];
    const REAL cf_ratio = std::sqrt(d2_sum / n_samples);
#if VERBOSE_OUTPUT
    std::cout << "cf_ratio = " << cf_ratio << ", ";
#endif
#endif

    REAL bw_sum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
#if USE_F_FOR_D
        /* use f instead of the previous direction cgd */
        cgd[i] = (cur_err[i] > 0.1)? -cf_ratio : cf_ratio;
#endif
        cgd[i] = ncd[i] + beta * cgd[i];
        bw_sum += cgd[i];
    }
#if VERBOSE_OUTPUT
    std::cout << "beta = " << beta << '\n';
#endif

    assert(bw_sum != 0);
    for (UINT i = 0; i < n_samples; ++i) {
        sw[i] = cgd[i] / bw_sum;
        assert(sw[i] >= 0);
    }
}
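
/* The direction update above is conjugate gradient with the
 * Polak-Ribiere formula, clamped at zero: writing \f$g_t\f$ for the
 * updated gradient weights (ncd after the loop) and \f$g_{t-1}\f$ for
 * the previous ones,
 *
 *     \f[ \beta = \max\!\left(0,\;
 *           \frac{g_t^{\top}(g_t - g_{t-1})}{g_{t-1}^{\top} g_{t-1}}
 *         \right), \qquad d_t = g_t + \beta\, d_{t-1}, \f]
 *
 * and the new sample weights sw are \f$d_t\f$ normalized to sum to 1. */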

} // namespace lemga
