boosting.cpp

#include <assert.h>
#include <cmath>   // std::fabs
#include "vectorop.h"
#include "optimize.h"
#include "boosting.h"

REGISTER_CREATOR(lemga::Boosting);

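// Per-sample cost and its first derivative; Boosting assumes a single
// output dimension, so only component 0 is used.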
#define _cost(F,y)          cost_functor.cost(F[0],y[0])
#define _cost_deriv(F,y)    cost_functor.deriv1(F[0],y[0])

namespace lemga {

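// Construct an empty ensemble. "cvx" selects a convex combination (the
// output is divided by the total hypothesis weight) instead of a plain
// linear one; "c" is the cost functor used during training.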
Boosting::Boosting (bool cvx, const cost::Cost& c)
    : Aggregating(), convex(cvx), grad_desc_view(false),
      min_cst(0), min_err(-1), cost_functor(c)
{ /* empty */ }

Boosting::Boosting (const Aggregating& s)
    : Aggregating(s), lm_wgt(lm.size(), 1), convex(false),
      grad_desc_view(false), min_cst(0), min_err(-1),
      cost_functor(cost::_cost)
{}

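// Serialized format: the Aggregating part, then one weight per
// hypothesis on a single line, then the convex flag.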
bool Boosting::serialize (std::ostream& os, ver_list& vl) const {
    SERIALIZE_PARENT(Aggregating, os, vl, 1);
    assert(lm_wgt.size() == lm.size());
    for (UINT i = 0; i < lm_wgt.size(); ++i)
        os << lm_wgt[i] << ' ';
    if (!lm_wgt.empty()) os << '\n';
    return (os << convex << '\n');
}

bool Boosting::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
    if (d != id() && d != NIL_ID) return false;
    UNSERIALIZE_PARENT(Aggregating, is, vl, 1, v);

    const UINT n = lm.size();
    lm_wgt.resize(n);
    for (UINT i = 0; i < n; ++i)
        if (!(is >> lm_wgt[i])) return false;

    UINT c;
    if (!(is >> c)) {
        if (v != 0) return false;
        convex = false; // old versions have no convex flag
        return true;    // don't fall through and read the unset c
    }
    if (c > 1) return false;
    convex = (c != 0);
    return true;
}

void Boosting::reset () {
    Aggregating::reset();
    lm_wgt.clear();
#if BOOSTING_OUTPUT_CACHE
    clear_cache();
#endif
}

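// The (unnormalized) margin of an example is y * F(x); dividing by
// margin_norm() scales it into [-1, 1].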
REAL Boosting::margin_norm () const {
    return convex? 1 : model_weight_sum();
}

REAL Boosting::margin_of (const Input& x, const Output& y) const {
    assert(std::fabs(y[0]*y[0]-1) < INFINITESIMAL);
    return (*this)(x)[0] * y[0];
}

REAL Boosting::margin (UINT i) const {
    REAL y = ptd->y(i)[0];
    assert(std::fabs(y*y-1) < INFINITESIMAL);
    return get_output(i)[0] * y;
}

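// Aggregate output: each hypothesis casts a vote of +/- its weight
// according to the sign of its output; a convex combination is then
// normalized by the total weight.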
Output Boosting::operator() (const Input& x) const {
    assert(n_in_agg <= lm.size() && lm.size() == lm_wgt.size() && _n_out > 0);
#ifndef NDEBUG
    for (UINT i = 0; i < n_in_agg; ++i)
        assert(lm_wgt[i] >= 0);
#endif

    Output y(_n_out, 0);
    for (UINT i = 0; i < n_in_agg; ++i) {
        assert(lm[i] != 0 && exact_dimensions(*lm[i]));
        Output out = (*lm[i])(x);
        for (UINT j = 0; j < _n_out; ++j)
            y[j] += (out[j] > 0)? lm_wgt[i] : -lm_wgt[i];
    }

    if (convex && n_in_agg > 0) {
        using namespace op;
        y *= 1 / model_weight_sum();
    }
    return y;
}

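// Same aggregation as operator(), but for training sample idx. With
// BOOSTING_OUTPUT_CACHE, the unnormalized sum over the first
// cache_n[idx] hypotheses is cached, so only hypotheses added since
// the last call have to be evaluated.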
Output Boosting::get_output (UINT idx) const {
    assert(n_in_agg <= lm.size() && lm.size() == lm_wgt.size() && _n_out > 0);
    assert(ptw != 0); // no data sampling

#if BOOSTING_OUTPUT_CACHE
    if (cache_n[idx] > n_in_agg)
        clear_cache(idx);
    Output& y = cache_y[idx];
    UINT start = cache_n[idx];
    cache_n[idx] = n_in_agg;
    if (start == 0) { // y is either empty, or already filled with 0
        assert(y.empty() || y[0] == 0); // only check y[0]
        y.resize(_n_out, 0);
    }
#else
    Output y(_n_out, 0);
    UINT start = 0;
#endif
    assert(y.size() == _n_out);
    for (UINT i = start; i < n_in_agg; ++i) {
        assert(lm[i] != 0 && exact_dimensions(*lm[i]));
        assert(lm[i]->train_data() == ptd);
        Output out = lm[i]->get_output(idx);
        for (UINT j = 0; j < _n_out; ++j)
            y[j] += (out[j] > 0)? lm_wgt[i] : -lm_wgt[i];
    }

    if (convex && n_in_agg > 0) {
        using namespace op;
#if BOOSTING_OUTPUT_CACHE
        Output y2 = y;
        return (y2 *= 1 / model_weight_sum());
#else
        y *= 1 / model_weight_sum();
#endif
    }
    return y;
}

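// Switching to a different training set invalidates the cached outputs.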
#if BOOSTING_OUTPUT_CACHE
void Boosting::set_train_data (const pDataSet& pd, const pDataWgt& pw) {
    pDataSet old_ptd = ptd;
    Aggregating::set_train_data(pd, pw);
    if (old_ptd != ptd) clear_cache();
}
#endif

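// The standard boosting loop: train a new hypothesis on the reweighted
// samples, give it a weight via assign_weight(), and stop when the
// weight becomes non-positive, the ensemble reaches max_n_model, or
// the optional cost / training-error targets are met.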
void Boosting::train () {
    assert(ptd != 0 && ptw != 0);
    assert(lm_base != 0); // we need lm_base to create new hypotheses
    set_dimensions(*ptd);

    if (grad_desc_view) {
        train_gd();
        return;
    }

    n_in_agg = size();
    pDataWgt sample_wgt = sample_weight();

    while (n_in_agg < max_n_model) {
        const pLearnModel p = train_with_smpwgt(sample_wgt);

        // update sample_wgt, set up hypothesis wgt (lm_wgt)
        const REAL w = assign_weight(*sample_wgt, *p);
        if (w <= 0) break;

        set_dimensions(*p);
        lm.push_back(p); lm_wgt.push_back(w);
        n_in_agg++;
        if (min_cst > 0 && cost() < min_cst) break;
        if (min_err >= 0 && train_c_error() <= min_err) break;
        sample_wgt = update_smpwgt(*sample_wgt, *p);
    }
}

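// Gradient-descent view: treat the ensemble weights as a single vector
// (BoostWgt) and optimize them by line search; the constant passed to
// _line_search (1 for convex, 0.5 otherwise) is presumably the initial
// step size.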
void Boosting::train_gd () {
    _boost_gd bgd(this);
    iterative_optimize(_line_search<_boost_gd,BoostWgt,REAL,REAL>
                       (&bgd, convex? 1.0 : 0.5));
}

pLearnModel Boosting::train_with_smpwgt (const pDataWgt& sw) const {
#if VERBOSE_OUTPUT
    std::cout << "=== " << id()
              << " [" << (convex? "convex" : "linear") << "] #"
              << n_in_agg+1 << " / " << max_n_model << " ===\n";
#endif
    LearnModel *plm = lm_base->clone();
    assert(plm != 0);

    plm->set_train_data(ptd, sw);
    plm->train();
    // Restore ptd for future get_output() calls, and restore ptw to
    // save memory; this requires that plm support sample weights.
    assert(plm->support_weighted_data());
    plm->set_train_data(ptd, ptw);
    return plm;
}

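// Hooks for computing hypothesis weights and sample weights; the base
// class leaves them undefined, and concrete boosting algorithms
// override the variants they need.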
REAL Boosting::convex_weight (const DataWgt&, const LearnModel&) {
    OBJ_FUNC_UNDEFINED("convex_weight");
}
REAL Boosting::linear_weight (const DataWgt&, const LearnModel&) {
    OBJ_FUNC_UNDEFINED("linear_weight");
}

void Boosting::convex_smpwgt (DataWgt&) {
    OBJ_FUNC_UNDEFINED("convex_smpwgt");
}
void Boosting::linear_smpwgt (DataWgt&) {
    OBJ_FUNC_UNDEFINED("linear_smpwgt");
}

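// Training-set cost of the current ensemble: the sample-weighted sum
// of per-sample costs.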
REAL Boosting::cost () const {
    assert(ptd != 0 && ptw != 0);
    REAL cst = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        REAL c = _cost(get_output(i), ptd->y(i));
        cst += c * (*ptw)[i];
    }
    return cst;
}

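// Sample weights for training the next hypothesis: proportional to the
// negative derivative of the cost, -(*ptw)[i] * c'(F(x_i), y_i) / y_i,
// normalized to sum to 1.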
pDataWgt Boosting::sample_weight () const {
    assert(ptd != 0 && ptw != 0);
    if (n_in_agg == 0) return ptw;

    DataWgt* pdw = new DataWgt(n_samples);
    REAL sum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        REAL yi = ptd->y(i)[0];
        REAL p = - (*ptw)[i] / yi * _cost_deriv(get_output(i), ptd->y(i));
        assert(p >= 0);
        (*pdw)[i] = p; sum += p;
    }
    assert(sum > 0);
    const REAL k = 1 / sum;
    for (UINT i = 0; i < n_samples; ++i)
        (*pdw)[i] *= k;

    return pdw;
}

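// BoostWgt arithmetic for the gradient-descent view: a "step" carries
// one more hypothesis than the current weight vector, so += updates
// the existing weights and appends the new hypothesis.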
Boosting::BoostWgt& Boosting::BoostWgt::operator+= (const BoostWgt& bw) {
    const UINT ts = size();
    assert(ts+1 == bw.size());

    for (UINT i = 0; i < ts; ++i) {
        assert(lm[i] == bw.lm[i]);
        lm_wgt[i] += bw.lm_wgt[i];
    }
    lm.push_back(bw.lm[ts]);
    lm_wgt.push_back(bw.lm_wgt[ts]);

    return *this;
}

Boosting::BoostWgt Boosting::BoostWgt::operator- () const {
    using namespace op;
    return BoostWgt(lm, -lm_wgt);
}

Boosting::BoostWgt& Boosting::BoostWgt::operator*= (REAL r) {
    using namespace op;
    lm_wgt *= r;
    return *this;
}

} // namespace lemga
