00001
00005 #include <assert.h>
00006 #include "vectorop.h"
00007 #include "optimize.h"
00008 #include "boosting.h"
00009
00010 REGISTER_CREATOR(lemga::Boosting);
00011
00012 #define _cost(F,y) cost_functor.cost(F[0],y[0])
00013 #define _cost_deriv(F,y) cost_functor.deriv1(F[0],y[0])
00014
00015 namespace lemga {
00016
/** Construct a Boosting aggregator.
 *  @param cvx  true for a convex (normalized) combination of hypotheses,
 *              false for a plain linear combination.
 *  @param c    cost functor used by cost() and sample_weight().
 *  The defaults min_cst = 0 and min_err = -1 disable the cost and
 *  training-error early-stopping tests in train(), respectively. */
Boosting::Boosting (bool cvx, const cost::Cost& c)
    : Aggregating(), convex(cvx), grad_desc_view(false),
      min_cst(0), min_err(-1), cost_functor(c)
{ }
00024
00025 bool Boosting::serialize (std::ostream& os, ver_list& vl) const {
00026 SERIALIZE_PARENT(Aggregating, os, vl, 1);
00027 assert(lm_wgt.size() == lm.size());
00028 for (UINT i = 0; i < lm_wgt.size(); ++i)
00029 os << lm_wgt[i] << ' ';
00030 if (!lm_wgt.empty()) os << '\n';
00031 return (os << convex << '\n');
00032 }
00033
00034 bool Boosting::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
00035 if (d != id() && d != empty_id) return false;
00036 UNSERIALIZE_PARENT(Aggregating, is, vl, 1, v);
00037
00038 const UINT n = lm.size();
00039 lm_wgt.resize(n);
00040 for (UINT i = 0; i < n; ++i)
00041 if (!(is >> lm_wgt[i])) return false;
00042
00043 UINT c;
00044 if (!(is >> c)) {
00045 if (v != 0) return false;
00046 convex = false;
00047 }
00048 else if (c > 1) return false;
00049 convex = c;
00050 return true;
00051 }
00052
/** Reset to an untrained state: clear the parent's model list, this
 *  class's per-model weights, and (when compiled in) the per-sample
 *  output cache. */
void Boosting::initialize () {
    Aggregating::initialize();
    lm_wgt.clear();
#if BOOSTING_OUTPUT_CACHE
    clear_cache();
#endif
}
00060
00061 REAL Boosting::margin_norm () const {
00062 return convex? 1 : model_weight_sum();
00063 }
00064
00065 REAL Boosting::margin_of (const Input& x, const Output& y) const {
00066 if (n_in_agg == 0) return 0;
00067 assert(std::fabs(std::fabs(y[0])-1) < INFINITESIMAL);
00068 return (*this)(x)[0] * y[0];
00069 }
00070
00071 REAL Boosting::margin (UINT i) const {
00072 if (n_in_agg == 0) return 0;
00073 REAL y = ptd->y(i)[0];
00074 assert(std::fabs(std::fabs(y)-1) < INFINITESIMAL);
00075 return get_output(i)[0] * y;
00076 }
00077
00078 Output Boosting::operator() (const Input& x) const {
00079 assert(n_in_agg <= lm.size() && n_in_agg <= lm_wgt.size());
00080 #ifndef NDEBUG
00081 for (UINT i = 0; i < n_in_agg; ++i)
00082 assert(lm_wgt[i] >= 0);
00083 #endif
00084
00085 Output y(_n_out, 0);
00086
00087 for (UINT i = 0; i < n_in_agg; ++i) {
00088 assert(lm[i] != 0);
00089 Output out = (*lm[i])(x);
00090 for (UINT j = 0; j < _n_out; ++j)
00091 y[j] += (out[j] > 0)? lm_wgt[i] : -lm_wgt[i];
00092 }
00093
00094 if (convex && n_in_agg > 0) {
00095 using namespace op;
00096 y *= 1 / model_weight_sum();
00097 }
00098 return y;
00099 }
00100
/** Ensemble output on training sample @a idx, built from each base
 *  model's cached per-sample output.  With BOOSTING_OUTPUT_CACHE the
 *  unnormalized partial sum over the first cache_n[idx] models is
 *  kept and only newly added models are accumulated. */
Output Boosting::get_output (UINT idx) const {
    assert(ptw != NULL);

#if BOOSTING_OUTPUT_CACHE
    // A shrunk aggregation invalidates the cached partial sum.
    if (cache_n[idx] > n_in_agg)
        clear_cache(idx);
    Output& y = cache_y[idx];    // accumulate directly into the cache
    UINT start = cache_n[idx];
    cache_n[idx] = n_in_agg;
#else
    Output y(_n_out, 0);
    UINT start = 0;
#endif
    for (UINT i = start; i < n_in_agg; ++i) {
        assert(lm[i] != 0);
        Output out = lm[i]->get_output(idx);
        // weighted sign of the base model's output
        for (UINT j = 0; j < _n_out; ++j)
            y[j] += (out[j] > 0)? lm_wgt[i] : -lm_wgt[i];
    }

    if (convex && n_in_agg > 0) {
        // Normalize a copy so the cached sum itself stays unnormalized.
        Output y2 = y;
        using namespace op;
        y2 *= 1 / model_weight_sum();
        return y2;
    }
    return y;
}
00129
#if BOOSTING_OUTPUT_CACHE
/** Replace the training data/weights and invalidate the per-sample
 *  output cache, which was computed for the previous data set. */
void Boosting::set_train_data (const pDataSet& pd, const pDataWgt& pw) {
    Aggregating::set_train_data(pd, pw);
    clear_cache();
}
#endif
00136
00137 REAL Boosting::train () {
00138 assert(n_in_agg == 0 && empty());
00139 assert(ptd != 0 && ptw != 0);
00140 assert(lm_base != 0);
00141
00142 if (grad_desc_view) return train_gd();
00143
00144 pDataWgt sample_wgt = ptw;
00145
00146 for (UINT i = 0; i < max_n_model; ++i) {
00147 const pLearnModel p = train_with_smpwgt(sample_wgt);
00148
00149
00150 const REAL w = assign_weight(*sample_wgt, *p);
00151 if (w <= 0) break;
00152
00153 lm.push_back(p); lm_wgt.push_back(w);
00154 n_in_agg++;
00155 if (min_cst > 0 && cost() < min_cst) break;
00156 if (min_err >= 0) {
00157 REAL err = 0;
00158 for (UINT j = 0; j < n_samples; ++j)
00159 err += (*ptw)[j] * (get_output(j)[0]*ptd->y(j)[0] <= 0);
00160 if (err <= min_err) break;
00161 }
00162 sample_wgt = update_smpwgt(*sample_wgt, *p);
00163 }
00164
00165 return 0;
00166 }
00167
00168 REAL Boosting::train_gd () {
00169 _boost_gd bgd(this);
00170 iterative_optimize(_line_search<_boost_gd,BoostWgt,REAL,REAL>
00171 (&bgd, convex? 1.0 : 0.5));
00172 return cost();
00173 }
00174
00175 pLearnModel Boosting::train_with_smpwgt (const pDataWgt& sw) const {
00176 #if VERBOSE_OUTPUT
00177 std::cout << "=== " << id()
00178 << " [" << (convex? "convex" : "linear") << "] #"
00179 << n_in_agg+1 << " / " << max_n_model << " ===\n";
00180 #endif
00181 LearnModel *plm = lm_base->clone();
00182 assert(plm != 0);
00183
00184 plm->set_train_data(ptd, sw);
00185 plm->train();
00186 return plm;
00187 }
00188
/** Placeholder: subclasses using a convex combination override this
 *  to compute the aggregation weight of a newly trained model. */
REAL Boosting::convex_weight (const DataWgt&, const LearnModel&) {
    OBJ_FUNC_UNDEFINED("convex_weight");
}
/** Placeholder: subclasses using a linear combination override this
 *  to compute the aggregation weight of a newly trained model. */
REAL Boosting::linear_weight (const DataWgt&, const LearnModel&) {
    OBJ_FUNC_UNDEFINED("linear_weight");
}
00195
/** Placeholder: subclasses override this to update the per-sample
 *  weights for the convex-combination variant. */
void Boosting::convex_smpwgt (DataWgt&) {
    OBJ_FUNC_UNDEFINED("convex_smpwgt");
}
/** Placeholder: subclasses override this to update the per-sample
 *  weights for the linear-combination variant. */
void Boosting::linear_smpwgt (DataWgt&) {
    OBJ_FUNC_UNDEFINED("linear_smpwgt");
}
00202
00203 REAL Boosting::cost () const {
00204 if (n_in_agg == 0 && convex)
00205 return INFINITY;
00206
00207
00208
00209
00210 assert(ptd != 0 && ptw != 0);
00211 REAL cst = 0;
00212 for (UINT i = 0; i < n_samples; ++i) {
00213 REAL c = _cost(get_output(i), ptd->y(i));
00214 cst += c * (*ptw)[i];
00215 }
00216 return cst;
00217 }
00218
00223 pDataWgt Boosting::sample_weight () const {
00224 assert(ptd != 0 && ptw != 0);
00225 if (n_in_agg == 0) return ptw;
00226
00227 DataWgt* pdw = new DataWgt(n_samples);
00228 REAL sum = 0;
00229 for (UINT i = 0; i < n_samples; ++i) {
00230 REAL yi = ptd->y(i)[0];
00231 REAL p = - (*ptw)[i] / yi * _cost_deriv(get_output(i), ptd->y(i));
00232 assert(p >= 0);
00233 (*pdw)[i] = p; sum += p;
00234 }
00235 assert(sum > 0);
00236 const REAL k = 1 / sum;
00237 for (UINT i = 0; i < n_samples; ++i)
00238 (*pdw)[i] *= k;
00239
00240 return pdw;
00241 }
00242
00243 Boosting::BoostWgt& Boosting::BoostWgt::operator+= (const BoostWgt& bw) {
00244 const UINT ts = size();
00245 assert(ts+1 == bw.size());
00246
00247 for (UINT i = 0; i < ts; ++i) {
00248 assert(lm[i] == bw.lm[i]);
00249 lm_wgt[i] += bw.lm_wgt[i];
00250 }
00251 lm.push_back(bw.lm[ts]);
00252 lm_wgt.push_back(bw.lm_wgt[ts]);
00253
00254 return *this;
00255 }
00256
/** Negation: same model list, every weight negated (uses the vector
 *  operators from namespace op). */
Boosting::BoostWgt Boosting::BoostWgt::operator- () const {
    using namespace op;
    return BoostWgt(lm, -lm_wgt);
}
00261
/** Scale every hypothesis weight by @a r (vector operator from
 *  namespace op). */
Boosting::BoostWgt& Boosting::BoostWgt::operator*= (REAL r) {
    using namespace op;
    lm_wgt *= r;
    return *this;
}
00267
00268 }