00001
#include <assert.h>
#include <cmath>
#include <iostream>
#include <vector>
#include "lpboost.h"
extern "C"{
#include <glpk.h>
}
00012
00013 REGISTER_CREATOR(lemga::LPBoost);
00014
00015 namespace lemga {
00016
00017 #define U(i) ((i)+1) //U(0) to U(n_samples-1)
00018 #define R(t) ((t)+1) //R(0) to R(T-1)
00019
00020
00021
00022 bool lp_add_hypothesis (LPX* lp, int* ndx, double* val, const LearnModel& lm,
00023 const pDataWgt& pdw = 0, REAL maxe = 0)
00024 {
00025 const pDataSet ptd = lm.train_data();
00026 assert(ptd != 0);
00027 const UINT n = ptd->size();
00028 assert(pdw == 0 || pdw->size() == n);
00029
00030 REAL err = 0;
00031 for (UINT i = 0; i < n; ++i) {
00032 bool e = (lm.c_error(lm.get_output(i), ptd->y(i)) > 0.1);
00033 if (e && pdw) err += (*pdw)[i];
00034 ndx[i+1] = U(i);
00035 val[i+1] = e? 1 : -1;
00036 }
00037 if (err >= maxe && pdw)
00038 return false;
00039
00040 lpx_add_rows(lp, 1);
00041 int Rt = lpx_get_num_rows(lp);
00042 lpx_set_mat_row(lp, Rt, n, ndx, val);
00043 lpx_set_row_bnds(lp, Rt, LPX_LO, -1.0, 0.0);
00044
00045 return true;
00046 }
00047
00048
00049 REAL lp_solve (LPX* lp, pDataWgt& pdw) {
00050 lpx_simplex(lp);
00051 REAL sumu = -lpx_get_obj_val(lp);
00052
00053
00054 UINT n = lpx_get_num_cols(lp);
00055 DataWgt* sample_wgt = new DataWgt(n);
00056 for (UINT i = 0; i < n; ++i) {
00057 double wgt = lpx_get_col_prim(lp, U(i));
00058 assert(wgt >= -EPSILON);
00059 if (wgt < 0) wgt = 0;
00060 (*sample_wgt)[i] = wgt / sumu;
00061 }
00062 pdw = sample_wgt;
00063 return sumu;
00064 }
00065
00066 void LPBoost::train () {
00067 assert(ptd != 0 && ptw != 0);
00068 assert(lm_base != 0);
00069 assert(!grad_desc_view);
00070 set_dimensions(*ptd);
00071
00072
00073 LPX* lp = lpx_create_prob();
00074 lpx_add_cols(lp, n_samples);
00075 for (UINT i = 0; i < n_samples; ++i) {
00076 lpx_set_col_bnds(lp, U(i), LPX_DB, 0.0,
00077 RegC * (*ptw)[i] * n_samples);
00078 lpx_set_obj_coef(lp, U(i), -1);
00079 }
00080 lpx_set_obj_dir(lp, LPX_MIN);
00081
00082 int* ndx = new int[n_samples+1]; double* val = new double[n_samples+1];
00083
00084
00085 for (UINT t = 0; t < size(); ++t) {
00086 const pLearnModel p = lm[t];
00087 assert(p->train_data() == ptd);
00088 lp_add_hypothesis(lp, ndx, val, *p);
00089 }
00090 n_in_agg = size();
00091
00092 REAL sumu = RegC * n_samples;
00093 pDataWgt pdw = ptw;
00094 for (UINT t = n_in_agg; t < max_n_model; ++t) {
00095 if (t > 0)
00096 sumu = lp_solve(lp, pdw);
00097 if (sumu < EPSILON) {
00098 std::cerr << "Warning: sum u is " << sumu << "; quit earlier.\n";
00099 break;
00100 }
00101 REAL besterr = (1 - 1 / sumu) / 2;
00102
00103 const pLearnModel p = train_with_smpwgt(pdw);
00104 assert(p->train_data() == ptd);
00105 if (!lp_add_hypothesis(lp, ndx, val, *p, pdw, besterr-EPSILON))
00106 break;
00107
00108 set_dimensions(*p);
00109 lm.push_back(p); lm_wgt.push_back(0);
00110 ++n_in_agg;
00111 }
00112
00113 lpx_simplex(lp);
00114
00115
00116 assert((UINT) lpx_get_num_rows(lp) == R(n_in_agg-1));
00117 for (UINT k = 0; k < n_in_agg; ++k) {
00118 lm_wgt[k] = lpx_get_row_dual(lp, R(k));
00119 assert(lm_wgt[k] > -EPSILON);
00120 if (lm_wgt[k] < 0) lm_wgt[k] = 0;
00121 }
00122
00123 delete[] ndx; delete[] val;
00124 lpx_delete_prob(lp);
00125 }
00126
00127 }