LEMGA: lpboost.cpp Source File

00001 
#include <assert.h>
#include <cmath>
#include <iostream>
#include <vector>
#include "lpboost.h"
extern "C"{
#include <glpk.h>
}
00012 
00013 REGISTER_CREATOR(lemga::LPBoost);
00014 
00015 namespace lemga {
00016 
00017 #define U(i) ((i)+1)  //U(0) to U(n_samples-1)
00018 #define R(t) ((t)+1)  //R(0) to R(T-1)
00019 
00020 REAL LPBoost::train () {
00021     assert(n_in_agg == 0 && empty());
00022     assert(ptd != 0 && ptw != 0);
00023     assert(lm_base != 0); // we need lm_base to create new hypotheses
00024     assert(!grad_desc_view);
00025 
00026     // Construct inner problem
00027     LPX* lp = lpx_create_prob();
00028     lpx_add_cols(lp, n_samples);                        // u_i
00029     for (UINT i = 0; i < n_samples; ++i) {
00030         lpx_set_col_bnds(lp, U(i), LPX_DB, 0.0,
00031                          RegC * (*ptw)[i] * n_samples); // 0 <= u_i <= C_i
00032         lpx_set_obj_coef(lp, U(i), -1);                 // obj: -sum u_i
00033     }
00034     lpx_set_obj_dir(lp, LPX_MIN);                       // min obj
00035 
00036     // For adding columns
00037     int* ndx = new int[n_samples+1]; double* val = new double[n_samples+1];
00038 
00039     REAL besterr = HUGE_VAL;
00040     pDataWgt pdw = ptw;
00041 
00042     for (UINT t = 0; t < max_n_model; ++t) {
00043         const pLearnModel p = train_with_smpwgt(pdw);
00044 
00045         REAL err = 0;
00046         for (UINT i = 0; i < n_samples; ++i) {
00047             if (p->c_error(p->get_output(i), ptd->y(i)) > 0.1)
00048                 err += (*pdw)[i];
00049         }
00050         if (err >= besterr - EPSILON) // Cannot find better hypotheses
00051             break;
00052 
00053         // Add one more constraint R(t) = -sum u_i y_i h_t(x_i) >= -1
00054         lpx_add_rows(lp, 1);
00055         for (UINT i = 0; i < n_samples; ++i) {
00056             ndx[i+1] = U(i);
00057             val[i+1] = - p->get_output(i)[0] * ptd->y(i)[0];
00058         }
00059         lpx_set_mat_row(lp, R(t), n_samples, ndx, val);
00060         lpx_set_row_bnds(lp, R(t), LPX_LO, -1.0, 0.0);  // R(t) >= -1
00061 
00062         // Solve inner problem
00063         lpx_simplex(lp);
00064         REAL sumu = -lpx_get_obj_val(lp);
00065         if (sumu < EPSILON) { // we do not expect this to happen
00066             std::cerr << "Warning: sum u is " << sumu << "; quit earlier.\n";
00067             break;
00068         }
00069         besterr = (1.0 - 1.0 / sumu) / 2.0;
00070 
00071         lm.push_back(p); lm_wgt.push_back(0);
00072         ++n_in_agg;
00073 
00074         // Update sample weights
00075         DataWgt* sample_wgt = new DataWgt(n_samples);
00076         for (UINT i = 0; i < n_samples; ++i) {
00077             double wgt;
00078             wgt = lpx_get_col_prim(lp, U(i));
00079             assert(wgt >= -EPSILON);
00080             if (wgt < 0) wgt = 0;
00081             (*sample_wgt)[i] = wgt / sumu;
00082         }
00083         pdw = sample_wgt;
00084 
00085         // Update hypothesis coefficients
00086         for (UINT k = 0; k <= t; ++k) {
00087             lm_wgt[k] = lpx_get_row_dual(lp, R(k));
00088             assert(lm_wgt[k] > -EPSILON);
00089             if (lm_wgt[k] < 0)
00090                 lm_wgt[k] = 0.0;
00091         }
00092     }
00093 
00094     delete[] ndx; delete[] val;
00095     lpx_delete_prob(lp);
00096 
00097     REAL err = 0;
00098     for (UINT i = 0; i < n_samples; ++i) {
00099         err += (get_output(i)[0]*ptd->y(i)[0] <= 0);
00100     }
00101     return err / n_samples;
00102 }
00103 
00104 } // namespace lemga