LEMGA: adaboost_erp.cpp Source File

00001 
00005 #include <assert.h>
00006 #include "adaboost_erp.h"
00007 
00008 REGISTER_CREATOR(lemga::AdaBoost_ERP);
00009 
00010 namespace lemga {
00011 
00012 bool AdaBoost_ERP::ECOC_partition (UINT i, ECOC_VECTOR& p) const {
00013     if (MultiClass_ECOC::ECOC_partition(i, p)) return true;
00014     const UINT n = 2; // (n_class() + 3) / 4;
00015 
00016     switch (par_method) {
00017     case RANDOM_2:
00018         p = random_half(n);
00019         break;
00020 
00021     case MAX_2:
00022         p = max_cut_greedy(n);
00023         break;
00024 
00025     default:
00026         return AdaBoost_ECOC::ECOC_partition(i, p);
00027     }
00028 
00029     return true;
00030 }
00031 
00032 pLearnModel
00033 AdaBoost_ERP::train_with_partial_partition (const ECOC_VECTOR& p) const {
00034     LearnModel *plm = lm_base->clone();
00035     assert(plm != 0);
00036 
00037     // We will ignore examples with "color" p[] = 0.
00038     // The best way is to first group examples by their classes,
00039     // then each time collect classes with nonzero p.
00040     // We don't do this for easier modification for future changes,
00041     // e.g., if we want to allow continuous values of p[]
00042     DataSet* btd = new DataSet();
00043     DataWgt* btw = new DataWgt();
00044     REAL wsum = 0;
00045     for (UINT i = 0; i < n_samples; ++i) {
00046         int y = p[ex_class[i]];
00047         if (y == 0) continue;
00048 
00049         btd->append(ptd->x(i), Output(1, y));
00050         REAL w = 0;
00051         for (UINT c = 0; c < nclass; ++c)
00052             if (p[c] + y == 0)
00053                 w += joint_wgt[c][i];
00054         wsum += w; btw->push_back(w);
00055     }
00056     REAL r = 1 / wsum;
00057     for (UINT i = 0; i < btw->size(); ++i)
00058         (*btw)[i] *= r;
00059 
00060     plm->set_train_data(btd, btw);
00061     plm->train();
00062     return plm;
00063 }
00064 
// Write the partition p to the stream o, one character per entry
// ('+' for a positive entry, '-' for a negative one, '0' for zero),
// and then flush o.
//
// The do/while(0) wrapper makes the expansion a single statement, so the
// macro can be used safely in an unbraced if/else.  Both arguments are
// parenthesized in the expansion, but they are evaluated more than once,
// so pass expressions without side effects.
#define OUTPUT_PARTITION(p,o)                                          \
    do {                                                               \
        for (UINT opc_ = 0; opc_ < (p).size(); ++opc_)                 \
            (o) << ((p)[opc_]>0? '+':((p)[opc_]<0?'-':'0'));           \
        (o) << std::flush;                                             \
    } while (0)
00069 
00070 pLearnModel AdaBoost_ERP::train_with_partition (ECOC_VECTOR& p) const {
00071 #if VERBOSE_OUTPUT
00072     std::cout << "    ";
00073     OUTPUT_PARTITION(p, std::cout);
00074 #endif
00075 
00076     pLearnModel plm = 0;
00077     bool calc_smpwgt = true;
00078     UINT s = lrs;
00079     while (s--) {
00080         // learning
00081         assert(calc_smpwgt);
00082         if (is_full_partition(p)) {
00083             plm = train_with_full_partition(p); // cur_smpwgt is set
00084             calc_smpwgt = false;
00085         } else
00086             plm = train_with_partial_partition(p);
00087         assert(plm != 0);
00088 #if VERBOSE_OUTPUT
00089         std::cout << " ... trained" << std::flush;
00090 #endif
00091 
00092         // Put back the full training set and an arbitrary weight
00093         // to collect the outputs (in cur_err).
00094         // Note output 0 will be put as -1.
00095         plm->set_train_data(ptd, ptw);
00096         for (UINT i = 0; i < n_samples; ++i)
00097             cur_err[i] = (plm->get_output(i)[0] > 0);  // tmp use
00098 
00099         if (!(s--)) break;
00100 
00101         // re-partitioning
00102         std::vector<REAL> mkt(nclass, 0);
00103         for (UINT i = 0; i < n_samples; ++i) {
00104             UINT y = ex_class[i];
00105             int out = (cur_err[i]? 1 : -1);
00106             for (UINT c = 0; c < nclass; ++c) {
00107                 const REAL jwo = joint_wgt[c][i] * out;
00108                 mkt[c] += jwo; mkt[y] -= jwo;
00109             }
00110         }
00111         bool changed = false;
00112         for (UINT c = 0; c < nclass; ++c) {
00113             int np = (mkt[c]>0? -1 : 1);
00114             changed |= (np != p[c]);
00115             p[c] = np;
00116         }
00117 #if VERBOSE_OUTPUT
00118         std::cout << "\n => ";
00119         if (changed) {
00120             OUTPUT_PARTITION(p, std::cout);
00121         } else
00122             std::cout << "NO CHANGE";
00123 #endif
00124 
00125         if (!changed) break;
00126         calc_smpwgt = true;
00127     }
00128 #if VERBOSE_OUTPUT
00129     std::cout << '\n';
00130 #endif
00131     assert(is_full_partition(p) /* && cur_err == output of plm */);
00132 
00133     // Update the current error & sample weights
00134     for (UINT i = 0; i < n_samples; ++i)
00135         cur_err[i] = cur_err[i] ^ (p[ex_class[i]] > 0);
00136     if (calc_smpwgt)
00137         cur_smpwgt = smpwgt_with_partition(p);
00138 
00139     return plm;
00140 }
00141 
00142 } // namespace lemga