LEMGA: adaboost_ecoc.cpp Source File

00001 
00005 #include <algorithm>
00006 #include <assert.h>
00007 #include <cmath>
00008 #include <map>
00009 #include "random.h"
00010 #include "utility.h"
00011 #include "adaboost_ecoc.h"
00012 
00013 REGISTER_CREATOR(lemga::AdaBoost_ECOC);
00014 
00015 namespace lemga {
00016 
00017 void AdaBoost_ECOC::reset_training () {
00018     joint_wgt = JointWgt(n_class(), *ptw);
00019     // to facilitate the normalization in update_training(),
00020     // we explicitly set joint_wgt[y_i][i] to zero
00021     for (UINT i = 0; i < n_samples; ++i)
00022         joint_wgt[ex_class[i]][i] = 0;
00023     cur_err.resize(n_samples);
00024     assert(n_in_agg == 0);
00025 }
00026 
00027 typedef std::vector<std::vector<REAL> > WMAT;
00028 static REAL binary_cut (const WMAT& w, const ECOC_VECTOR& p) {
00029     const UINT n = p.size();
00030     assert(w.size() == n);
00031     REAL c = 0;
00032     for (UINT i = 0; i < n; ++i) {
00033         assert(w[i].size() == n);
00034         for (UINT j = 0; j < i; ++j) {
00035             assert(w[i][j] == w[j][i]);
00036             if (p[i] != p[j])
00037                 c += w[i][j];
00038         }
00039     }
00040     return c;
00041 }
00042 
00043 // compute the edge weight
00044 WMAT AdaBoost_ECOC::confusion_matrix () const {
00045     WMAT w(nclass, std::vector<REAL>(nclass, 0));
00046     for (UINT j = 0; j < n_samples; ++j) {
00047         int y = ex_class[j];
00048         for (UINT c = 0; c < nclass; ++c)
00049             w[y][c] += joint_wgt[c][j];
00050     }
00051     for (UINT c = 0; c < nclass; ++c)
00052         for (UINT y = 0; y < c; ++y)
00053             w[y][c] = w[c][y] = w[y][c] + w[c][y];
00054     return w;
00055 }
00056 
00058 ECOC_VECTOR AdaBoost_ECOC::max_cut (UINT nr) const {
00059     assert(nr == nclass);
00060     if (nclass > 13)
00061         std::cerr << "Warning: Max-cut is very slow for too many classes\n";
00062     WMAT ewgt = confusion_matrix();
00063     for (UINT c = 0; c < nclass; ++c)
00064         assert(ewgt[c][c] == 0); // needed for the max-cut below
00065 
00066     ECOC_VECTOR maxp, p(nclass, 0);
00067     REAL maxc = 0, cut = 0;
00068     size_t flip = 0;
00069     while (gray_next(p, flip), flip != 0) { // p[0] will be fixed at 0
00070         for (UINT c = 0; c < nclass; ++c)
00071             if (p[c] == p[flip])
00072                 cut -= ewgt[c][flip];
00073             else
00074                 cut += ewgt[c][flip];
00075         if (maxc < cut) {
00076             maxp = p; maxc = cut;
00077         }
00078     }
00079     for (UINT c = 0; c < nclass; ++c)
00080         if (maxp[c] == 0) maxp[c] = -1;
00081     assert(std::fabs(maxc - binary_cut(ewgt, maxp)) < EPSILON);
00082     return maxp;
00083 }
00084 
00086 ECOC_VECTOR AdaBoost_ECOC::max_cut_greedy (UINT nr) const {
00087     assert(nr <= nclass);
00088     WMAT ewgt = confusion_matrix();
00089 
00090     // we could use O(n^2 log(K)) to find the first K edges with largest
00091     // weights. Here we just use O(n^2 log(n^2)).
00092     typedef std::multimap<REAL,UINT> MMAP;
00093     MMAP edge;
00094     for (UINT j = 1; j < nclass; ++j)
00095         for (UINT c = 0; c+j < nclass; ++c) {
00096             UINT y = c + j;
00097             edge.insert(std::pair<REAL,UINT>(-ewgt[y][c], c*nclass+y));
00098         }
00099 
00100     ECOC_VECTOR p(nclass, 0);
00101     UINT pn = 0;
00102     for (MMAP::iterator pe = edge.begin(); pn < nr && pe != edge.end(); ++pe) {
00103         UINT k1 = pe->second / nclass, k2 = pe->second % nclass;
00104         if (randu() < 0.5) std::swap(k1, k2); // not really useful
00105         if (p[k1] == 0 && p[k2] == 0) {
00106             p[k1] = 1; p[k2] = -1;
00107             pn += 2;
00108         } else if (p[k1] == 0) {
00109             p[k1] = -p[k2];
00110             ++pn;
00111         } else if (p[k2] == 0) {
00112             p[k2] = -p[k1];
00113             ++pn;
00114         }
00115     }
00116 
00117     return p;
00118 }
00119 
00120 ECOC_VECTOR AdaBoost_ECOC::random_half (UINT nr) const {
00121     assert(nr <= nclass);
00122     ECOC_VECTOR p(nclass, 0);
00123 
00124     // If nr == n, setting p to be all 1's and then randomly
00125     // flipping half of p should be the fastest approach.
00126     std::vector<UINT> idx(nclass);
00127     for (UINT i = 0; i < nclass; ++i)
00128         idx[i] = i;
00129     std::random_shuffle(idx.begin(), idx.end());
00130     while (nr--)
00131         p[idx[nr]] = (nr % 2)? 1 : -1;
00132 
00133     return p;
00134 }
00135 
00136 bool AdaBoost_ECOC::ECOC_partition (UINT i, ECOC_VECTOR& p) {
00137     if (MultiClass_ECOC::ECOC_partition(i, p)) return true;
00138     const UINT n = n_class();
00139 
00140     switch (par_method) {
00141     case RANDOM_HALF:
00142         p = random_half(n);
00143         break;
00144 
00145     case MAX_CUT:
00146         p = max_cut(n);
00147         break;
00148 
00149     case MAX_CUT_GREEDY:
00150         p = max_cut_greedy(n);
00151         break;
00152 
00153     default:
00154         assert(false);
00155     }
00156 
00157     return true;
00158 }
00159 
00160 pDataWgt AdaBoost_ECOC::smpwgt_with_partition (const ECOC_VECTOR& p) {
00161     DataWgt* btw = new DataWgt(n_samples);
00162     REAL wsum = 0;
00163     for (UINT i = 0; i < n_samples; ++i) {
00164         int y = p[ex_class[i]];
00165         assert(y == 1 || y == -1);
00166         REAL w = 0;
00167         for (UINT c = 0; c < n_class(); ++c)
00168             if (p[c] + y == 0)
00169                 w += joint_wgt[c][i];
00170         wsum += w; (*btw)[i] = w;
00171     }
00172     REAL r = 1 / wsum;
00173     for (UINT i = 0; i < n_samples; ++i)
00174         (*btw)[i] *= r;
00175     return btw;
00176 }
00177 
00178 pLearnModel AdaBoost_ECOC::train_with_partition (ECOC_VECTOR& p) {
00179     LearnModel *plm = lm_base->clone();
00180     assert(plm != 0);
00181 
00182     DataSet* btd = new DataSet();
00183     for (UINT i = 0; i < n_samples; ++i)
00184         btd->append(ptd->x(i), Output(1, p[ex_class[i]]));
00185     cur_smpwgt = smpwgt_with_partition(p);
00186 
00187     plm->set_train_data(btd, cur_smpwgt);
00188     plm->train();
00189     // To save some memory, we put back the original set and weight
00190     plm->set_train_data(ptd, ptw);
00191 
00192     for (UINT i = 0; i < n_samples; ++i)
00193         cur_err[i] = (plm->get_output(i)[0] * p[ex_class[i]] <= 0);
00194 
00195     return plm;
00196 }
00197 
00198 REAL AdaBoost_ECOC::assign_weight (const ECOC_VECTOR& p, const LearnModel& l) {
00199     const DataWgt& sw = *cur_smpwgt;
00200     REAL err = 0;
00201     for (UINT i = 0; i < n_samples; ++i) {
00202         if (cur_err[i])
00203             err += sw[i];
00204     }
00205     if (err >= 0.5) return -1;
00206 
00207     REAL beta;
00208     if (err <= 0)
00209         beta = 1000;
00210     else
00211         beta = 1/err - 1;
00212     return std::log(beta) / 2;
00213 }
00214 
00215 void AdaBoost_ECOC::update_training (const ECOC_VECTOR& p) {
00216     const UINT n = n_class();
00217     const REAL beta = std::exp(lm_wgt[n_in_agg-1]);
00218     const REAL ibeta = 1 / beta;
00219 
00220     REAL wsum = 0;
00221     for (UINT i = 0; i < n_samples; ++i) {
00222         int y = p[ex_class[i]];
00223         REAL r = (cur_err[i]? beta : ibeta);
00224         for (UINT c = 0; c < n; ++c) {
00225             if (p[c] != y)
00226                 joint_wgt[c][i] *= r;
00227             wsum += joint_wgt[c][i];
00228         }
00229     }
00230     REAL r = n / wsum;
00231     if (r > 1e-5 && r < 1e5) return;
00232     // normalize the joint_wgt
00233     for (UINT i = 0; i < n_samples; ++i)
00234         for (UINT c = 0; c < n; ++c)
00235             joint_wgt[c][i] *= r;
00236 }
00237 
00238 } // namespace lemga