adaboost_ecoc.cpp

#include <algorithm>
#include <assert.h>
#include <cmath>
#include <map>
#include "random.h"
#include "utility.h"
#include "vectorop.h"
#include "adaboost_ecoc.h"

REGISTER_CREATOR(lemga::AdaBoost_ECOC);

namespace lemga {

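// Initialize the per-class example weights: joint_wgt[c][i] starts from the
// training weight of example i, with joint_wgt[y_i][i] forced to 0; if base
// learners already exist, it is rescaled by exp(d[y_i] - d[c]).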
void AdaBoost_ECOC::setup_aux () {
    joint_wgt = JointWgt(n_class(), *ptw);
    // to facilitate the normalization in update_aux(),
    // we explicitly set joint_wgt[y_i][i] to zero
    for (UINT i = 0; i < n_samples; ++i)
        joint_wgt[ex_class[i]][i] = 0;
    cur_err.resize(n_samples);
    if (n_in_agg == 0) return;

    REAL wsum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        const std::vector<REAL>& d = distances(i);
        UINT y = ex_class[i];
        for (UINT c = 0; c < nclass; ++c) {
            joint_wgt[c][i] *= std::exp(d[y] - d[c]);
            wsum += joint_wgt[c][i];
        }
    }
    REAL r = nclass / wsum;
    if (r > 1e-5 && r < 1e5) return;
    using namespace op;
    joint_wgt *= r; // normalize
}

typedef std::vector<std::vector<REAL> > WMAT;
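// Total weight of the edges cut by the binary partition p, i.e., the sum of
// w[i][j] over all pairs (i,j) placed on different sides by p.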
static REAL binary_cut (const WMAT& w, const ECOC_VECTOR& p) {
    const UINT n = p.size();
    assert(w.size() == n);
    REAL c = 0;
    for (UINT i = 0; i < n; ++i) {
        assert(w[i].size() == n);
        for (UINT j = 0; j < i; ++j) {
            assert(w[i][j] == w[j][i]);
            if (p[i] != p[j])
                c += w[i][j];
        }
    }
    return c;
}

// Compute the matrix of pairwise edge weights between classes: entry (y,c)
// accumulates joint_wgt[c][j] over examples j of class y, and the matrix is
// then symmetrized.
WMAT AdaBoost_ECOC::confusion_matrix () const {
    WMAT w(nclass, std::vector<REAL>(nclass, 0));
    for (UINT j = 0; j < n_samples; ++j) {
        int y = ex_class[j];
        for (UINT c = 0; c < nclass; ++c)
            w[y][c] += joint_wgt[c][j];
    }
    for (UINT c = 0; c < nclass; ++c)
        for (UINT y = 0; y < c; ++y)
            w[y][c] = w[c][y] = w[y][c] + w[c][y];
    return w;
}

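// Exhaustive max-cut: enumerate all binary partitions of the classes (p[0]
// stays fixed) in Gray-code order and keep the one with the largest cut.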
ECOC_VECTOR AdaBoost_ECOC::max_cut (UINT nr) const {
    assert(nr == nclass);
    if (nclass > 13)
        std::cerr << "Warning: Max-cut is very slow when there are many classes\n";
    WMAT ewgt = confusion_matrix();
    for (UINT c = 0; c < nclass; ++c)
        assert(ewgt[c][c] == 0); // needed for the max-cut below

    ECOC_VECTOR maxp, p(nclass, 0);
    REAL maxc = 0, cut = 0;
    size_t flip = 0;
    while (gray_next(p, flip), flip != 0) { // p[0] will be fixed at 0
        for (UINT c = 0; c < nclass; ++c)
            if (p[c] == p[flip])
                cut -= ewgt[c][flip];
            else
                cut += ewgt[c][flip];
        if (maxc < cut) {
            maxp = p; maxc = cut;
        }
    }
    for (UINT c = 0; c < nclass; ++c)
        if (maxp[c] == 0) maxp[c] = -1;
    assert(std::fabs(maxc - binary_cut(ewgt, maxp)) < EPSILON);
    return maxp;
}

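// Greedy max-cut: visit class pairs in decreasing order of edge weight and
// put the two classes on opposite sides whenever at least one of them is
// still unassigned, until nr classes have been placed.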
ECOC_VECTOR AdaBoost_ECOC::max_cut_greedy (UINT nr) const {
    assert(nr <= nclass);
    WMAT ewgt = confusion_matrix();

    // We could use O(n^2 log(K)) time to find the K edges with the largest
    // weights; here we simply sort all edges in O(n^2 log(n^2)).
    typedef std::multimap<REAL,UINT> MMAP;
    MMAP edge;
    for (UINT j = 1; j < nclass; ++j)
        for (UINT c = 0; c+j < nclass; ++c) {
            UINT y = c + j;
            edge.insert(std::pair<REAL,UINT>(-ewgt[y][c], c*nclass+y));
        }

    ECOC_VECTOR p(nclass, 0);
    UINT pn = 0;
    for (MMAP::iterator pe = edge.begin(); pn < nr && pe != edge.end(); ++pe) {
        UINT k1 = pe->second / nclass, k2 = pe->second % nclass;
        if (randu() < 0.5) std::swap(k1, k2); // not really useful
        if (p[k1] == 0 && p[k2] == 0) {
            p[k1] = 1; p[k2] = -1;
            pn += 2;
        } else if (p[k1] == 0) {
            p[k1] = -p[k2];
            ++pn;
        } else if (p[k2] == 0) {
            p[k2] = -p[k1];
            ++pn;
        }
    }

    return p;
}

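// Randomly pick nr classes and assign them alternately to the +1 and -1
// sides; any remaining classes are left at 0.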
ECOC_VECTOR AdaBoost_ECOC::random_half (UINT nr) const {
    assert(nr <= nclass);
    ECOC_VECTOR p(nclass, 0);

    // If nr == n, setting p to be all 1's and then randomly
    // flipping half of p should be the fastest approach.
    std::vector<UINT> idx(nclass);
    for (UINT i = 0; i < nclass; ++i)
        idx[i] = i;
    std::random_shuffle(idx.begin(), idx.end());
    while (nr--)
        p[idx[nr]] = (nr % 2)? 1 : -1;

    return p;
}

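// Generate the i-th partition (a column of the ECOC coding matrix): reuse the
// base-class partition if it provides one, otherwise build a new partition
// according to par_method.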
bool AdaBoost_ECOC::ECOC_partition (UINT i, ECOC_VECTOR& p) const {
    if (MultiClass_ECOC::ECOC_partition(i, p)) return true;
    const UINT n = n_class();

    switch (par_method) {
    case RANDOM_HALF:
        p = random_half(n);
        break;

    case MAX_CUT:
        p = max_cut(n);
        break;

    case MAX_CUT_GREEDY:
        p = max_cut_greedy(n);
        break;

    default:
        assert(false);
    }

    return true;
}

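// Sample weights for the binary problem induced by partition p: example i
// receives the total joint weight of the classes on the side opposite to its
// own class, normalized so that the weights sum to 1.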
pDataWgt AdaBoost_ECOC::smpwgt_with_partition (const ECOC_VECTOR& p) const {
    assert(is_full_partition(p));
    DataWgt* btw = new DataWgt(n_samples);
    REAL wsum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        int y = p[ex_class[i]];
        REAL w = 0;
        for (UINT c = 0; c < n_class(); ++c)
            if (p[c] + y == 0)
                w += joint_wgt[c][i];
        wsum += w; (*btw)[i] = w;
    }
    REAL r = 1 / wsum;
    for (UINT i = 0; i < n_samples; ++i)
        (*btw)[i] *= r;
    return btw;
}

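// Train a copy of the base learner on the binary problem defined by p, using
// labels p[y_i] and the sample weights from smpwgt_with_partition().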
pLearnModel
AdaBoost_ECOC::train_with_full_partition (const ECOC_VECTOR& p) const {
    LearnModel *plm = lm_base->clone();
    assert(plm != 0);

    DataSet* btd = new DataSet();
    for (UINT i = 0; i < n_samples; ++i)
        btd->append(ptd->x(i), Output(1, p[ex_class[i]]));
    cur_smpwgt = smpwgt_with_partition(p); // saved for future use

    plm->set_train_data(btd, cur_smpwgt);
    plm->train();
    return plm;
}

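// Train the base learner for partition p and record in cur_err which
// examples it misclassifies on the induced binary problem.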
pLearnModel AdaBoost_ECOC::train_with_partition (ECOC_VECTOR& p) const {
    assert(is_full_partition(p));
    pLearnModel plm = train_with_full_partition(p);
    // Put back the full training set and an arbitrary weight
    plm->set_train_data(ptd, ptw);
    for (UINT i = 0; i < n_samples; ++i)
        cur_err[i] = (plm->get_output(i)[0] * p[ex_class[i]] <= 0);
    return plm;
}

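// AdaBoost-style hypothesis weight: with weighted binary error err, return
// log(beta) / 2 where beta = (1-err)/err (capped at 1000 when err is 0);
// -1 is returned when err >= 0.5.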
REAL
AdaBoost_ECOC::assign_weight (const ECOC_VECTOR&, const LearnModel&) const {
    const DataWgt& sw = *cur_smpwgt;
    REAL err = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        if (cur_err[i])
            err += sw[i];
    }
    if (err >= 0.5) return -1;

    REAL beta;
    if (err <= 0)
        beta = 1000;
    else
        beta = 1/err - 1;
    return std::log(beta) / 2;
}

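// Multiplicative weight update: for each class c on the side opposite to the
// example's label, joint_wgt[c][i] is multiplied by beta if the example was
// misclassified and by 1/beta otherwise; weights are renormalized only when
// their overall scale drifts too far.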
void AdaBoost_ECOC::update_aux (const ECOC_VECTOR& p) {
    assert(is_full_partition(p));
    const REAL beta = std::exp(lm_wgt[n_in_agg-1]);
    const REAL ibeta = 1 / beta;

    REAL wsum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        int y = p[ex_class[i]];
        REAL r = (cur_err[i]? beta : ibeta);
        for (UINT c = 0; c < nclass; ++c) {
            if (p[c] != y)
                joint_wgt[c][i] *= r;
            wsum += joint_wgt[c][i];
        }
    }
    REAL r = nclass / wsum;
    if (r > 1e-5 && r < 1e5) return;
    using namespace op;
    joint_wgt *= r; // normalize
}

} // namespace lemga
