dataset.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 #ifndef __LEMGA_DATASET_H__
00003 #define __LEMGA_DATASET_H__
00004 
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>
#include "random.h"
00015 
00016 namespace lemga {
00017 
00035 template <typename Tx, typename Ty>
00036 class dataset {
00044     std::vector< std::pair<Tx,Ty> > d;
00045 
00046 public:
00047     typedef Tx x_type;
00048     typedef Ty y_type;
00049 
00050     dataset () : d() {}
00051 
00053     UINT size () const { return d.size(); }
00054     bool empty () const { return d.empty(); }
00055     void clear () { d.clear(); }
00056     const Tx& x (UINT i) const { return d[i].first; }
00057     const Ty& y (UINT i) const { return d[i].second; }
00059 
00061     template <typename IIX, typename IIY>
00062     dataset (IIX xb, IIX xe, IIY yb, IIY ye) { import(xb, xe, yb, ye); }
00063 
00077     template <typename IIX, typename IIY>
00078     void import (IIX xb, IIX xe, IIY yb, IIY ye) {
00079         d.clear();
00080         d.reserve(xe - xb);
00081         for (; xb != xe && yb != ye; ++xb, ++yb)
00082             append(*xb, *yb);
00083     }
00085 
00087 
00093     dataset* random_sample (UINT n) const {
00094         const UINT dn = d.size();
00095         assert(n == 0 || dn > 0);
00096 
00097         dataset* rd = new dataset();
00098         rd->d.reserve(n);
00099         while (n--) {
00100             const UINT sel = UINT(randu() * dn);
00101             assert(sel < dn);
00102             rd->d.push_back(d[sel]);
00103         }
00104         return rd;
00105     }
00106 
00112     template <typename W>
00113     dataset* random_sample (const W& wgt, UINT n) const {
00114         const UINT dn = d.size();
00115         assert(n == 0 || dn > 0);
00116 
00117         std::vector<PROBAB> cdf(dn+1);
00118         cdf[0] = 0;
00119         for (UINT i = 0; i < dn; ++i)
00120             cdf[i+1] = cdf[i] + wgt[i];
00121         assert(cdf[dn]-1 > -EPSILON && cdf[dn]-1 < EPSILON);
00122 
00123         dataset* rd = new dataset();
00124         rd->d.reserve(n);
00125         while (n--) {
00126             const PROBAB r = randu();
00127 
00128             UINT b = 0, e = dn, m;
00129             while (b+1 < e) {
00130                 m = (b + e) / 2;
00131                 if (r < cdf[m]) e = m;
00132                 else b = m;
00133             }
00134 
00135             rd->d.push_back(d[b]);
00136         }
00137         return rd;
00138     }
00140 
00148     dataset& operator+= (const dataset& ds) {
00149         const UINT n = ds.d.size();
00150         d.reserve(d.size() + n);
00151         for (UINT i = 0; i < n; ++i)
00152             d.push_back(ds.d[i]);
00153         return *this;
00154     }
00155 
00156     void append (const Tx& _x, const Ty& _y) {
00157         d.resize(d.size()+1);           // 1 constructor + 1 copy
00158         d.back().first = _x;            // 1 copy
00159         d.back().second = _y;
00160         //d.push_back(make_pair(_x, _y));   // 2 copy
00161     }
00162 
00163     void replace (UINT i, const Tx& _x, const Ty& _y) {
00164         assert (i < d.size());
00165         d[i].first = _x;
00166         d[i].second = _y;
00167     }
00168 };
00169 
00170 } // namespace lemga
00171 
00172 #ifdef  __DATASET_H__
00173 #warning "This header file may conflict with another `dataset.h' file."
00174 #endif
00175 #define __DATASET_H__
00176 #endif

Generated on Mon Jan 9 23:43:24 2006 for LEMGA by  doxygen 1.4.6