00001
00002 #ifndef __LEMGA_DATASET_H__
00003 #define __LEMGA_DATASET_H__
00004
00012 #include <vector>
00013 #include <utility>
00014 #include "random.h"
00015
00016 namespace lemga {
00017
00035 template <typename Tx, typename Ty>
00036 class dataset {
00044 std::vector< std::pair<Tx,Ty> > d;
00045
00046 public:
00047 typedef Tx x_type;
00048 typedef Ty y_type;
00049
00050 dataset () : d() {}
00051
00053 UINT size () const { return d.size(); }
00054 bool empty () const { return d.empty(); }
00055 void clear () { d.clear(); }
00056 const Tx& x (UINT i) const { return d[i].first; }
00057 const Ty& y (UINT i) const { return d[i].second; }
00059
00061 template <typename IIX, typename IIY>
00062 dataset (IIX xb, IIX xe, IIY yb, IIY ye) { import(xb, xe, yb, ye); }
00063
00077 template <typename IIX, typename IIY>
00078 void import (IIX xb, IIX xe, IIY yb, IIY ye) {
00079 d.clear();
00080 d.reserve(xe - xb);
00081 for (; xb != xe && yb != ye; ++xb, ++yb)
00082 append(*xb, *yb);
00083 }
00085
00087
00093 dataset* random_sample (UINT n) const {
00094 const UINT dn = d.size();
00095 assert(n == 0 || dn > 0);
00096
00097 dataset* rd = new dataset();
00098 rd->d.reserve(n);
00099 while (n--) {
00100 const UINT sel = UINT(randu() * dn);
00101 assert(sel < dn);
00102 rd->d.push_back(d[sel]);
00103 }
00104 return rd;
00105 }
00106
00112 template <typename W>
00113 dataset* random_sample (const W& wgt, UINT n) const {
00114 const UINT dn = d.size();
00115 assert(n == 0 || dn > 0);
00116
00117 std::vector<PROBAB> cdf(dn+1);
00118 cdf[0] = 0;
00119 for (UINT i = 0; i < dn; ++i)
00120 cdf[i+1] = cdf[i] + wgt[i];
00121 assert(cdf[dn]-1 > -EPSILON && cdf[dn]-1 < EPSILON);
00122
00123 dataset* rd = new dataset();
00124 rd->d.reserve(n);
00125 while (n--) {
00126 const PROBAB r = randu();
00127
00128 UINT b = 0, e = dn, m;
00129 while (b+1 < e) {
00130 m = (b + e) / 2;
00131 if (r < cdf[m]) e = m;
00132 else b = m;
00133 }
00134
00135 rd->d.push_back(d[b]);
00136 }
00137 return rd;
00138 }
00140
00148 dataset& operator+= (const dataset& ds) {
00149 const UINT n = ds.d.size();
00150 d.reserve(d.size() + n);
00151 for (UINT i = 0; i < n; ++i)
00152 d.push_back(ds.d[i]);
00153 return *this;
00154 }
00155
00156 void append (const Tx& _x, const Ty& _y) {
00157 d.resize(d.size()+1);
00158 d.back().first = _x;
00159 d.back().second = _y;
00160
00161 }
00162
00163 void replace (UINT i, const Tx& _x, const Ty& _y) {
00164 assert (i < d.size());
00165 d[i].first = _x;
00166 d[i].second = _y;
00167 }
00168 };
00169
00170 }
00171
00172 #ifdef __DATASET_H__
00173 #warning "This header file may conflict with another `dataset.h' file."
00174 #endif
00175 #define __DATASET_H__
00176 #endif