X-Boost  2.3.8
DataSetUtils.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _DATA_SET_UTILS_H
22 #define _DATA_SET_UTILS_H
23 
27 #include <vector>
28 #include <_stdint.h>
29 #include <fstream>
30 #include <iostream>
31 #include <map> // multimap
32 #include <cmath>
33 #include <stdlib.h> // rand
34 #include "DataSet.h"
35 
36 #include "Utility/bitset.h"
37 
38 
40 template<class AggTypeDst, class AggTypeSrc>
42 
45 template<class AggTypeDst, class AggTypeSrc>
47 
50 template<class AggTypeDst, class AggTypeSrc>
52 
57 template<class AggTypeDst, class AggTypeSrc>
59 
62 template<class AggTypeDst, class AggTypeSrc>
64 
66 template<class AggTypeDst, class AggTypeSrc>
67 void RandomSample(DataSetHandle<AggTypeDst> & dst, DataSetHandle<AggTypeDst> * validation, const DataSetHandle<AggTypeSrc> & src, int nA, int nB);
68 
71 template<class AggTypeDst, class AggTypeSrc>
73 
75 
76 template<class AggTypeDst, class AggTypeSrc>
78 {
79  p.Clear();
80  p.width = src.width;
81  p.height = src.height;
82 
83  n.Clear();
84  n.width = src.width;
85  n.height = src.height;
86 
87  for(typename std::vector<typename AggTypeSrc::PatternType >::const_iterator i = src.templates.begin(); i!= src.templates.end(); ++i)
88  if(i->category == 1)
89  p.Insert(*i);
90  else
91  n.Insert(*i);
92 
93 }
94 
95 template<class AggTypeDst, class AggTypeSrc>
97 {
98  if(m >= src.Size())
99  {
100  //
101  for(int i=0; i<src.Size(); ++i)
102  dst.Insert( src.templates[ i ] );
103  }
104  else
105  {
106  std::multimap<double,int> d;
107 
108  for(int i=0; i<src.Size(); ++i)
109  d.insert(std::pair<double,int>(src.templates[i].d, i) );
110 
111  std::multimap<double,int>::reverse_iterator rit = d.rbegin();
112  for(int i=0; i<m; ++i)
113  {
114  dst.Insert( src.templates[ rit->second ] );
115  ++rit;
116  }
117 
118  }
119 
120 }
121 
122 template<class AggTypeDst, class AggTypeSrc>
124 {
125  if(m >= src.Size())
126  {
127  //
128  for(int i=0; i<src.Size(); ++i)
129  dst.Insert( src.templates[ i ] );
130  }
131  else
132  {
133  std::multimap<double,int> d;
134 
135  for(int i=0; i<src.Size(); ++i)
136  d.insert(std::pair<double,int>(src.templates[i].d, i) );
137 
138  std::multimap<double,int>::iterator rit = d.begin();
139  for(int i=0; i<m; ++i)
140  {
141  dst.Insert( src.templates[ rit->second ] );
142  ++rit;
143  }
144 
145  }
146 
147 }
148 
149 template<class AggTypeDst, class AggTypeSrc>
151 {
152  // sum of weight
153  double wP, wN;
154  wP = wN = 0.0;
155  for(int i=0; i<src.Size(); ++i)
156  {
157  if(src.templates[i].category == 1)
158  wP += src.templates[i].d;
159  else
160  wN += src.templates[i].d;
161  }
162 
163  std::cout << "Ammount of Energy: " << wP << "(+) " << wN << "(-)" << std::endl;
164  float th = perc * std::min(wP,wN);
165  std::cout << "\tthreshold = " << th << std::endl;
166 
167  random_bit_set rnd(src.Size());
168  // accumulators
169  double aP, aN;
170  aP = aN = 0.0;
171  while(aP < th || aN < th)
172  {
173  int i = rnd();
174  if((src.templates[i].category == 1) && (aP < th))
175  {
176  aP += src.templates[i].d;
177  dst.Insert(src.templates[i]);
178  }
179  else if((src.templates[i].category == -1) && (aN < th))
180  {
181  aN += src.templates[i].d;
182  dst.Insert(src.templates[i]);
183  }
184  }
185 }
186 
187 template<class AggTypeDst, class AggTypeSrc>
189 {
190  if(m >= src.Size())
191  {
192  //
193  for(int i=0; i<src.Size(); ++i)
194  dst.Insert( src.templates[ i ] );
195  }
196  else
197  {
198  double *d = new double [src.Size()];
199  // TODO: use an equation to convert d in pdf
200  // sqrt have no meaning, but seems to work
201  double dsum = 0.0;
202  for(int i =0; i<src.Size(); ++i)
203  dsum += sqrt(src.templates[i].d);
204 
205  double daux = 0.0;
206  for(int i =0; i<src.Size(); ++i)
207  {
208  daux += std::sqrt(src.templates[i].d) / dsum;
209  d[i] = daux;
210  }
211 
212 // double dtest = 0.0;
213  for(int i=0; i<m; ++i)
214  {
215  int j = randslot(d, src.Size());
216 // dtest += src.templates[j].d;
217  dst.Insert( src.templates[ j ] );
218  }
219 
220 // std::cout << "Input Weight: " << dsum / src.Size() << ". Output Weight: " << dtest / m << std::endl;
221 
222  delete [] d;
223  }
224 }
225 
226 template<class AggTypeDst, class AggTypeSrc>
228 {
229  random_bit_set rnd(src.Size());
230 
231  for(int i=0; i<m; ++i)
232  dst.Insert( src.templates[ rnd() ] );
233 }
234 
235 template<class AggTypeDst, class AggTypeSrc>
237 {
238  int m = src.Size();
239  std::vector<bool> bitset(m);
240 
241  dst.Clear();
242  dst.templates.reserve(nA+nB);
243  dst.width = src.width;
244  dst.height = src.height;
245 
246  if(validation)
247  {
248  validation->Clear();
249  validation->templates.reserve(m-(nA+nB));
250  validation->width = src.width;
251  validation->height = src.height;
252  }
253 
254  for(int i =0; i<m; ++i) bitset[i]=false;
255  for(int i =0; i<nA+nB; ++i)
256  {
257  int n;
258  bool failed;
259  do {
260  do {
261  n = rand() % m;
262  } while(bitset[n]);
263 
264  failed = false;
265  if( src.templates[n].category == 1 && nA==0)
266  failed = true;
267  if( src.templates[n].category == -1 && nB==0)
268  failed = true;
269 
270  } while(failed);
271 
272  if( src.templates[n].category == 1)
273  {
274  nA--;
275  }
276  if( src.templates[n].category == -1)
277  {
278  nB--;
279  }
280 
281  bitset[n] = true;
282 
283  dst.Insert(src.templates[n]);
284  }
285 
286  if(validation)
287  {
288  for(int i=0; i<m; ++i)
289  if(!bitset[i])
290  validation->Insert(src.templates[i]);
291  }
292 }
293 
294 
295 #endif
296 
void ExtractWorst(DataSetHandle< AggTypeDst > &dst, const DataSetHandle< AggTypeSrc > &src, int m)
Definition: DataSetUtils.h:96
void Insert(const PatternType &p)
add a new pattern to the list
Definition: DataSet.h:117
void ExtractReweighingEnergy(DataSetHandle< AggTypeDst > &dst, const DataSetHandle< AggTypeSrc > &src, float perc)
Definition: DataSetUtils.h:150
Definition: DataSet.h:50
unsigned int Size() const
Return number of allocated samples (complete size of DataSet)
Definition: DataSet.h:101
ListType templates
a collection of Pattern used in this dataset
Definition: DataSet.h:73
an exclusive random generator
Definition: bitset.h:32
void Clear()
Reset (but do not release memory)
Definition: DataSet.h:94
void RandomSample(DataSetHandle< AggTypeDst > &dst, DataSetHandle< AggTypeDst > *validation, const DataSetHandle< AggTypeSrc > &src, int nA, int nB)
Definition: DataSetUtils.h:236
int randslot(const double *slots, int n)
void RandomSampleWithDistribution(DataSetHandle< AggTypeDst > &dst, const DataSetHandle< AggTypeSrc > &src, int m)
Definition: DataSetUtils.h:188
void Split(DataSetHandle< AggTypeDst > &p, DataSetHandle< AggTypeDst > &n, const DataSetHandle< AggTypeSrc > &src)
Definition: DataSetUtils.h:77
additional typedef, for portability under win32
declare a DataSet
void RandomUniqueSample(DataSetHandle< AggTypeDst > &dst, const DataSetHandle< AggTypeSrc > &src, int m)
Definition: DataSetUtils.h:227
some useful bit operator functions
void ExtractBest(DataSetHandle< AggTypeDst > &dst, const DataSetHandle< AggTypeSrc > &src, int m)
Definition: DataSetUtils.h:123