X-Boost  2.3.8
RealDecisionStumpOracle.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _REAL_DECISION_STUMP_ORACLE_H
22 #define _REAL_DECISION_STUMP_ORACLE_H
23 
28 #ifdef _MULTITHREAD
29 # include "Thread/thread.h"
30 #endif
31 
32 #include "Utility/timer.h"
33 
34 #include "Classifier/RealDecisionStump.h"
36 
37 #include "DataSet.h"
40 
41 
42 
50 template<class FeatureGenerator, class Aggregator, class Metric>
51 struct RealDecisionStumpOracle: public Metric {
52 public:
53 
56 
59 
60  DECLARE_AGGREGATOR
61 
62 
63 private:
65  double m_dP, m_dN;
66 
68  PatternResponse *m_store;
69 
71  int m_preloadCount;
72 
73 #ifdef _MULTITHREAD
74  int m_threadCount;
76 #endif
77 
79  FeatureGenerator * m_featureGenerator;
80 
82  DataSetHandle<Aggregator> m_training_set;
83 
84 private:
85 
86  void InitWeight();
87 
91  template<class R>
92  void Internal_StreamEvaluate_1(const R * h, int size, unsigned int i0, unsigned int i1);
93 
99  template<class FeatureExtractor>
100  void Evaluate_1(const FeatureExtractor & h)
101  {
102  ExtractFeature(m_store, h, m_training_set);
103  }
104 
106  template<class R>
107  void StreamEvaluate_1(const R * h, int size);
108 
110  void Internal_Sort_Store(int i0, int i1)
111  {
112  for(int i=i0; i<i1; ++i)
113  {
114  sort_pattern(&m_store[i * m_training_set.Size()], m_training_set.Size());
115  // std::sort(&m_store[i * Size()], &m_store[i * Size()+Size()]);
116  }
117  }
118 
119 
122  double Optimize(PatternResponse *store, ClassifierType & h)
123  {
124  int samples = m_training_set.Size();
125 
126  // ordino m_store in ordine crescente di value della feature
127  // il 54.98% del tempo viene perso su questa riga
128  std::sort(&store[0], &store[samples]);
129  // Internal_Sort_Store(0, 1);
130 
131  // trovo la soglia ottima per questo classificatore, usando la matrica least squares
132  // TRA GENTLE E REAL CAMBIA SOLO QUASTA RIGA
133  return Metric::optimize(h, store, samples, m_dP, m_dN);
134  }
135 
136 
138  void Internal_Optimize(double *out, ClassifierType *h, int i0, int i1)
139  {
140  for(int i = i0; i< i1; ++i)
141  {
142  out[i] = Optimize(&m_store[i * m_training_set.Size()], h[i]);
143  }
144 
145  }
146 
147 
148 public:
149  RealDecisionStumpOracle() : m_store(0), m_preloadCount(0),
150 #ifdef _MULTITHREAD
151  m_threadCount(1),
152 #endif
153  m_featureGenerator(0) { }
154 
156 
159  {
160  m_training_set = set;
161  }
162 
165  {
166  m_featureGenerator = &f;
167  }
168 
169  void SetFastHeuristic(bool enable, bool reoptimize, int size) {
170  // TODO
171  }
172 
175  {
176  return m_training_set;
177  }
178 
179 #ifdef _MULTITHREAD
180  void SetNumberOfThreads(int th) {
182  m_threadCount = th;
183  }
184 #endif
185 
187  void SetPreloadSize(int n) {
188  m_preloadCount = n;
189  }
190 
194  bool GetHypothesis(ClassifierType & bestH);
195 
198  {
199  Evaluate_1(h);
200  // optimize h using response store in 'm_store'
201  return Optimize(m_store, h);
202  }
203 
204 
205 };
206 
208 
209 template<class FeatureType, class Aggregator, class Metric>
211 {
212  delete [] m_store;
213 }
214 
215 // TODO: should be outside
216 template<class FeatureType, class Aggregator, class Metric>
218 {
219  // TODO: Adaboost?
220 // double energy;
221 
222  // conto i pesi dei due schieramenti
223  m_dP = m_dN = 0.0f;
224  for(unsigned int i =0; i<m_training_set.Size(); i++)
225  if(m_training_set.templates[i].category == 1)
226  m_dP += m_training_set.templates[i].d; // A
227  else
228  m_dN += m_training_set.templates[i].d; // B
229 }
230 
231 
232 template<class FeatureType, class Aggregator, class Metric>
234 {
235  double bestS = 1.0;
236  Timer t;
237  int count = 0;
238 
239  if(m_featureGenerator == 0)
240  {
241  std::cerr << "No Feature Generator loaded. Use SetFeatureGenerator API before call GetHypothesis" << std::endl;
242  return false;
243  }
244 
245  bestH.debug_name( "internal error" );
246  t.Start();
247 
248  if(m_training_set.Size() == 0)
249  {
250  std::cerr << "No pattern loaded. Init Trainer Failed" << std::endl;
251  return false;
252  }
253 
254  delete [] m_store;
255  m_store = 0;
256 
257  std::cout << "Train with:" << m_training_set.n_patternP << "(+), " << m_training_set.n_patternN << "(-) using " << m_featureGenerator->Count() << " features.\n";
258 
259  InitWeight();
260 
261  m_featureGenerator->Reset();
262 
263  if(m_preloadCount<=1)
264  {
265  ClassifierType h;
266  // **** without preload *****
267  m_store = new PatternResponse [m_training_set.Size()];
268 
269  // PER OGNI FEATURE POSSIBILE
270  while(m_featureGenerator->Next(h))
271  {
272  double s;
273 
274  // compute weight of feature h
275  s = Optimize(h);
276 
277  count ++;
278 
279  if(s < bestS)
280  {
281  bestH = h;
282  bestS = s;
283 
284  std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " th:" << bestH.th << " p:" << bestH.pr << " n:" << bestH.nr << " Z:" << 2.0*bestS << std::endl;
285  }
286 
287  if((count & (32*1024-1))==0)
288  {
289  float fs = count/t.GetTime();
290  float pr = (float)count/(float)m_featureGenerator->Count();
291  int rem = (m_featureGenerator->Count() - count)/fs;
292  std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
293  if(rem<120)
294  std::cout << rem << "s remaining" << std::endl;
295  else
296  std::cout << rem/60 << "min remaining" << std::endl;
297  }
298 
299  }
300 
301  }
302  else
303  {
304 #ifndef _MULTITHREAD
305  static const int m_threadCount = 1;
306 #endif
307 // **** with preload ***
308 // analizzo m_preloadCount feature contemporaneamente
309  int h_count = 0; // number of feature to evaluate in this step
310 
311  m_store = new PatternResponse [m_training_set.Size() * m_preloadCount];
312 
313 // an array of hypothesis to be evaluated
314  ClassifierType *h = new ClassifierType[m_preloadCount];
315  double *responses = new double[m_preloadCount];
316 
317  std::cout << (sizeof(PatternResponse) * m_training_set.Size() * m_preloadCount + sizeof(ClassifierType) * m_preloadCount)/(1024*1204) << "Mb used for cache ("<< m_preloadCount << " features evaluated on " << m_threadCount << " threads)" << std::endl;
318 
319 // analizzo a blocchi di [m_preloadCount] alla volta
320  do {
321  /* questa parte sviene parallelizzata dividendo gli h in sottoparti su piu' thread **/
322  h_count =0;
323 
324 // inizializzo [m_preloadCount] classifier (per via della non perfetta corrispondenza, potrebbero essere anche meno)
325 // carico m_preloadCount feature in memoria
326  for(int i =0; i<m_preloadCount; i++)
327  if(m_featureGenerator->Next(h[h_count]) )
328  h_count++;
329  else
330  break;
331 
332 // Calcolo gli output di un certo numero di feature in parallelo
333 // genera gli m_store: NOTE viene paralellizato per feature
334  StreamEvaluate_1(h, h_count);
335 
336  // NOTE: Optimize can destroy m_store
337 
338 #ifdef _MULTITHREAD
339 // parallelizzo per feature
340  if(m_threadCount>1)
341  {
342  sprint::thread_group thread_pool_;
343  for(int k=0; k<m_threadCount; ++k)
344  {
345  unsigned int i0 = (h_count * k) / m_threadCount;
346  unsigned int i1 = (h_count * (k+1)) / m_threadCount;
347 
348  thread_pool_.create_thread(sprint::thread_bind(&RealDecisionStumpOracle::Internal_Optimize, this, responses, h, i0, i1));
349  }
350 
351  thread_pool_.join_all();
352  }
353  else
354 #endif
355  Internal_Optimize(responses, h, 0, h_count);
356 
357  for(int i =0; i<h_count; ++i)
358  {
359  if(responses[i] < bestS)
360  {
361  bestS = responses[i];
362  bestH = h[i];
363 
364  std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " th:" << bestH.th << " p:" << bestH.pr << " n:" << bestH.nr << " s:" << bestS << std::endl;
365  }
366 
367  count++;
368  if((count & (32*1024-1))==0)
369  {
370  float fs = count/t.GetTime();
371  float pr = (float)count/(float)m_featureGenerator->Count();
372  int rem = (m_featureGenerator->Count() - count)/fs;
373  std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
374  if(rem<120)
375  std::cout << rem << "s remaining" << std::endl;
376  else
377  std::cout << rem/60 << "min remaining" << std::endl;
378  }
379 
380  }
381 
382  } while(h_count==m_preloadCount);
383 
384  delete [] h;
385  delete [] responses;
386  }
387 
388 // no more
389  return bestS<1.0;
390 }
391 
392 template<class FeatureType, class Aggregator, class Metric>
393 template<class R>
395 {
396 #ifdef _MULTITHREAD
397  if(m_threadCount>1)
398  {
399  sprint::thread_group thread_pool_;
400  for(int k=0; k<m_threadCount; ++k)
401  {
402  unsigned int i0 = (m_training_set.Size() * k) / m_threadCount;
403  unsigned int i1 = (m_training_set.Size() * (k+1)) / m_threadCount;
404 
405  thread_pool_.create_thread(sprint::thread_bind(&RealDecisionStumpOracle::template Internal_StreamEvaluate_1<R>, this, h, size, i0, i1));
406  }
407 
408  thread_pool_.join_all();
409  }
410  else
411 #endif
412  Internal_StreamEvaluate_1(h, size, 0, m_training_set.Size());
413 }
414 
415 template<class FeatureType, class Aggregator, class Metric>
416 template<class R>
417 void RealDecisionStumpOracle<FeatureType, Aggregator, Metric>::Internal_StreamEvaluate_1(const R * h, int size, unsigned int i0, unsigned int i1)
418 {
419  ComputeFeaturesResponse(m_store, h, size, m_training_set, i0, i1);
420 }
421 #endif
422 
423 
424 
bool GetHypothesis(ClassifierType &bestH)
Definition: RealDecisionStumpOracle.h:233
void SetTrainingSet(const DataSetHandle< Aggregator > &set)
Set the training set used to train the classifier.
Definition: RealDecisionStumpOracle.h:158
Feature FeatureType
The feature type generated by this generator.
Definition: FeatureGenerator.h:41
Definition: timer.h:84
DataSetHandle< Aggregator > & GetTrainingSet()
return R/W the training set
Definition: RealDecisionStumpOracle.h:174
A classifier composed by a Feature Extractor and an Evaluation Policy A "Second Level" classifier...
Definition: BinaryClassifier.h:38
FeatureGenerator::FeatureType FeatureType
The Feature Extracted by FeatureGenerator.
Definition: RealDecisionStumpOracle.h:55
Definition: FeatureGenerator.h:36
void ComputeFeaturesResponse(BinaryWeightedPatternResponse< int > *store, const FeatureExtractor *h, int n_feature, const Set &set, int i0, int i1)
Definition: WeightedPatternResponse.h:85
void ExtractFeature(BinaryWeightedPatternResponse< int > *store, const FeatureExtractor &h, const Set &set)
extract features from the templates using h, put them in store, and associate the weighted category d
Definition: WeightedPatternResponse.h:105
ClassifierType
Definition: Types.h:31
void SetPreloadSize(int n)
Definition: RealDecisionStumpOracle.h:187
Cross Platform High Performance timer.
Definition: DataSet.h:50
Definition: thread_group.h:82
image/size TODO namespace
Definition: Types.h:39
abstracting thread
void join_all()
wait all threads terminate
Definition: thread_group.h:114
WeightedPatternResponse methods and utility functions.
BinaryClassifier< FeatureType, RealDecisionStump< int > > ClassifierType
The weak classifier provided by this oracle.
Definition: RealDecisionStumpOracle.h:58
BinaryWeightedPatternResponse< int > PatternResponse
A weighted pattern response with integer values.
Definition: WeightedPatternResponse.h:64
this file declare the virtual feature generator for Haar Features
void sort_pattern(PatternResponse *store, int n)
Definition: RealDecisionStumpOracle.h:51
a FeatureExtractor return a scalar number without relationship with classification ...
Definition: Types.h:35
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
A Binary Classifier. Implements the class BinaryClassifier.
void SetFeatureGenerator(FeatureGenerator &f)
Associate a Feature Generator to Decision Stump Generator.
Definition: RealDecisionStumpOracle.h:164
double Optimize(ClassifierType &h)
Using current metrics try to recompute parameters associated to this feature.
Definition: RealDecisionStumpOracle.h:197
declare a DataSet
Definition: WeightedPatternResponse.h:38