X-Boost  2.3.8
BinaryClassifierOracle.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _BINARY_CLASSIFIER_ORACLE_H
22 #define _BINARY_CLASSIFIER_ORACLE_H
23 
29 #ifdef _MULTITHREAD
30 # include "Thread/thread.h"
31 # include "Thread/bind.h"
32 # include "Thread/thread_group.h"
33 #endif
34 
35 #include "DataSet.h"
37 #include "Utility/timer.h"
40 
49 template<class FeatureGenerator, class ClassifierPolicy, class Metric, class Aggregator>
51 public:
52 
 // NOTE(review): three typedef lines were lost in the doc extraction here
 // (doxygen lines 54, 57, 60); per the cross-reference index they were:
 //   FeatureGenerator::FeatureType FeatureType            — the feature extracted by FeatureGenerator
 //   BinaryClassifier<FeatureType, ClassifierPolicy> WeakClassifierType — the weak classifier provided by this oracle
 //   BoostableClassifier<WeakClassifierType> ClassifierType — the additive-regression final classifier
55 
58 
61 
 // Macro from the DataSet machinery; presumably declares the Aggregator /
 // PatternResponse related types used below — TODO confirm against DataSet.h.
62  DECLARE_AGGREGATOR
63 
64 private:
 // Total weight of the positive (m_dP) and negative (m_dN) patterns,
 // normalized by InitWeight() so that m_dP + m_dN == 1 afterwards.
66  double m_dP, m_dN;
67 
 // Cache of weighted feature responses (per pattern, or per histogram bucket
 // when the fast heuristic is active). Owned; released in the destructor.
69  PatternResponse *m_store;
70 
 // Number of features preloaded/evaluated per batch by GetHypothesis
 // (<= 1 disables the preload path).
72  int m_preloadCount;
73 
 // When true, thresholds are searched on a coarse histogram instead of on
 // the fully sorted responses (see SetFastHeuristic / Fast_Optimize).
75  bool m_useFastOptimize;
76 
 // Number of histogram bins used by the fast heuristic (always >= 1).
78  unsigned int m_fastOptimizeBinSize;
79 
 // Enumerator of candidate features; not owned. Set via SetFeatureGenerator.
81  FeatureGenerator * m_featureGenerator;
82 
 // The weighted training set this oracle optimizes against.
84  DataSetHandle<Aggregator> m_training_set;
85 
86 #ifdef _MULTITHREAD
 // Number of worker threads used during training (see SetNumberOfThreads).
87  int m_threadCount;
89 #endif
90 
91 private:
92 
96  template<class R>
97  void Internal_StreamEvaluate_1(const R * h, int size, unsigned int i0, unsigned int i1);
98 
102  template<class R>
103  void Internal_StreamEvaluateAndBucket_1(const R * h, int size, unsigned int i0, unsigned int i1);
104 
106  void Internal_Sort_Store(int i0, int i1)
107  {
108  for(int i=i0; i<i1; ++i)
109  {
110  sort_pattern(&m_store[i * m_training_set.Size()], m_training_set.Size());
111  // std::sort(&m_store[i * Size()], &m_store[i * Size()+Size()]);
112  }
113  }
114 
121  void Internal_Optimize(int i0, int i1, WeakClassifierType *bestH, double *bestW, WeakClassifierType *h)
122  {
123  unsigned int n = m_training_set.Size();
124 // int n_feat = m_featureGenerator->Count();
125  PatternResponse * work = new PatternResponse[m_fastOptimizeBinSize]; // histogram generated (used only if m_useFastOptimize is enabled)
126 
127  for(int i=i0; i<i1; ++i)
128  {
129  double curW;
130 
131  if(m_useFastOptimize)
132  {
133  // compute the bucket for feature h[i], with response precomputed in m_store
134  GenerateBucket(work, m_fastOptimizeBinSize, &m_store[i * n], n);
135  // chiama l'ottimizzazione su m_store
136  curW = Metric::optimize(h[i], work, m_fastOptimizeBinSize, m_dP, m_dN);
137  }
138  else
139  {
140 // ordino m_store in ordine crescente di value della feature
141 // il 54.98% del tempo viene perso su questa riga
142 
143  sort_pattern(&m_store[i * n], n);
144 
145 // compute the best threshold for classifier H on sorted feature response
146  curW = Metric::optimize(h[i], &m_store[i * n], n, m_dP, m_dN );
147  }
148 
149  if(curW > *bestW)
150  {
151  *bestH = h[i];
152  *bestW = curW;
153  }
154  }
155 
156  delete [] work;
157  }
158 
159  /*
160  template<class FeatureExtractor>
161  void Evaluate_1(PatternResponse *store, const FeatureExtractor & h, const PatternType* templates, int n) const
162  {
163  for(unsigned int i =0; i<n; i++)
164  {
165  // feature value: nota la memoria punta al pixel (-1,-1)
166  store[i].value = h.response( getData1( templates[i].data+m_training_set.width+1+1, m_training_set.width+1);
167  // variation of weight d associated
168  // d, peso associato al pattern, value >0 se e' uno di tipo A, altri < 0
169  // note: non gestisce il caso nativo di astensione
170  store[i].d = templates[i].category * templates[i].d;
171  }
172  }
173  */
174  /*
175  template<class FeatureExtractor>
176  void Default_Evaluate_1(PatternResponse *store, const FeatureExtractor & h, const PatternType* templates, int n) const
177  {
178  for(unsigned int i =0; i<n; i++)
179  {
180  store[i].value = h.response(m_training_set.templates[i].data+m_training_set.width+1+1, m_training_set.width+1);
181  store[i].d = templates[i].category;
182  }
183  }
184  */
190  template<class FeatureExtractor>
191  void Evaluate_1(const FeatureExtractor & h)
192  {
193  ExtractFeature(m_store, h, m_training_set);
194  }
195 
196 // /// riempie m_store con l'istogramma
197 // template<class FeatureExtractor>
198 // void EvaluateAndBucket_1(const FeatureExtractor & h)
199 // {
200 // int *work = new int [ m_training_set.Size() ];
201 // ExtractFeatureAndBucket(m_store, h, m_training_set, m_fastOptimizeBinSize, work);
202 // delete [] work;
203 // }
204 
207  template<class R>
208  void StreamEvaluate_1(const R * h, int size);
209 
212  template<class R>
213  void StreamEvaluateAndBucket_1(const R * h, int size);
214 
216  void InitWeight();
217 
218 
219 public:
220  BinaryClassifierOracle() : m_store(0), m_preloadCount(0), m_useFastOptimize(false), m_fastOptimizeBinSize(1), m_featureGenerator(0)
221 #ifdef _MULTITHREAD
222  , m_threadCount(1)
223 #endif
224  { }
225 
227 
229  template<class SourceDataSet>
230  void SetTrainingSet(const SourceDataSet & set)
231  {
232  m_training_set = set;
233  }
234 
237  {
238  m_featureGenerator = &f;
239  }
240 
243  {
244  return m_training_set;
245  }
246 
247 #ifdef _MULTITHREAD
248  void SetNumberOfThreads(int th) {
250  m_threadCount = th;
251  }
252 #endif
253 
255  void SetPreloadSize(int n) {
256  m_preloadCount = n;
257  }
258 
262  void SetFastHeuristic(bool enable, bool reoptimize, int size) {
263  if(size<1)
264  {
265  std::cerr << "SetFastHeuristic called with an invalid parameter: size=" << size << std::endl;
266  size = 1;
267  }
268 
269  m_useFastOptimize = enable;
270  m_fastOptimizeBinSize = size;
271  }
272 
276  bool GetHypothesis(WeakClassifierType & bestH);
277 
282  {
283  // salva la response nei bucket
284  int *work = new int [ m_training_set.Size() ];
285  ExtractFeatureAndBucket(m_store, h, m_training_set, m_fastOptimizeBinSize, work);
286  delete [] work;
287 
288  // chiama l'ottimizzazione
289  return Metric::optimize(h, m_store, m_fastOptimizeBinSize, m_dP, m_dN);
290  }
291 
294  {
295  // calcolo l'output di tutte i pattern usando h in m_store
296  // il 30.71% viene perso qua
297  Evaluate_1(h);
298 
299  // ordino m_store in ordine crescente di value della feature
300  // il 54.98% del tempo viene perso su questa riga
301  // std::sort(&m_store[0], &m_store[Size()]);
302  Internal_Sort_Store(0, 1);
303  //BubbleSort(&m_store[0], Size());
304 
305  // trovo la soglia ottima per questo classificatore
306  return Metric::optimize(h, m_store, m_training_set.Size(), m_dP, m_dN);
307  }
308 
309 };
310 
312 
313 template<class FeatureType, class ClassifierPolicy, class Metric, class Aggregator>
315 {
316  delete [] m_store;
317 }
318 
319 // TODO: should be outside
320 template<class FeatureType, class ClassifierPolicy, class Metric, class Aggregator>
322 {
323  // TODO: Adaboost?
324  double energy;
325 
326  // conto i pesi dei due schieramenti
327  m_dP = m_dN = 0.0f;
328  for(unsigned int i =0; i<m_training_set.Size(); i++)
329  if(m_training_set.templates[i].category == 1)
330  m_dP += m_training_set.templates[i].d; // positive set
331  else
332  m_dN += m_training_set.templates[i].d; // negative set
333 
334  energy = m_dP + m_dN;
335  double f = 1.0 / energy;
336  m_dP *= f;
337  m_dN *= f;
338  for(unsigned int i =0; i<m_training_set.Size(); i++)
339  m_training_set.templates[i].d *= f;
340  std::cout << "* W+:" << m_dP << " W-:" << m_dN << " (reweighted from " << energy << ")" << std::endl;
341 }
342 
343 template<class FeatureType, class ClassifierPolicy, class Metric, class Aggregator>
344 template<class R>
346 {
347  // Cio
348  ComputeFeaturesResponse(m_store, h, size, m_training_set, i0, i1);
349 }
350 
351 // template<class FeatureType, class ClassifierPolicy, class Metric, class Aggregator>
352 // template<class R>
353 // void BinaryClassifierOracle<FeatureType, ClassifierPolicy, Metric, Aggregator>::Internal_StreamEvaluateAndBucket_1(const R * h, int size, unsigned int i0, unsigned int i1)
354 // {
355 // // setup store[i] memory
356 // for(int i = m_fastOptimizeBinSize * i0;i<m_fastOptimizeBinSize * j0; ++i)
357 // {
358 // m_store[i].value = min_resp + bin_size * i;
359 // m_store[i].d = 0.0;
360 // }
361 //
362 // for(int i=i0; i<i1; ++i)
363 // {
364 // ExtractFeatureAndBucket(&m_store[i * m_fastOptimizeBinSize], h[i], m_training_set, m_fastOptimizeBinSize, heuristicSupportVector);
365 // }
366 // }
367 
368 
369 template<class FeatureType, class ClassifierPolicy, class Metric, class Aggregator>
370 template<class R>
372 {
373 #ifdef _MULTITHREAD
374  if(m_threadCount>1)
375  {
376  sprint::thread_group thread_pool_;
377  for(int k=0; k<m_threadCount; ++k)
378  {
379  unsigned int i0 = (m_training_set.Size() * k) / m_threadCount;
380  unsigned int i1 = (m_training_set.Size() * (k+1)) / m_threadCount;
381 
382  thread_pool_.create_thread(sprint::thread_bind(&BinaryClassifierOracle::template Internal_StreamEvaluate_1<R>, this, h, size, i0, i1));
383  }
384 
385  thread_pool_.join_all();
386  }
387  else
388 #endif
389  Internal_StreamEvaluate_1(h, size, 0, m_training_set.Size());
390 }
391 
392 
393 // template<class FeatureType, class ClassifierPolicy, class Metric, class Aggregator>
394 // template<class R>
395 // void BinaryClassifierOracle<FeatureType, ClassifierPolicy, Metric, Aggregator>::StreamEvaluateAndBucket_1(const R * h, int size)
396 // {
397 // int n = m_training_set.Size();
398 //
399 // #ifdef _MULTITHREAD
400 // if(m_threadCount>1)
401 // {
402 // sprint::thread_group thread_pool_;
403 // for(int k=0; k<m_threadCount; ++k)
404 // {
405 // unsigned int i0 = (n * k) / m_threadCount;
406 // unsigned int i1 = (n * (k+1)) / m_threadCount;
407 //
408 // thread_pool_.create_thread(sprint::thread_bind(&BinaryClassifierOracle::template Internal_StreamEvaluateAndBucket_1<R>, this, h, size, i0, i1));
409 // }
410 //
411 // thread_pool_.join_all();
412 // }
413 // else
414 // #endif
415 // Internal_StreamEvaluateAndBucket_1(h, size, 0, n);
416 // }
417 
/// Scan every feature produced by the feature generator, optimize a weak
/// classifier for each, and return the best one found in bestH.
/// @return true if the best classifier beats chance (score >= maxW * 0.5)
/// NOTE(review): the declarator line was lost in extraction (doxygen line
/// 419); per the cross-reference index it was:
///   bool BinaryClassifierOracle<...>::GetHypothesis(WeakClassifierType & bestH)
418 template<class FeatureType, class ClassifierPolicy, class Metric, class Aggregator>
420 {
421  double bestW = 0.0; // NOTE: below 0.5 means the classifier cannot improve on chance
422  double maxW;
423  int count = 0;
424  int n = m_training_set.Size();
425  // Internal training timer
426  Timer t;
427  double last_check;
428 
429 
 // a feature generator is mandatory: it enumerates the candidate features
430  if(m_featureGenerator == 0)
431  {
432  std::cerr << "No Feature Generator loaded. Use SetFeatureGenerator API before call GetHypothesis" << std::endl;
433  return false;
434  }
435 
436  bestH.debug_name( "internal error" );
437 
438  if(n == 0)
439  {
440  std::cerr << "No pattern loaded. Init Trainer Failed" << std::endl;
441  return false;
442  }
443 
444  // release memory allocated in a previous step (oh well)
445  delete [] m_store;
446  m_store = 0;
447 
448  std::cout << "Train with:" << m_training_set.n_patternP << "(+), " << m_training_set.n_patternN << "(-) using " << m_featureGenerator->Count() << " features.\n";
449 
 // normalize pattern weights and cache the per-class totals m_dP / m_dN
450  InitWeight();
451 
452  maxW = m_dP + m_dN; // TODO ASSERT != 1
453 
454  m_featureGenerator->Reset();
455 
456  int n_feat = m_featureGenerator->Count();
457 
458  t.Start();
459 
460  last_check = t.GetTime();
461 
463  if(m_preloadCount<=1)
464  {
 // NOTE(review): a line was lost in extraction here (doxygen line 465);
 // from the uses of `h` below it was presumably `WeakClassifierType h;` —
 // verify against the original header.
466 
467  // **** without preload *****
468 
469  // m_store must be able to hold either the training-set responses or the bins
470  // TODO: also allocate the memory to store the responses of Fast_Optimize
471  m_store = new PatternResponse [std::max<int>(n, m_fastOptimizeBinSize) ];
472 
473  // FOR EVERY POSSIBLE FEATURE
474  while(m_featureGenerator->Next(h))
475  {
476  double curW;
477 
 // score the candidate either with the fast histogram heuristic or
 // with the exact sorted-response optimization
478  if(m_useFastOptimize)
479  curW = Fast_Optimize(h);
480  else
481  curW = Optimize(h);
482 
483  count ++;
484 
 // keep the best-scoring classifier seen so far, logging each improvement
485  if(curW > bestW)
486  {
487  bestH = h;
488  bestW = curW;
489 
490  std::cout << count <<" (" << (100.0f * (double)count/(double)n_feat) << "%): ";
491  if(bestH.debug_name()) std::cout << "name:" << bestH.debug_name() << " ";
492  std::cout << "parity:" << bestH.parity << " th:" << bestH.th << " w+:"<< bestW << ", w-:" << maxW-bestW << std::endl;
493  }
494 
495  // some stats
 // progress report roughly every 10 seconds
496  if ((t.GetTime() - last_check) > 10.0)
497  {
498  float fs = count/t.GetTime();
499  float pr = (float)count/(float)n_feat;
500  int rem = (n_feat - count)/fs;
501  std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
502  if(rem<120)
503  std::cout << rem << "s remaining" << std::endl;
504  else
505  std::cout << rem/60 << "min remaining" << std::endl;
506 
507  last_check = t.GetTime();
508  }
509 
510  }
511 
512 
513  }
515  else
516  {
 // in single-thread builds, shadow m_threadCount with the constant 1 so
 // the code below compiles unchanged
517 #ifndef _MULTITHREAD
518  static const int m_threadCount = 1;
519 #endif
520  // some vars used in multithread (per-thread best classifier and score)
521  double *t_bestW = new double[m_threadCount];
522  WeakClassifierType *t_bestH = new WeakClassifierType[m_threadCount];
523  double prev_w = 0.0;
524 
525 // **** with preload ***
526 // analyze m_preloadCount features at a time
527  int h_count = 0; // number of feature to evaluate in this step
528 
529  // in multithread mode patterns are processed in parallel: responses are computed across threads and stored in m_store
530  m_store = new PatternResponse [n * m_preloadCount];
 // NOTE(review): "1024*1204" below looks like a typo for 1024*1024 (Mb) — verify
531  std::cout << (sizeof(PatternResponse) * n * m_preloadCount + sizeof(WeakClassifierType) * m_preloadCount)/(1024*1204) << "Mb used for cache" << std::endl;
532 
533  // an array of hypothesis to be evaluated (in multithread)
534  WeakClassifierType *h = new WeakClassifierType[m_preloadCount];
535 
536 // process blocks of [m_preloadCount] features at a time
537  do {
538  /* this part is parallelized by splitting the h array into chunks across multiple threads */
539  h_count =0;
540 
541 // initialize [m_preloadCount] classifiers (due to imperfect correspondence, there could be fewer)
542 // load m_preloadCount features into memory
543  for(int i =0; i<m_preloadCount; i++)
544  if(m_featureGenerator->Next(h[h_count]) )
545  h_count++;
546  else
547  break;
548 
549 // Compute the outputs of a number of features in parallel
550 // fills m_store: NOTE to maximize cache usage, this is internally parallelized over the training-set patterns
551  StreamEvaluate_1(h, h_count);
552 
553 #ifdef _MULTITHREAD
554 // NOTE: parallelized per feature
555  if(m_threadCount>1)
556  {
557  sprint::thread_group thread_pool_;
558  for(int k=0; k<m_threadCount; ++k)
559  {
 // k-th thread optimizes candidate features [i0, i1)
560  unsigned int i0 = (h_count * k) / m_threadCount;
561  unsigned int i1 = (h_count * (k+1)) / m_threadCount;
562 
 // seed with the global best so t_bestH[k] is only read when improved
563  t_bestW[k] = bestW;
564 
565  thread_pool_.create_thread(sprint::thread_bind(&BinaryClassifierOracle::Internal_Optimize, this, i0, i1, &t_bestH[k], &t_bestW[k], h));
566  }
567 
568  thread_pool_.join_all();
569 
570  // search for the best-best classifier
571  for(int k=0; k<m_threadCount; ++k)
572  {
573  if(t_bestW[k] > bestW)
574  {
575  bestW = t_bestW[k];
576  bestH = t_bestH[k];
577  }
578  }
579  }
580  else
581 #endif
582  {
583 
584  Internal_Optimize(0, h_count, &bestH, &bestW, h);
585 
586  }
587 
588  count += h_count;
589 
590  // check if the feature is changed and print some infos
591  if(bestW > prev_w)
592  {
593  float pr = (float)count/(float)n_feat;
594  std::cout << " (" << 100.0f * pr << "%) ";
595  if(bestH.debug_name()) std::cout << "name:" << bestH.debug_name() << " ";
596  std::cout << "parity:" << bestH.parity << " th:" << bestH.th << " w+:"<< bestW << " w-:" << maxW - bestW << std::endl;
597  prev_w = bestW;
598  }
599 
600  // some stats
 // progress report roughly every 10 seconds
601  if ((t.GetTime() - last_check) > 10.0)
602  {
603  float fs = count/t.GetTime();
604  float pr = (float)count/(float)n_feat;
605  int rem = (n_feat - count)/fs;
606  std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
607  if(rem<120)
608  std::cout << rem << "s remaining" << std::endl;
609  else
610  std::cout << rem/60 << "min remaining" << std::endl;
611 
612  last_check = t.GetTime();
613  }
614 
 // a short batch means the generator is exhausted: stop
615  } while(h_count==m_preloadCount);
616 
617  // free memory for heuristic and threads
618 
619  delete [] h;
620 
621  delete [] t_bestW;
622  delete [] t_bestH;
623 
624  }
625 
626 
 // the fast heuristic only approximates the threshold: re-run the exact
 // optimization on the winning classifier
627  if(m_useFastOptimize)
628  {
629  std::cout << "[reoptimize] [before] parity:" << bestH.parity << " th:" << bestH.th << " w+:"<< bestW << ", w-:" << maxW-bestW << std::endl;
630 
631  // reoptimize
632  bestW = Optimize(bestH);
633 
634  std::cout << "[reoptimize] [after] parity:" << bestH.parity << " th:" << bestH.th << " w+:"<< bestW << ", w-:" << maxW-bestW << std::endl;
635  }
636 
637  if(bestW<maxW*0.5)
638  std::cout << "best classifier: " << bestW << "/" << maxW <<": exit" << std::endl;
639 
640 // no more
 // succeed only if the best classifier is at least as good as chance
641  return bestW>=maxW*0.5;
642 }
643 
644 
645 #endif
Feature FeatureType
The feature type generate by this generator.
Definition: FeatureGenerator.h:41
FeatureGenerator::FeatureType FeatureType
The Feature Extracted by FeatureGenerator.
Definition: BinaryClassifierOracle.h:54
Definition: timer.h:84
A classifier composed by a Feature Extractor and an Evaluation Policy A "Second Level" classifier...
Definition: BinaryClassifier.h:38
a Voting Boostable classifier
bool GetHypothesis(WeakClassifierType &bestH)
Definition: BinaryClassifierOracle.h:419
Definition: FeatureGenerator.h:36
void SetFeatureGenerator(FeatureGenerator &f)
Associate a Feature Generator to Decision Stump Generator.
Definition: BinaryClassifierOracle.h:236
void ComputeFeaturesResponse(BinaryWeightedPatternResponse< int > *store, const FeatureExtractor *h, int n_feature, const Set &set, int i0, int i1)
Definition: WeightedPatternResponse.h:85
void ExtractFeature(BinaryWeightedPatternResponse< int > *store, const FeatureExtractor &h, const Set &set)
extract from templates feature using h and put in store, and associating the weighted category d ...
Definition: WeightedPatternResponse.h:105
void SetTrainingSet(const SourceDataSet &set)
Set the training set used to recover the threshold.
Definition: BinaryClassifierOracle.h:230
void ExtractFeatureAndBucket(BinaryWeightedPatternResponse< int > *store, const FeatureExtractor &h, const Set &set, int n_bucket, int *value)
Definition: WeightedPatternResponse.h:129
Cross Platform High Performance timer.
Definition: DataSet.h:50
double Optimize(WeakClassifierType &h)
Using current metrics try to recompute parameters associated to this feature.
Definition: BinaryClassifierOracle.h:293
Definition: thread_group.h:82
image/size TODO namespace
Definition: Types.h:39
abstracting thread
void join_all()
wait all threads terminate
Definition: thread_group.h:114
WeightedPatternResponse methods and utility functions.
proposal 1 for thread group
double Fast_Optimize(WeakClassifierType &h)
Definition: BinaryClassifierOracle.h:281
BoostableClassifier< WeakClassifierType > ClassifierType
Additive Regression Final Classifier.
Definition: BinaryClassifierOracle.h:60
Definition: SourceDataSet.h:46
BinaryWeightedPatternResponse< int > PatternResponse
A weighted pattern response with integer values.
Definition: WeightedPatternResponse.h:64
void SetFastHeuristic(bool enable, bool reoptimize, int size)
Definition: BinaryClassifierOracle.h:262
void SetPreloadSize(int n)
Definition: BinaryClassifierOracle.h:255
void sort_pattern(PatternResponse *store, int n)
a FeatureExtractor return a scalar number without relationship with classification ...
Definition: Types.h:35
method to create function pointer for thread call
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
A Binary Classifier. Implements the class BinaryClassifier.
declare a DataSet
Definition: WeightedPatternResponse.h:38
Definition: BinaryClassifierOracle.h:50
Definition: BoostableClassifier.h:40
DataSetHandle< Aggregator > & GetTrainingSet()
return R/W the training set
Definition: BinaryClassifierOracle.h:242
void GenerateBucket(BinaryWeightedPatternResponse< int > *store, int n_bucket, const BinaryWeightedPatternResponse< int > *source, int n_element)
BinaryClassifier< FeatureType, ClassifierPolicy > WeakClassifierType
The weak classifier provided by this oracle.
Definition: BinaryClassifierOracle.h:57