X-Boost  2.3.8
BayesianStumpOracle.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _BAYESIAN_STUMP_ORACLE_H
22 #define _BAYESIAN_STUMP_ORACLE_H
23 
29 
30 #ifdef _MULTITHREAD
/// Per-thread work item used by the multi-threaded feature scan in
/// GetHypothesis: one worker optimizes the hypothesis @c h on its private
/// response buffer @c mem and stores the resulting weight in @c w.
template<class T>
struct detail_thread_bayesian_stump {
    double w;   // output: weight of correctly classified samples, filled by Thread_OptimizeClassifier
    T h;        // hypothesis under evaluation (filled by the feature generator)
    int *mem;   // response scratch buffer, one int per training sample (non-owning)
};
37 #endif
38 
40 template<class FeatureGenerator, class Aggregator>
42 
43 public:
44 
45 
48 
51 
54 
55  DECLARE_AGGREGATOR
56 
57 private:
58 
60  int m_threadCount;
61 
63  int m_preloadCount;
64 
66  FeatureGenerator * m_featureGenerator;
67 
69  DataSetHandle<Aggregator> m_training_set;
70 
72  int *response;
73 
74 private:
75 
76 // /// return correct detection W
77 // /// @note generalizzare
78 // double evaluate(const WeakClassifierType & h) const {
79 // double w = 0.0;
80 // // std::cout << "\nevaluate:";
81 // for(int i =0;i<m_training_set.Size();++i)
82 // {
83 // int n = h.classify(m_store[i].a, m_store[i].b);
84 // // std::cout << i << ',' << m_store[i].a <<';' << m_store[i].b << ',' << m_training_set.templates[i].d << ',' << m_training_set.templates[i].category << ';' << n << ' ';
85 // if( n == m_training_set.templates[i].category)
86 // {
87 // // std::cout << i << ' ';
88 // w+=m_training_set.templates[i].d;
89 // }
90 // }
91 //
92 // return w;
93 // };
94 //
96  double OptimizeClassifier(WeakClassifierType & h_best, int *mem)
97  {
98 // static const char *class_name[]={"1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32"};
99  int f_min, f_max;
100  double p[32], n[32];
101 
102  // compute response to feature operator h, min and max
103  for(unsigned int i =0; i<m_training_set.Size(); ++i)
104  {
105  mem[i] = h_best.response( getData1( m_training_set.templates[i], m_training_set), getData2(m_training_set.templates[i], m_training_set) );
106 // std::cout << response[i] << ' ';
107  if(i==0) {
108  f_min = f_max = mem[0];
109  }
110  else
111  {
112  if(mem[i] < f_min)
113  f_min = mem[i];
114  if(mem[i] > f_max)
115  f_max = mem[i];
116  }
117  }
118 
119  for(int i =0; i<32; ++i) p[i]=n[i]=0.0;
120 
121  h_best.m_featRange = f_max - f_min + 1;
122  h_best.m_minFeatValue = f_min;
123  h_best.m_truthTable = 0;
124 
125  // create histogram
126  for(unsigned int i =0; i<m_training_set.Size(); ++i)
127  {
128  int bin = (h_best.bin_size*(mem[i] - h_best.m_minFeatValue)) / h_best.m_featRange;
129 // nt bin = (response[i] - h.m_minFeatValue) / h.m_featRange;
130 
131  if(bin<0 || bin > h_best.bin_size)
132  {
133  std::cerr << "invalid feature: " << bin << "/" << h_best.bin_size << " | min = " << f_min << " | max = " << f_max << " " << h_best.debug_name() << std::endl;
134  return 0.0;
135  }
136 
137  if( m_training_set.templates[i].category == 1)
138  p[bin] += m_training_set.templates[i].d;
139  else
140  n[bin] += m_training_set.templates[i].d;
141  }
142 
143  // compute w and thruth-table
144  double w = 0.0;
145 
146 
147  for(int i =0; i<h_best.bin_size; ++i) {
148  if(p[i]>n[i])
149  {
150  h_best.m_truthTable |= 1<<i;
151  w+= p[i];
152  }
153  else
154  {
155  w+= n[i];
156  }
157  }
158 
159  return w;
160  }
161 #ifdef _MULTITHREAD
    /// Thread entry point for the multi-threaded scan: optimize the
    /// hypothesis held in the work item @a ptr and write the resulting
    /// weight back into the same work item.
    void Thread_OptimizeClassifier(detail_thread_bayesian_stump<WeakClassifierType> * ptr )
    {
        ptr->w = OptimizeClassifier(ptr->h, ptr->mem);
    }
166 #endif
167 public:
168 
171  {
172  m_training_set = set;
173  }
174 
177  {
178  m_featureGenerator = &f;
179  }
180 
    /// Enable a fast training heuristic.
    /// @note not implemented yet: all parameters are currently ignored.
    void SetFastHeuristic(bool enable, bool reoptimize, int size) {
        // TODO
    }
184 
186  void LimitTrainingSetWeight(double d)
187  {
188  int n = 0;
189  for(unsigned int i =0; i<m_training_set.Size(); i++)
190  if( m_training_set.templates[i].d > d)
191  {
192  m_training_set.templates[i].d = d;
193  ++n;
194  }
195 
196  std::cout << n << " samples of " << m_training_set.Size() << " (" << (n*100)/m_training_set.Size() << "%) have been limited in weight (" << d <<")" << std::endl;
197  }
198 
201  {
202  return m_training_set;
203  }
204 
    /// Change the thread number used in Multi Threading training.
    /// @param th number of worker threads; ignored when the library is
    ///           built without _MULTITHREAD
    void SetNumberOfThreads(int th)
    {
#ifdef _MULTITHREAD
        m_threadCount = th;
#endif
    }
212 
    /// Set the feature preload batch size.
    /// @note currently unused: GetHypothesis always takes the non-preload
    ///       path (its preload test is hard-coded to if(1)).
    void SetPreloadSize(int n) {
        m_preloadCount = n;
    }
216 
219  {
220  double w;
221  response = new int [m_training_set.Size() ];
222  w = OptimizeClassifier(h, response);
223 
224  delete [] response;
225  response = 0;
226  return w;
227  }
228 
229  bool GetHypothesis(WeakClassifierType & bestH)
230  {
231 
232  Timer t;
233  double bestW = 0.0; // NOTA: sotto a 0.5 vuol dire che non puo' migliorare
234  int count = 0;
235 
236  if(m_featureGenerator == 0)
237  {
238  std::cerr << "No Feature Generator loaded. Use SetFeatureGenerator API before call GetHypothesis" << std::endl;
239  return false;
240  }
241 
242  bestH.debug_name("internal error");
243  t.Start();
244 
245  if(m_training_set.Size() == 0)
246  {
247  std::cerr << "No pattern loaded. Init Trainer Failed" << std::endl;
248  return false;
249  }
250 
251  if(m_threadCount<=1) m_threadCount = 1;
252 
253  // if(m_preloadCount<=1)
254  if(1)
255  {
256  m_featureGenerator->Reset();
257 
258  response = new int [m_training_set.Size() * m_threadCount ];
259 
260  // PER OGNI FEATURE POSSIBILE
261 #ifdef _MULTITHREAD
262  detail_thread_bayesian_stump<WeakClassifierType> * storage = new detail_thread_bayesian_stump<WeakClassifierType>[m_threadCount];
263  int token = 0;
264  for(int i =0; i<m_threadCount; ++i)
265  storage[i].mem = response + i * m_training_set.Size();
266 
267  while( m_featureGenerator->Next( storage[token].h ) )
268  {
269  token++;
270  if(token == m_threadCount)
271  {
272  token = 0;
273 
274  if(m_threadCount>1)
275  {
276  sprint::thread_group thread_pool_;
277  for(int i =0; i<m_threadCount; ++i)
278  thread_pool_.create_thread(sprint::thread_bind(&BayesianStumpOracle::Thread_OptimizeClassifier, this, &storage[i]));
279  thread_pool_.join_all();
280  }
281  else
282  {
283  Thread_OptimizeClassifier(&storage[0]);
284  }
285 
286  for(int i =0; i<m_threadCount; ++i)
287  {
288  count ++;
289  double w = storage[i].w;
290  if(w > bestW)
291  {
292  bestW = w;
293  bestH = storage[i].h;
294  std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " w+:"<< bestW << std::endl;
295  }
296 
297  if((count & (32*1024-1))==0)
298  {
299  float fs = count/t.GetTime();
300  float pr = (float)count/(float)m_featureGenerator->Count();
301  int rem = (m_featureGenerator->Count() - count)/fs;
302  std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
303  if(rem<120)
304  std::cout << rem << "s remaining" << std::endl;
305  else
306  std::cout << rem/60 << "min remaining" << std::endl;
307  }
308  }
309 
310  }
311  }
312 
313  if(token>0)
314  {
315  if(token>1)
316  {
317  sprint::thread_group thread_pool_;
318  for(int i =0; i<token; ++i)
319  thread_pool_.create_thread(sprint::thread_bind(&BayesianStumpOracle::Thread_OptimizeClassifier, this, &storage[i]));
320  thread_pool_.join_all();
321  }
322  else
323  {
324  Thread_OptimizeClassifier(&storage[0]);
325  }
326 
327  for(int i =0; i<token; ++i)
328  {
329  count ++;
330  double w = storage[i].w;
331  if(w > bestW)
332  {
333  bestW = w;
334  bestH = storage[i].h;
335  std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " w+:"<< bestW << std::endl;
336  }
337 
338  }
339  }
340 
341  delete [] storage;
342 
343 #else
345 
346  while( m_featureGenerator->Next(h) )
347  {
348  double w = OptimizeClassifier(h, response);
349 
350  count ++;
351 
352  if(w > bestW)
353  {
354  bestW = w;
355  bestH = h;
356  std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " w+:"<< bestW << std::endl;
357  }
358 
359  if((count & (32*1024-1))==0)
360  {
361  float fs = count/t.GetTime();
362  float pr = (float)count/(float)m_featureGenerator->Count();
363  int rem = (m_featureGenerator->Count() - count)/fs;
364  std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
365  if(rem<120)
366  std::cout << rem << "s remaining" << std::endl;
367  else
368  std::cout << rem/60 << "min remaining" << std::endl;
369  }
370 
371 
372  }
373 #endif
374 
375  delete [] response;
376  response = 0;
377 
378  std::cout << "Expected W:" << bestW<< std::endl;
379  return true;
380  }
381  else
382  {
383  // WITH PRELOAD
384 
385 
386 
387  }
388 
389  }
390 };
391 
392 
393 #endif
394 
Feature FeatureType
The feature type generated by this generator.
Definition: FeatureGenerator.h:41
Definition: timer.h:84
DataSetHandle< Aggregator > & GetTrainingSet()
return R/W the training set
Definition: BayesianStumpOracle.h:200
A classifier composed by a Feature Extractor and an Evaluation Policy A "Second Level" classifier...
Definition: BinaryClassifier.h:38
virtual void Reset()=0
reset any internal counters
double Optimize(ClassifierType &h)
Using current metrics try to recompute parameters associated to this feature.
Definition: BayesianStumpOracle.h:218
FeatureGenerator::FeatureType FeatureType
The Feature Extracted by FeatureGenerator.
Definition: BayesianStumpOracle.h:47
Definition: FeatureGenerator.h:36
BoostableClassifier< WeakClassifierType > ClassifierType
The weak classifier reported by this oracle.
Definition: BayesianStumpOracle.h:53
Definition: DataSet.h:50
Definition: thread_group.h:82
image/size TODO namespace
Definition: Types.h:39
void LimitTrainingSetWeight(double d)
Definition: BayesianStumpOracle.h:186
void join_all()
wait all threads terminate
Definition: thread_group.h:114
BinaryClassifier< FeatureType, BayesianStump< int > > WeakClassifierType
The internal weak classifier.
Definition: BayesianStumpOracle.h:50
virtual bool Next(Feature &out)=0
return the next feature, or return false
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
void SetNumberOfThreads(int th)
Change the thread number used in Multi Threading training.
Definition: BayesianStumpOracle.h:206
void SetFeatureGenerator(FeatureGenerator &f)
Associate a Feature Generator to Decision Stump Generator.
Definition: BayesianStumpOracle.h:176
void SetTrainingSet(const DataSetHandle< Aggregator > &set)
Set the training set used to recover the threshold.
Definition: BayesianStumpOracle.h:170
Definition: BoostableClassifier.h:40
virtual unsigned int Count() const =0
return the count of feature available
Definition: BayesianStumpOracle.h:41
A histogram-based Bayesian classifier. Implements the class BayesianStump for BinaryClassifier.