X-Boost  2.3.8
NaiveDecisionStumpOracle.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _NAIVE_DECISION_STUMP_ORACLE_H
22 #define _NAIVE_DECISION_STUMP_ORACLE_H
23 
// this class of oracles (Naive and Bayes) optimizes using the raw patterns directly,
// without performing a preliminary sort of the feature responses.
// The polarity/parity inversion issue still has to be handled.
30 
32 
33 #ifdef _MULTITHREAD
//! Per-thread work item used by the multithreaded optimizer:
//! one candidate weak classifier plus the score computed for it.
template<class T>
struct detail_thread_naive_decision_stump {
 double w; //!< weight of correctly classified samples, filled in by Optimize()
 T h;      //!< candidate weak classifier (its polarity may be inverted in place)
};
40 #endif
41 
43 template<class FeatureGenerator, class Aggregator >
45 
46 public:
47 
48 
51 
54 
57 
58  DECLARE_AGGREGATOR
59 
60 private:
61 
 int m_threadCount;   //!< number of worker threads used by GetHypothesis (multithread builds)

 int m_preloadCount;  //!< requested preload size (set via SetPreloadSize; preload path not implemented yet)

 FeatureGenerator * m_featureGenerator;   //!< non-owning pointer to the feature source; must outlive this oracle

 DataSetHandle<Aggregator> m_training_set;   //!< weighted training samples used to score candidate classifiers
73 
74 private:
75 #ifdef _MULTITHREAD
76  // since the threshold is fixed, this code can only evaluate performance
77  void Thread_OptimizeClassifier(detail_thread_naive_decision_stump<WeakClassifierType> * ptr )
78  {
79  ptr->w = Optimize(ptr->h);
80  }
81 #endif
82 public:
83 
 {
 // store (by value) the dataset whose weighted samples drive Optimize()
 m_training_set = set;
 }
89 
 {
 // keep a non-owning pointer; the generator must outlive this oracle
 m_featureGenerator = &f;
 }
95 
97 // void LimitTrainingSetWeight(double d)
98 // {
99 // int n = 0;
100 // for(unsigned int i =0;i<m_training_set.Size();i++)
101 // if( m_training_set.templates[i].d > d)
102 // {
103 // m_training_set.templates[i].d = d;
104 // ++n;
105 // }
106 //
107 // std::cout << n << " samples of " << m_training_set.Size() << " (" << (n*100)/m_training_set.Size() << "%) have been limited in weight (" << d <<")" << std::endl;
108 // }
109 //
 {
 // expose the stored set R/W so callers can reweight samples in place
 return m_training_set;
 }
115 
117  void SetNumberOfThreads(int th)
118  {
119 #ifdef _MULTITHREAD
120  m_threadCount = th;
121 #endif
122  }
123 
124  void SetPreloadSize(int n) {
125  m_preloadCount = n;
126  }
127 
128  void SetFastHeuristic(bool enable, bool reoptimize, int size) {
129  // TODO
130  }
131 
 {
 // Optimize(): score weak classifier h on the weighted training set and keep
 // the better of the two polarities. Returns the total weight of correctly
 // classified samples; h's polarity is inverted in place when that improves
 // the score.
 //
 // Weight accumulators, indexed by (sample category, sign of response):
 //   pp/pz/pn = positive samples with response >0 / ==0 / <0
 //   np/nz/nn = negative samples with response >0 / ==0 / <0
 double pp,pn,nn,np;
 double pz,nz;
// double w = 0.0;
 pp = pn = nn = np = pz = nz = 0.0;

 // compute response to feature operator h on every weighted sample
 for(typename DataSetHandle<Aggregator>::ListType::const_iterator i = m_training_set.templates.begin(); i != m_training_set.templates.end(); ++i)
 {
  int value = h.response( getData1( *i, m_training_set), getData2( *i, m_training_set) );
  double d = i->d;

  if( i->category == 1)
  {
   if(value > 0)
    pp += d;
   else if(value == 0)
    pz += d;
   else
    pn += d;
  }
  else
  {
   if(value > 0)
    np += d;
   else if(value == 0)
    nz += d;
   else
    nn += d;

  }

 }

 // The decision rule appears to be "positive iff parity*response > 0", so a
 // zero response is always predicted negative and nz counts as correct for
 // BOTH polarities — TODO confirm against the stump's classify().
 // parity=+1: correct weight = pp + nn + nz
 double w1 = pp + nn + nz;
 // parity=-1: correct weight = pn + np + nz
 double w2 = pn + np + nz;

 if(w1 >= w2)
 {
  return w1;
 }
 else
 {
  // the inverted polarity classifies more weight correctly: flip h in place
  // and report the improved score
  h.invert_polarity();
  return w2;
 }
 }
184 
185 
186  bool GetHypothesis(WeakClassifierType & bestH)
187  {
188 
189  Timer t;
190  double bestW = 0.0; // NOTA: sotto a 0.5 vuol dire che non puo' migliorare
191  int count = 0;
192 
193  if(m_featureGenerator == 0)
194  {
195  std::cerr << "No Feature Generator loaded. Use SetFeatureGenerator API before call GetHypothesis" << std::endl;
196  return false;
197  }
198 
199  bestH.debug_name("internal error");
200  t.Start();
201 
202  if(m_training_set.Size() == 0)
203  {
204  std::cerr << "No pattern loaded. Init Trainer Failed" << std::endl;
205  return false;
206  }
207 
208  if(m_threadCount<=1) m_threadCount = 1;
209 
210  // if(m_preloadCount<=1)
211  if(1)
212  {
213  m_featureGenerator->Reset();
214 
215  // for any possible features:
216 #ifdef _MULTITHREAD
217  detail_thread_naive_decision_stump<WeakClassifierType> * storage = new detail_thread_naive_decision_stump<WeakClassifierType>[m_threadCount];
218  int token = 0;
219 
220  while( m_featureGenerator->Next( storage[token].h ) )
221  {
222  token++;
223  if(token == m_threadCount)
224  {
225  token = 0;
226 
227  if(m_threadCount>1)
228  {
229  sprint::thread_group thread_pool_;
230  for(int i =0; i<m_threadCount; ++i)
231  thread_pool_.create_thread(sprint::thread_bind(&NaiveDecisionStumpOracle::Thread_OptimizeClassifier, this, &storage[i]));
232  thread_pool_.join_all();
233  }
234  else
235  {
236  Thread_OptimizeClassifier(&storage[0]);
237  }
238 
239  for(int i =0; i<m_threadCount; ++i)
240  {
241  count ++;
242  double w = storage[i].w;
243  if(w > bestW)
244  {
245  bestW = w;
246  bestH = storage[i].h;
247  std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " w+:"<< bestW << std::endl;
248  }
249 
250  if((count & (32*1024-1))==0)
251  {
252  float fs = count/t.GetTime();
253  float pr = (float)count/(float)m_featureGenerator->Count();
254  int rem = (m_featureGenerator->Count() - count)/fs;
255  std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
256  if(rem<120)
257  std::cout << rem << "s remaining" << std::endl;
258  else
259  std::cout << rem/60 << "min remaining" << std::endl;
260  }
261  }
262 
263  }
264  }
265 
266  if(token>0)
267  {
268  if(token>1)
269  {
270  sprint::thread_group thread_pool_;
271  for(int i =0; i<token; ++i)
272  thread_pool_.create_thread(sprint::thread_bind(&NaiveDecisionStumpOracle::Thread_OptimizeClassifier, this, &storage[i]));
273  thread_pool_.join_all();
274  }
275  else
276  {
277  Thread_OptimizeClassifier(&storage[0]);
278  }
279 
280  for(int i =0; i<token; ++i)
281  {
282  count ++;
283  double w = storage[i].w;
284  if(w > bestW)
285  {
286  bestW = w;
287  bestH = storage[i].h;
288  std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " w+:"<< bestW << std::endl;
289  }
290 
291  }
292  }
293 
294  delete [] storage;
295 
296 #else
297  // TODO: unimplemented yet
298 // WeakClassifierType h;
299 //
300 // while( m_featureGenerator->Next(h) )
301 // {
302 // double w = OptimizeClassifier(h, response);
303 //
304 // count ++;
305 //
306 // if(w > bestW)
307 // {
308 // bestW = w;
309 // bestH = h;
310 // std::cout << count <<" (" << (100.0f * (double)count/(double)m_featureGenerator->Count()) << "%): name:" << bestH.debug_name() << " w+:"<< bestW << std::endl;
311 // }
312 //
313 // if((count & (32*1024-1))==0)
314 // {
315 // float fs = count/t.GetTime();
316 // float pr = (float)count/(float)m_featureGenerator->Count();
317 // int rem = (m_featureGenerator->Count() - count)/fs;
318 // std::cout << fs << " feature/secs: " << (100.0f * pr) << "% ";
319 // if(rem<120)
320 // std::cout << rem << "s remaining" << std::endl;
321 // else
322 // std::cout << rem/60 << "min remaining" << std::endl;
323 // }
324 //
325 //
326 // }
327 #endif
328 
329  std::cout << "Expected W:" << bestW<< std::endl;
330  return true;
331  }
332  else
333  {
334  // WITH PRELOAD
335 
336 
337 
338  }
339 
340  }
341 };
342 
343 
344 #endif
345 
Feature FeatureType
The feature type generated by this generator.
Definition: FeatureGenerator.h:41
Definition: timer.h:84
A classifier composed by a Feature Extractor and an Evaluation Policy A "Second Level" classifier...
Definition: BinaryClassifier.h:38
DataSetHandle< Aggregator > & GetTrainingSet()
return R/W the training set
Definition: NaiveDecisionStumpOracle.h:111
A very simple decision stump with threshold 0 (could be made more luminance insensitive) Implements the c...
virtual void Reset()=0
reset any internal counters
Definition: FeatureGenerator.h:36
Definition: DataSet.h:50
void SetFeatureGenerator(FeatureGenerator &f)
Associate a Feature Generator to Decision Stump Generator.
Definition: NaiveDecisionStumpOracle.h:91
Definition: thread_group.h:82
image/size TODO namespace
Definition: Types.h:39
void join_all()
wait all threads terminate
Definition: thread_group.h:114
void SetNumberOfThreads(int th)
Change the thread number used in Multi Threading training.
Definition: NaiveDecisionStumpOracle.h:117
BinaryClassifier< FeatureType, NaiveDecisionStump > WeakClassifierType
The weak classifier.
Definition: NaiveDecisionStumpOracle.h:53
BoostableClassifier< WeakClassifierType > ClassifierType
The weak classifier reported by this oracle.
Definition: NaiveDecisionStumpOracle.h:56
double Optimize(WeakClassifierType &h)
Definition: NaiveDecisionStumpOracle.h:134
virtual bool Next(Feature &out)=0
return the next feature, or return false
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
FeatureGenerator::FeatureType FeatureType
The Feature Extracted by FeatureGenerator.
Definition: NaiveDecisionStumpOracle.h:50
Definition: BoostableClassifier.h:40
virtual unsigned int Count() const =0
return the count of feature available
Definition: NaiveDecisionStumpOracle.h:44
void SetTrainingSet(const DataSetHandle< Aggregator > &set)
Set the training set used to recover the threshold.
Definition: NaiveDecisionStumpOracle.h:85