X-Boost  2.3.8
PrecomputedBinaryClassifierOracle.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _PRECOMPUTED_BINARY_CLASSIFIER_ORACLE_H
22 #define _PRECOMPUTED_BINARY_CLASSIFIER_ORACLE_H
23 
29 #ifdef _MULTITHREAD
30 # include "Thread/thread.h"
31 # include "Thread/bind.h"
32 # include "Thread/thread_group.h"
33 #endif
34 
35 #include <algorithm> // sort
36 
38 #include "Classifier/IndirectBinaryClassifier.h"
40 #include "detail/BinaryClassifierOptimize.h"
41 
42 #include <iostream>
43 
44 template<class Policy>
46 public:
48 
49  template<class FeatureType>
50  struct Traits {
52  };
53 
54  bool reoptimize;
55 
56 
57 
58 private:
59 
61  template<class DataType>
62  void build_decision_stump_heuristic(double *out_w, IndirectBinaryClassifier<Policy> *c, const PrecomputedPatternResponse<DataType> *src, int f0, int f1, float wp, float wn)
63  {
64  const int histo_bin_size = src->number_of_bins;
65  double bestw = 0.0;
66  std::pair<DataType, int> best_ret;
67  const int n_samples = src->n_samples;
68  int best_i = -1;
69 
70  {
71  // temporary list used in optimizer
72  float *store = new float[histo_bin_size];
73 
74  // for each features
75  for(int i = f0; i< f1; ++i)
76  {
77  // reset and create histogram
78 
80 
81  // reset store:
82  for(int j = 0; j<histo_bin_size; ++j)
83  {
84  store[j] = 0.0f;
85  }
86 
87  // create Pattern Response histogram
88  for(int j=0; j<n_samples; ++j)
89  {
90  int k = j; //indexes[j];
91 
92  // DataType v = src->GetResponse(k)[i];
93 #ifdef USE_64_BIT
94  int bin = src->bin[int64_t(i) * n_samples + k];
95 #else
96  int bin = src->bin[uint32_t(i * n_samples + k)];
97 #endif
98  store[bin] += src->weights[k];
99  }
101 
102  // evaluate feature: bucketindex and direction
103  std::pair<int, int> ret;
104 
105  double w = SimpleOptimize(ret, store, histo_bin_size, wp, wn);
106  // ret contain the best solution (threshold and diretion). Direction is not so important due to nature of decision tree
107 
108  if(w > bestw)
109  {
110  bestw = w;
111  best_ret = ret;
112  best_i = i;
113  }
114 
115  }
116  delete [] store;
117  }
118 
119  c->feature_index = best_i;
120 
121  if(reoptimize)
122  {
123  // REOPTIMIZE
124  std::pair<DataType, float> * store = new std::pair<DataType, float> [n_samples];
125 
127 
128  // create Pattern Response
129  for(int j=0; j< n_samples; ++j)
130  {
131  int k = j; // indexes[j];
132 #ifdef USE_64_BIT
133  store[j].first = src->response[int64_t(best_i) * n_samples + k];
134 #else
135  store[j].first = src->response[uint32_t(best_i * n_samples + k)];
136 #endif
137  store[j].second = src->weights[k]; /* (double) src.category[k] * src.weights[k]; */
138  }
139 
140  // sort
141  std::sort(&store[0], &store[n_samples]);
142 
144 
145  // evaluate feature: th and direction
146  *out_w = Optimize(best_ret, store, n_samples, wp, wn, false);
147 
148  std::cout << "Before Optmization: " << bestw << " | After Optimization: " << * out_w << '\n';
149  c->th = best_ret.first;
150  c->parity = best_ret.second;
152 
153  delete [] store;
154 
155  }
156  else
157  {
159  DataType minr, maxr;
160 
161  minr = src->response_range[best_i].first;
162  maxr = src->response_range[best_i].second;
163 
164  DataType delta = (maxr > minr) ? (maxr - minr + 1) : (1);
165 
166  *out_w = bestw;
167  c->th = minr + ((best_ret.first * delta) / histo_bin_size); // ret.first;
168  c->parity = best_ret.second;
170  }
171 
172 
173  }
174 
175 public:
176 
177  PrecomputedPatternBinaryClassifierOracle(bool _reoptimize = true) : reoptimize(_reoptimize) { }
178 
179  template<class DataType>
180  double Train(IndirectBinaryClassifier<Policy> &root, const PrecomputedPatternResponse<DataType> & src, int max_concurrent_jobs) {
181  if(src.number_of_bins > 0)
182  {
183  // find the best feature!
184  double bestw = 0.0;
185 
186  const int n_samples = src.n_samples;
187 
188  // precompute Wn, Wp
189  float wp = 0.0, wn = 0.0;
190  for(int i=0; i<n_samples; ++i)
191  {
192  const int k = i;
193  if(src.category[k] == 1)
194  wp += src.weights[k];
195  else
196  wn -= src.weights[k];
197  }
198  // split using threads
199 
200 #ifdef _MULTITHREAD
201  if(max_concurrent_jobs>1)
202  {
204 
205  IndirectBinaryClassifier<Policy> *candidates = new IndirectBinaryClassifier<Policy>[max_concurrent_jobs];
206  double *outw = new double[max_concurrent_jobs];
207 
208  for(int i =0; i<max_concurrent_jobs; ++i)
209  {
210  int f0 = (src.n_features * i) / max_concurrent_jobs;
211  int f1 = (src.n_features * (i+1) ) / max_concurrent_jobs;
212 
213  pool.create_thread(sprint::thread_bind(&PrecomputedPatternBinaryClassifierOracle<Policy>::template build_decision_stump_heuristic<DataType>, this, &outw[i], &candidates[i], &src, f0, f1, wp, wn) );
214  }
215 
216  pool.join_all();
217 
218  for(int i=0; i<max_concurrent_jobs; ++i)
219  {
220  if(outw[i] > bestw)
221  {
222  bestw = outw[i];
223  root = candidates[i];
224  }
225  }
226 
227  delete [] outw;
228  delete [] candidates;
229  }
230  else
231 #endif // #ifdef _MULTITHREAD
232  {
233  build_decision_stump_heuristic(&bestw, &root, &src, 0, src.n_features, wp, wn);
234  }
235 
236 
237  return bestw;
238 
239  }
240  else
241  {
242  std::cerr << "Error: no precompute bin. This oracle is not implemented now." << std::endl;
243  return 0.0;
244  }
245  }
246 };
247 
248 
249 #endif
A classifier composed by a Feature Extractor and an Evaluation Policy A "Second Level" classifier...
Definition: BinaryClassifier.h:38
int * category
category list
Definition: PrecomputedPatternResponse.h:48
this header contains PrecomputedPatternResponse class, used to store hash information provided by a m...
double * weights
weights list
Definition: PrecomputedPatternResponse.h:50
int number_of_bins
Quantization level.
Definition: PrecomputedPatternResponse.h:58
int * bin
precomputed bin matrix (rows features, columns samples)
Definition: PrecomputedPatternResponse.h:56
Definition: thread_group.h:82
abstracting thread
void join_all()
wait all threads terminate
Definition: thread_group.h:114
proposal 1 for thread group
int n_features
number of feature preallocated
Definition: PrecomputedPatternResponse.h:46
int n_samples
number of samples
Definition: PrecomputedPatternResponse.h:44
Definition: PrecomputedPatternResponse.h:41
int feature_index
the index of the feature used during the classification stage
Definition: IndirectBinaryClassifier.h:49
method to create function pointer for thread call
std::pair< ResponseType, ResponseType > * response_range
precomputed response ranges
Definition: PrecomputedPatternResponse.h:54
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
A classifier composed by an index of a feature and an Evaluation Policy.
Definition: IndirectBinaryClassifier.h:37
A Binary Classifier. Implements the class BinaryClassifier.
ResponseType * response
response matrix. For performance reason it each row is a feature and columns are samples ...
Definition: PrecomputedPatternResponse.h:52
Definition: PrecomputedBinaryClassifierOracle.h:45
Definition: PrecomputedBinaryClassifierOracle.h:50