X-Boost  2.3.8
BootStrapProcedure.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _BOOTSTRAP_PROCEDURE_H
22 #define _BOOTSTRAP_PROCEDURE_H
23 
24 #include "NonMaximaSuppression.h"
25 #include "Candidate.h"
26 #include "IO/datasetin.h"
27 #include "Utility/ResponseUtils.h"
28 
29 #ifdef _MULTITHREAD
30 # include "Thread/thread.h"
31 # include "Thread/thread_group.h"
32 # include "Thread/bind.h"
33 # include "Thread/ref.h"
34 #endif // #ifdef _MULTITHREAD
35 
36 
/// Parameters for BootStrapProcedure.
struct BootStrapParams {
    /// Scale factor between two consecutive images of the pyramid
    /// (the running scale is multiplied by this value at every step).
    float scale_factor;
    /// Max number of negatives extracted per frame or per scale.
    int negative_random_samples;
    /// Non maxima suppression step (divided by `downsampling` before being
    /// handed to NonMaximaSuppression).
    int nms_step;
    /// Downsampling factor used for computing the response image.
    int downsampling;
    /// Threshold passed to NonMaximaSuppression.
    double thMin;
};
56 
/// A counter, used for debug: BootStrapProcedure prints it as the frame ID
/// and increments it once per processed frame.
58 extern int frameidx;
59 
68 template<class SourceDatasetType, class ClassifierType>
69 bool BootStrapProcedure( SourceDatasetType & training_set, datasetin & in, const ClassifierType& cl, const BootStrapParams & params, int num_thread)
70 {
71  int new_pattern = 0;
72  datasetitem item;
73  Timer t_elab;
74  size sz;
75  sz.width = training_set.width;
76  sz.height = training_set.height;
77 
78  while(in.next(item))
79  {
80  Image img;
81 
82 #ifdef LIMIT_PER_FRAME
83  // store
84  std::vector< ImageHandle > image_scales;
85  std::vector< Candidate > candidates;
86 #endif
87 
88  // processo per il bootstrap solo le immagini con auto_negative
89  if(item.auto_negative && pnm_load(item.filename.c_str(), img))
90  {
91 
92  int src_width = img.width;
93  int src_height = img.height;
94  std::cout << "Testing classifier on " << item.filename << " (" << src_width << 'x' << src_height << ") with step " << params.downsampling << " [ID:" << frameidx << "]" << std::endl;
95  std::vector<datasetobject> list = item.object;
96  float scale = 1.0f;
97  ImageResampler rsmp;
98  bool first_scale = true;
99  int n_negatives = 0;
100  Timer t0;
101 
102  t0.Start();
103 
104  // iterate for all the octaves (it is a slightly different code compared to the one in ObjectDetector)
105  for(;;) {
106  // the response is smaller than img because the window outside image are not considered
107  int w_r = ((int) img.width - (int) sz.width)/params.downsampling;
108  int h_r = ((int) img.height - (int) sz.height)/params.downsampling;
109 
110  std::cout << "\t" << img.width << 'x' << img.height << " (" << w_r << 'x' << h_r << ")... ";
111  std::cout.flush();
112 
113  std::vector<Candidate> elements;
114  elements.reserve(params.negative_random_samples);
115  typename SourceDatasetType::ReturnType Out;
116 
117  // terminate check (if 3x3 image is provided)
118  if(h_r < 3 || w_r < 3)
119  {
120  std::cout << "\tWarn: response image too small\n";
121  break;
122  }
123 
124 
125  double* response = new double[w_r*h_r];
126 
127  Timer t;
128 
129  t.Start();
130 
131  // precompute the preprocess image
132  training_set.Process(Out, img.data, img.width, img.height, img.stride);
133 
134  // create the optimized version for the classifier:
135 
136  typename ClassifierType::OptimizedType _Inst(cl, 0, Out.second);
137 
138  double r_max,r_min;
139 
140 #ifdef _MULTITHREAD
141  if(num_thread > 1)
142  {
143  sprint::thread_group thread_pool_;
144  for(int ii=0; ii<num_thread; ii++)
145  {
146  int startH = (ii*h_r)/num_thread;
147  int endH = ((ii+1)*h_r)/num_thread;
148 
149  // about 20% increase in performances
150  // thread_pool_.create_thread(sprint::thread_bind(&workerResponse<typename SourceDatasetType::ReturnType,ClassifierType>,response,startH,endH,w_r, &cl, &Out ));
151  thread_pool_.create_thread(sprint::thread_bind(&optimizedWorkerResponse<typename SourceDatasetType::ReturnType, typename ClassifierType::OptimizedType>,response,startH,endH,w_r, sprint::c_ref(_Inst), sprint::c_ref(Out), params.downsampling ));
152  }
153 
154  thread_pool_.join_all();
155  }
156  else
157 #endif
158  {
159  // generate the response image
160  // about 20% increase in performances
161  // workerResponse(response, 0, h_r, w_r, &cl, &Out);
162  optimizedWorkerResponse(response, 0, h_r, w_r, _Inst, Out, params.downsampling);
163  }
164 
165  double dt = t.GetTime();
166 
167  r_min = *std::min_element(response,response+w_r*h_r);//dbg
168  r_max = *std::max_element(response,response+w_r*h_r);//dbg
169 
170  std::cout << "\tr_max " << r_max << " | r_min " << r_min;
171  std::cout.flush();
172  // print RESPONSE to debug
173  /* {
174  Image test;
175  test.alloc(w_r, h_r, 3);
176  for(int j =0;j<h_r;++j)
177  for(int i =0;i<w_r;++i)
178  {
179  double r = response[i + j * w_r];
180  if(r>0.0)
181  {
182  unsigned char p = r * 255.0 / r_max;
183 
184  test.data[3*i + test.stride *j+0] = 0;
185  test.data[3*i + test.stride *j+1] = p;
186  test.data[3*i + test.stride *j+2] = 0;
187  }
188  else
189  {
190  unsigned char p = -r * 255.0 / r_min;
191 
192  test.data[3*i + test.stride *j+0] = 0;
193  test.data[3*i + test.stride *j+1] = 0;
194  test.data[3*i + test.stride *j+2] = p;
195  }
196  }
197  static int count = 0;
198  char buffer[256];
199  sprintf(buffer, "/tmp/r%06u.ppm", count);
200  count++;
201  pnm_write(test, buffer);
202  } */
203 
204  std::vector<Candidate> tmpOut;
205 
207 
208  param.cl_geom.width = training_set.width;
209  param.cl_geom.height = training_set.height;
210  param.scale = 1.0; // image is extracted by downsampled directly and not from original size image
211  param.step = params.downsampling; // downsampling factor of response image
212  param.category = 0; // unused
213 
214  // search non overlapping box TODO: this could be a parameter
215  int step = params.nms_step / params.downsampling;
216  NonMaximaSuppression(response, tmpOut, step, w_r, w_r, h_r, params.thMin, param, num_thread);
217 
218  std::cout << " | detected: "<< tmpOut.size() << " in " << dt << "s";
219  std::cout.flush();
220 
221  // TODO: check existance of NEGATIVE AREAs
222  /* {
223  static int count = 0;
224  char buffer[256];
225  Image out;
226  out.clone(img);
227  for(int i=0; i<tmpOut.size(); i++)
228  DrawRect(out, rect( tmpOut[i].roi.x0, tmpOut[i].roi.y0, tmpOut[i].roi.x1-1, tmpOut[i].roi.y1-1), 255);
229  sprintf(buffer, "/tmp/test%06u.pgm", count);
230  count++;
231  pnm_write(out, buffer);
232  } */
233 
234  for(std::vector<Candidate>::const_iterator i = tmpOut.begin(); i != tmpOut.end(); ++i)
235  {
236  bool is_overlap = false;
237  // puo' solo overlappare dei negativi (non dovrebbero essercene comunque)
238  for(std::vector<datasetobject>::const_iterator j = list.begin(); j != list.end(); ++j)
239  {
240  if(j->category >= 0 && overlap(i->box, j->roi))
241  {
242  is_overlap = true;
243  break;
244  }
245  }
246 
247  if(!is_overlap)
248  {
249  elements.push_back(*i);
250 
251  }
252  }
253 
254 
255  // this code extract at least params.negative_random_samples elements per each scale
256 #ifndef LIMIT_PER_FRAME
257  std::cout << " | " << elements.size() << "/" << params.negative_random_samples <<" false positives found" << std::endl;
258 
259 
260  // SORT (TODO or random sampling?)
261  if((int) elements.size() > params.negative_random_samples)
262  {
263  std::sort(elements.begin(), elements.end()); // sort from higher to lower
264  elements.resize(params.negative_random_samples); // erase
265  }
266 
267  for(std::vector<Candidate>::const_iterator i = elements.begin(); i != elements.end(); ++i)
268  {
269  training_set.ImportImage(img.crop(i->box),-1);
270  n_negatives++;
271  }
272 
273  new_pattern += elements.size();
274 #else
275 
276  // this code extract at least params.negative_random_samples elements per each frame
277  std::cout << " | " << elements.size() << " false positives found" << std::endl;
278 
279  // crop all elements in candidates and store response and image, for further processing
280  for(std::vector<Candidate>::const_iterator i = elements.begin(); i != elements.end(); ++i)
281  {
282  std::pair<float, Image *> d;
283  d.first = i->response;
284  d.second = new Image;
285  d.second->clone( img.crop(i->roi) );
286  candidates.push_back(d);
287  }
288 
289 #endif
290 
291  delete [] response;
292  Out.first.release(); // manually release preprocessed data
293 
294  scale *= params.scale_factor;
295 
296  // size of the response image for the next scale:
297  w_r = (int(src_width/scale) - (int) sz.width) /params.downsampling;
298  h_r = (int(src_height/scale) - (int) sz.height)/params.downsampling;
299 
300  // it is possible to search for additional scale?
301  // at least 4x4 response image need to be computed to make any sense
302  if(w_r > 3 && h_r > 3)
303  {
304 
305  if(first_scale)
306  {
307  rsmp.ImportImage(img);
308  first_scale = false;
309  }
310 
311  Image out;
312  out.alloc( (int) ((float)src_width/scale), (int) ((float)src_height/scale), 1);
313  rsmp.ExportImage(out);
314 
315  for(unsigned int i=0; i<list.size(); ++i)
316  { list[i].roi.x0 = floor((float)item.object[i].roi.x0 / scale);
317  list[i].roi.y0 = floor((float)item.object[i].roi.y0 / scale);
318  list[i].roi.x1 = ceil((float)item.object[i].roi.x1 / scale);
319  list[i].roi.y1 = ceil((float)item.object[i].roi.y1 / scale);
320  }
321 
322  std::swap(img, out);
323 
324  /* {
325  static int count = 0;
326  char buffer[256];
327  sprintf(buffer, "/tmp/downsample%06u.pgm", count);
328  count++;
329  pnm_write(img, buffer);
330  } */
331 
332  }
333  else
334  break;
335  }
336 
337 #ifdef LIMIT_PER_FRAME
338  std::cout << "\t" << candidates.size() << "/" << params.negative_random_samples <<" total false positives found" << std::endl;
339 
340  // TODO: GroupRectangle?
341 
342  if(candidates.size() > params.negative_random_samples)
343  {
344  std::sort(candidates.begin(), candidates.end());
345  }
346 
347  int n = 0;
348  for(std::vector<std::pair<float, Image *> >::const_iterator i = candidates.begin(); i != candidates.end(); ++i)
349  {
350  if(n<params.negative_random_samples)
351  training_set.ImportImage(*i->second,-1);
352 
353  delete i->second; // release pointer and associated memory
354  ++n;
355  }
356 
357  new_pattern += candidates.size();
358 #else
359  std::cout << "\tFrame processed in " << t0.GetTime() << "s. " << n_negatives << " total new negatives imported.\n";
360 #endif
361 
362 
363  frameidx++;
364  }
365  }
366 
367  std::cout << "Bootstrap completed. " << new_pattern << " added to the negative pool.\n";
368 
369  return new_pattern == 0;
370 }
371 
372 #endif
void clone(const ImageHandle &src)
clone an image
Definition: Image.h:104
int frameidx
a counter, used for debug
Definition: timer.h:84
bool pnm_load(const char *file, Image &out)
bool BootStrapProcedure(SourceDatasetType &training_set, datasetin &in, const ClassifierType &cl, const BootStrapParams &params, int num_thread)
Definition: BootStrapProcedure.h:69
size cl_geom
classifier geometry
Definition: Candidate.h:39
int downsampling
downsampling factor for computing response
Definition: BootStrapProcedure.h:50
parameters used in the NMS step
Definition: Candidate.h:33
a pure virtual pattern list reader
Definition: datasetin.h:56
a structure to hold image data (memory)
Definition: Image.h:74
Implement a local maxima search algorithm.
ImageHandle crop(int x0, int y0, int x1, int y1) const
return a subpart of the image (without copy)
Definition: Image.h:47
void optimizedWorkerResponse(double *r, int startH, int endH, int W, const ClassifierType &cl, const ReturnType &out, int step)
Definition: ResponseUtils.h:59
reference ref class
int step
response step. Multiplication factor to convert from (x,y) to box coordinates
Definition: Candidate.h:37
ClassifierType
Definition: Types.h:31
int negative_random_samples
max number of negatives extracted per frame or per scale
Definition: BootStrapProcedure.h:46
int category
category
Definition: Candidate.h:41
float scale_factor
scale factor between two images
Definition: BootStrapProcedure.h:44
Parameters for BootStrapProcedure.
Definition: BootStrapProcedure.h:42
double thMin
threshold
Definition: BootStrapProcedure.h:52
Definition: thread_group.h:82
image/size TODO namespace
Definition: Types.h:39
abstracting thread
void join_all()
wait all threads terminate
Definition: thread_group.h:114
proposal 1 for thread group
helper methods used during response computations
long stride
line stride, the delta offset, in bytes, between two different scanline
Definition: Image.h:41
int nms_step
non maxima suppression step
Definition: BootStrapProcedure.h:48
method to create function pointer for thread call
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
an item used for positive
Definition: datasetin.h:44
unsigned int width
image geometry
Definition: Image.h:39
std::vector< datasetobject > object
List of object.
Definition: datasetin.h:52
bool overlap(const rect &a, const rect &b)
test if 2 rect are overlapped
virtual class to import images
std::string filename
filename
Definition: datasetin.h:46
Definition: ImageUtils.h:35
Candidate object.
unsigned char * data
initial address of the first pixel. It must be cast to correct format (uint8, uint16, rgb, etc etc)
Definition: Image.h:43
void NonMaximaSuppression(const D *src, FeatureListType &maxima, unsigned int n, long stride, unsigned int width, unsigned int height, D threshold, Param param, int nThreads=sprint::thread::hardware_concurrency())
Definition: NonMaximaSuppression.h:167
bool auto_negative
if the rest of the blob can be used as source for negative
Definition: datasetin.h:50
void alloc(unsigned int w, unsigned int h, unsigned int b)
reserve memory for the image
Definition: Image.h:98
float scale
detection scale. Multiplication factor to convert from (x,y) and size to box coordinates ...
Definition: Candidate.h:35