X-Boost  2.3.8
IntegralChannelImage.h
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 #ifndef _INTEGRALCHANNELIMAGE_HELPER_H
21 #define _INTEGRALCHANNELIMAGE_HELPER_H
22 
#include "ClassifierDetectorHelper.h"
#include <factory/ObjectDetector.h>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// DEBUG
#include "IO/pnmio.h"
33 
34 // ICHN is tested with 3 different policies to find the one with the best performance
35 
36 //#define PARALLEL_ICHN
37 
38 
40 #ifndef WIN32
41 template<>
42 #endif
43 template <class _Instance>
44 struct ClassifierDetectHelper<IntegralChannelImagePreprocessor, _Instance>: public ObjectDetectorWrapperBase<IntegralChannelImagePreprocessor, _Instance> {
45 private:
46 
47  typedef typename IntegralChannelImagePreprocessor::ReturnType data_type;
48 
49 
50 public:
51 
55  void scale_object_detector(std::vector<Candidate> * out, const ImageHandle *src, double s, int s_index)
56  {
57  data_type data;
58  detail::CandidateParam nms_params;
59  Image scaled_img;
60  ImageHandle working_img;
61 
62  // allocate the response image (up to width x height)
63  double *response = new double[src->width * src->height];
64 
65  // estimate maximum octave
66  int nOctave = 1;
67 
68  for(unsigned int n=0; n<this->m_classifs.size(); ++n)
69  {
70  int cl_width = this->m_clsparams[n].sz.width;
71  int cl_height = this->m_clsparams[n].sz.height;
72 
73  int nOctave_test = ( (int)log2(std::min(src->width/cl_width, src->height/cl_height)) ) + 1; // rounded
74  if(nOctave_test > nOctave)
75  nOctave = nOctave_test;
76  }
77 
78  // per ogni ottava:
79  for(int o=0; o<nOctave; ++o)
80  {
81  // size of resampled image
82  int width = src->width / s;
83  int height = src->height / s;
84 
85 #ifdef DEBUG_TIMING
86  if(o == 0)
87  this->timer_preprocess_scale[s_index].Start();
88  else
89  this->timer_preprocess_scale[s_index].Resume();
90 #endif
91 
92  /******* */
93 
94  /* questa parte calcola la working_img in base alle ottave e alle scale usando il metodo piu' efficiente disponibile */
95  if(o == 0)
96  {
97  // OTTAVA 0: scale source image
98 
99  if(width == (int) src->width && height == (int) src->height)
100  {
101  // no scaling
102 
103  scaled_img.alloc(src->width, src->height, 1); // reserve memory for subsequent octave
104  working_img = *src;
105  }
106  else
107  {
108  scaled_img.alloc(width, height, 1); // reserve memory for subsequent octave
109  // for octave=0 a bilinear resampling is used
110  BilinearResample(scaled_img, *src, rect(0,0, src->width, src->height) );
111 
112  working_img = scaled_img;
113  }
114 
115  }
116  else
117  {
118  // downsample working_img -> scaled
119  scaled_img.width = working_img.width/2;
120  scaled_img.height = working_img.height/2;
121  scaled_img.stride = scaled_img.width;
122  Downsample2X(scaled_img, working_img);
123  working_img = scaled_img;
124  }
125 
126 #ifdef _DEBUG_RESAMPLING
127  {
128  static int count = 0;
129  char buffer[512];
130  sprintf(buffer, "/tmp/test-%06u.pgm", count);
131  count++;
132  pnm_write(working_img, buffer);
133  }
134 #endif
135 
136  /* FINE */
137 
138  // compute the preprocessing image
139  IntegralChannelImagePreprocessor::Process(data, working_img.data, working_img.width, working_img.height, working_img.stride);
140 
141  /****** */
142 
143  // compute data from preprocessor
144  // ObjectDetectorWrapperBase<IntegralChannelImagePreprocessor, _Instance>::compute_scale(&data, *src, width, height);
145 #ifdef DEBUG_TIMING
146  if(s_index > MAX_SCALES)
147  s_index = MAX_SCALES;
148 
149  if(o!=nOctave-1)
150  this->timer_preprocess_scale[s_index].Pause();
151  else
152  this->timer_preprocess_scale[s_index].Stop();
153 #endif
154 
155 #ifdef DEBUG_TIMING
156  if(o == 0)
157  this->timer_response_scale[s_index].Start();
158  else
159  this->timer_response_scale[s_index].Resume();
160 #endif
161 
162  // compute response and NMS for each classifiers
163  for(unsigned int n=0; n<this->m_classifs.size(); ++n)
164  {
165  // geometry of the response image
166  int r_step = this->m_clsparams[n].downsampling;
167  int r_width = (width - this->m_clsparams[n].sz.width)/r_step;
168  int r_height = (height - this->m_clsparams[n].sz.height)/r_step;
169 
170  if(this->m_clsparams[n].enabled && r_width > 3 && r_height > 3)
171  {
172 
173  if(this->require_a_mask(n))
174  {
175  unsigned char *mask = new unsigned char [r_width * r_height];
176  rect roi;
177  this->prepare_mask(n, s, mask, r_width, r_height, this->m_clsparams[n].sz.width, this->m_clsparams[n].sz.height, roi);
178  /*
179  {
180  static int count = 0;
181  char buffer[512];
182  sprintf(buffer, "mask_%06u.pgm", count);
183  count++;
184  pgm_write(buffer, mask, r_width, r_height, 255);
185  }
186  */
187 
188  // TODO: ugly workaround:
189  for(int i =0; i<r_width * r_height; ++i) response[i] = -1.0;
190  ObjectDetectorWrapperBase<IntegralChannelImagePreprocessor, _Instance>::compute_masked_response(this->m_classifs[n], data, response, r_width, roi, mask, this->m_params.concurrent_jobs);
191  delete [] mask;
192  }
193  else
194  {
195  // proces only inside ROI
196  rect roi;
197  roi.x0 = 0;
198  roi.y0 = 0;
199  roi.x1 = r_width;
200  roi.y1 = r_height;
201 
202  // compute response
203  ObjectDetectorWrapperBase<IntegralChannelImagePreprocessor, _Instance>::compute_response(this->m_classifs[n], data, response, r_width, roi, this->m_params.concurrent_jobs);
204  }
205 
206 #ifdef DEBUG_TIMING
207  if(o!=nOctave-1)
208  this->timer_response_scale[s_index].Pause();
209  else
210  this->timer_response_scale[s_index].Stop();
211 #endif
212 
213  nms_params.scale = s;
214  nms_params.step = r_step;
215  nms_params.category = this->m_clsparams[n].category;
216  nms_params.cl_geom.width = this->m_clsparams[n].sz.width;
217  nms_params.cl_geom.height = this->m_clsparams[n].sz.height;
218 
219  unsigned int step = std::min(this->m_clsparams[n].sz.width, this->m_clsparams[n].sz.height)/(2 * r_step); // max 50% occlusion TODO
220  if(step<1) step =1;
221  /*
222  {
223  static int count = 0;
224 
225  double r_min = *std::min_element(response,response+r_width*r_height);//dbg
226  double r_max = *std::max_element(response,response+r_width*r_height);//dbg
227 
228  std::cout << "\tr_max " << r_max << " | r_min " << r_min;
229  // print RESPONSE to debug
230  Image test;
231  test.alloc(r_width, r_height, 3);
232  for(int j =0; j<r_height; ++j)
233  for(int i =0; i<r_width; ++i)
234  {
235  double r = response[i + j * r_width];
236  if(r>0.0)
237  {
238  unsigned char p = r * 255.0 / r_max;
239 
240  test.data[3*i + test.stride *j+0] = 0;
241  test.data[3*i + test.stride *j+1] = p;
242  test.data[3*i + test.stride *j+2] = 0;
243  }
244  else
245  {
246  unsigned char p = -r * 255.0 / r_min;
247 
248  test.data[3*i + test.stride *j+0] = 0;
249  test.data[3*i + test.stride *j+1] = 0;
250  test.data[3*i + test.stride *j+2] = p;
251  }
252  }
253  char buffer[256];
254  sprintf(buffer, "r%06u.ppm", count);
255  count++;
256  pnm_write(test, buffer);
257  }
258  */
259 
260 #ifdef DEBUG_TIMING
261  this->timer_nms_scale[s_index].Start();
262 #endif
263  // extract candidates. step ? (max on a step area)
264  NonMaximaSuppression(response, *out, step, r_width, r_width, r_height, this->m_clsparams[n].th, nms_params, 1); // no multi-thread
265 #ifdef DEBUG_TIMING
266  this->timer_nms_scale[s_index].Stop();
267 #endif
268 
269  } // r_width>3
270  } // for classifier
271 
272  data.first.release();
273 
274  s *= 2.0; // next octave
275  }
276 
277  delete [] response;
278  }
279 public:
280 
281  // bridge for preprocessor
283 
284 
285  // detect implementa le 3 varianti
286  void detect(std::vector<Candidate>& out, const ImageHandle & src) {
287 
288 #ifdef DEBUG_TIMING
289  this->timer_total.Start();
290 #endif
291 
292  data_type data_store;
293 
294  // scale factor
295  if(!this->m_params.octave_mode)
296  {
297  std::cerr << "Linear Mode is unimplemented for Ichn feature" << std::endl;
298  return;
299  }
300  double scale_factor = (this->m_params.octave_mode) ? ( exp( log(2.0) / this->m_params.nScales) ) : (1.0 / this->m_params.nScales);
301 
303 
304  // import image in the helper
305  // this->ImportImage(src);
306 #ifdef PARALLEL_ICHN
307  std::vector< std::vector<Candidate > > tmp_out;
308  tmp_out.resize( this->m_params.nScales );
309  sprint::thread_group thread_pool_;
310 #endif
311 
312  double s = 1.0;
313 
314  // iterate on "scales"
315  for(int scale=0; scale<this->m_params.nScales; scale++)
316  {
317 #ifdef PARALLEL_ICHN
318  // ogni thread gestisce una scala
319  thread_pool_.create_thread(sprint::thread_bind(&ClassifierDetectHelper<IntegralChannelImagePreprocessor, _Instance>::scale_object_detector,this, &tmp_out[scale], &src, s, scale));
320 #else
321  scale_object_detector(&out, &src, s, scale);
322 #endif
323  if(!this->m_params.octave_mode)
324  s+=scale_factor;
325  else
326  s*=scale_factor;
327  }
328 
329 #ifdef PARALLEL_ICHN
330  thread_pool_.join_all();
331  for(int scale=0; scale<this->m_params.nScales; scale++)
332  out.insert(out.end(), tmp_out[scale].begin(), tmp_out[scale].end());
333 #endif
334 #ifdef DEBUG_TIMING
335  this->timer_total.Stop();
336 #endif
337  }
338 };
339 
340 #endif
Some common methods that can be used by the inner detectors
Definition: ClassifierDetectorHelper.h:84
PNM files I/O.
size cl_geom
classifier geometry
Definition: Candidate.h:39
parameters used in the NMS step
Definition: Candidate.h:33
Definition: Image.h:35
a structure to hold image data (memory)
Definition: Image.h:74
void Downsample2X(ImageHandle &out, const ImageHandle &in)
Downsample (a factor of 2) the whole image.
std::pair< DataType, ParamType > ReturnType
Data provided by this preprocessor.
Definition: IntegralChannelImagePreprocessor.h:101
int step
response step. Multiplication factor to convert from (x,y) to box coordinates
Definition: Candidate.h:37
bool pnm_write(const ImageHandle &in, const char *file)
int category
category
Definition: Candidate.h:41
Definition: ClassifierDetectorHelper.h:528
bool Process(std::pair< IntegralImageData, IntegralChannelImageParams > &out, const T *image, unsigned int width, unsigned int height, long stride) const
compute the ICH
Definition: IntegralChannelImagePreprocessor.h:215
Definition: thread_group.h:82
static void compute_response(const _Instance &inst, const data_type &data, double *response, int stride, const rect &roi, int nThread)
Definition: ClassifierDetectorHelper.h:324
void join_all()
wait for all threads to terminate
Definition: thread_group.h:114
Definition: IntegralChannelImagePreprocessor.h:70
void scale_object_detector(std::vector< Candidate > *out, const ImageHandle *src, double s, int s_index)
Definition: IntegralChannelImage.h:55
long stride
line stride: the offset, in bytes, between two consecutive scanlines
Definition: Image.h:41
void BilinearResample(ImageHandle &out, const ImageHandle &in, const rect &area)
Crop and Resample an Image using Bilinear Interpolation algorithm.
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
virtual classes to work on classifier. ObjectDetector exploits all the performance of classifier...
unsigned int width
image geometry
Definition: Image.h:39
a rectangle structure
Definition: Types.h:55
unsigned char * data
initial address of the first pixel. It must be cast to correct format (uint8, uint16, rgb, etc etc)
Definition: Image.h:43
void NonMaximaSuppression(const D *src, FeatureListType &maxima, unsigned int n, long stride, unsigned int width, unsigned int height, D threshold, Param param, int nThreads=sprint::thread::hardware_concurrency())
Definition: NonMaximaSuppression.h:167
void alloc(unsigned int w, unsigned int h, unsigned int b)
reserve memory for the image
Definition: Image.h:98
float scale
detection scale. Multiplication factor to convert from (x,y) and size to box coordinates ...
Definition: Candidate.h:35