X-Boost  2.3.8
Test.h
Go to the documentation of this file.
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _XBOOST_TEST_H
22 #define _XBOOST_TEST_H
23 
#include <set>
#include <vector>
#include <iosfwd>
#include <iostream>

#ifdef _MULTITHREAD
# include "Thread/thread_group.h"
# include "Thread/bind.h"
#endif
36 
/// Summary of a classification test run: confusion-matrix counters plus the
/// margins observed on the two classes.
struct ReportTest {
  int TP, TN, FP, FN;   // true/false positive/negative counters

  // pmargin: smallest response among the positive samples;
  // nmargin: largest response among the negative samples
  double pmargin, nmargin;

  /// False Positive Rate FP/(TN+FP); returns 0 when the set holds no negatives.
  inline double Fpr() const {
    return (TN + FP) ? double(FP) / double(TN + FP) : 0.0;
  }
  /// Fraction of misclassified samples; returns 0 on an empty report.
  inline double ErrorRate() const {
    const int total = TP + TN + FN + FP;
    return total ? double(FN + FP) / double(total) : 0.0;
  }
  /// Recall / True Positive Rate TP/(TP+FN); returns 0 when there are no positives.
  inline double Recall() const {
    return (TP + FN) ? double(TP) / double(TP + FN) : 0.0;
  }
  /// Precision TP/(TP+FP); returns 0 when nothing was predicted positive.
  inline double Precision() const {
    return (FP + TP) ? double(TP) / double(FP + TP) : 0.0;
  }
  /// Fraction of correctly classified samples; returns 0 on an empty report.
  inline double Accuracy() const {
    const int total = TP + TN + FN + FP;
    return total ? double(TP + TN) / double(total) : 0.0;
  }
  /// Separation between the classes (positive when they are fully separated).
  inline double Margin() const {
    return pmargin - nmargin;
  }

  /// Pretty-print the counters and the derived statistics on @p out.
  /// Now const (it only reads members) and guarded against the integer
  /// division by zero an all-zero report used to trigger.
  void print(std::ostream & out) const
  {
    const int total = TP + TN + FN + FP;
    out << "TP:" << TP << ", TN:" << TN << ", FN:" << FN <<", FP:" << FP << '\n';
    out << "Correct Detection: " << TP+TN << " (" << (total ? (100*(TP+TN))/total : 0) <<"%), Errors: " << FN+FP << '\n';
    out << "Accuracy: " << Accuracy()
        << " Error Rate: " << ErrorRate()
        << " Precision: " << Precision()
        << " Recall: " << Recall() << std::endl;
  }
};
78 
/// Dump a ROC curve for @p classifier evaluated on @p train_set.
///
/// Every distinct response value is used as a candidate threshold; for each
/// one a line is written to @p out in the format:
///   [Threshold] [TP] [TN] [FP] [FN] [Recall/TPR] [FPR] [Precision]
///
/// @param out        destination stream, one line per threshold
/// @param classifier must expose response(template) -> double
/// @param train_set  must expose Size() and templates[i].category
///                   (+1 marks a positive sample, anything else a negative)
/// @note the per-sample responses are kept in a std::vector instead of a raw
///       new[]/delete[] pair: no leak if classifier.response() throws.
template<class ClassifierType, class TrainingSet>
void DumpRoc(std::ostream & out, const ClassifierType & classifier, const TrainingSet & train_set)
{
  const int n = train_set.Size();
  std::vector<double> response(n);   // per-sample classifier score
  std::set<double> thresholds;       // distinct scores = candidate thresholds

  for(int i = 0; i < n; ++i)
  {
    response[i] = classifier.response( train_set.templates[i] );
    thresholds.insert(response[i]);
  }

  for(std::set<double>::const_iterator j = thresholds.begin(); j != thresholds.end(); ++j)
  {
    int TP = 0, TN = 0, FP = 0, FN = 0;
    for(int i = 0; i < n; ++i)
    {
      const int test = ((response[i] > (*j)) ? 1 : -1);

      if(train_set.templates[i].category == 1)
      {
        if(test == 1) TP++;
        else FN++;
      }
      else
      {
        if(test == 1) FP++;
        else TN++;
      }
    }
    // [Threshold] [TP] [TN] [FP] [FN] [Recall/TPR] [FPR] [Precision]
    out << (*j) << '\t' << TP << '\t' << TN << '\t' << FP << '\t' << FN << '\t'
        << (double)TP/(double)(TP+FN) << '\t'
        << (double)FP/(double)(TN+FP) << '\t'
        << (double)TP/(double)(TP+FP) << '\n';
  }
}
122 
/// Classify every sample of @p train_set with @p classifier at threshold
/// @p th and print the confusion counters plus TPR/FPR/Precision to std::cout.
///
/// @param classifier must expose response(template) -> double
/// @param train_set  must expose Size() and templates[i].category
///                   (+1 marks a positive sample, anything else a negative)
/// @param th         decision threshold applied to the response
template<class ClassifierType, class TrainingSet>
void TestSimple(const ClassifierType & classifier, const TrainingSet & train_set, double th = 0.0)
{
  int truePos = 0, trueNeg = 0, falsePos = 0, falseNeg = 0;
  const int total = train_set.Size();

  for(int k = 0; k < total; ++k)
  {
    // a sample is predicted positive when its response exceeds the threshold
    const bool predictedPositive = classifier.response( train_set.templates[k] ) > th;

    if(train_set.templates[k].category == 1)
    {
      if(predictedPositive) ++truePos; else ++falseNeg;
    }
    else
    {
      if(predictedPositive) ++falsePos; else ++trueNeg;
    }
  }

  std::cout << "TP: " << truePos << " | TN: " << trueNeg
            << " | FP: " << falsePos << " | FN: " << falseNeg
            << " | TPR: " << (double)truePos/(double)(truePos+falseNeg)
            << " | FPR: " << (double)falsePos/(double)(trueNeg+falsePos)
            << " | Precision: " << (double)truePos/(double)(falsePos+truePos) << std::endl;
}
153 
/// Evaluate classifier @p c on every sample of @p data and print a full
/// report (confusion counters, accuracy/precision/recall, per-class response
/// averages and the margin between the classes) to std::cout.
///
/// @param c          classifier, invoked as c(getData1(t, data), getData2(t, data))
/// @param data       sample container exposing Size() and templates[i].category
///                   (+1 positive, -1 negative)
/// @param threshold  decision threshold applied to the classifier response
/// @return true when no sample of @p data is misclassified
template<class DataType, class ClassifierType>
bool Test(const ClassifierType & c, const DataType & data, double threshold = 0.0)
{
  unsigned int cd[2], er[2]; // correct detections / errors: [0] positives, [1] negatives
  // margin bookkeeping: pmargin holds the smallest response among the
  // positives, nmargin the largest response among the negatives
  double pmargin = 1000000000.0;  // a large positive number
  double nmargin = -1000000000.0; // a large negative number
  cd[0] = er[0] = cd[1] = er[1] = 0;

  double pavg, navg; // response averages for positives / negatives
  pavg = navg = 0.0;

  // keep every response so the patterns inside the margin can be counted
  // afterwards; std::vector sized on templates.size() (the actual iteration
  // bound) replaces a raw new[data.Size()] array: exception-safe and no
  // overflow risk if Size() and templates.size() ever disagree
  std::vector<double> r(data.templates.size());

  for(unsigned int i = 0; i < data.templates.size(); i++)
  {
    // templates[i].data points at byte (-1,-1)
    double ret = c( getData1(data.templates[i], data), getData2(data.templates[i], data) );
    r[i] = ret;

    if(data.templates[i].category == 1)
    {
      // ground truth is positive
      pavg += ret;

      if(ret < pmargin)
        pmargin = ret;

      if(ret > threshold)
        cd[0]++;  // true positive
      else
        er[0]++;  // false negative
    }
    else
    {
      // ground truth is negative
      navg += ret;
      if(ret > nmargin)
        nmargin = ret;
      if(ret < threshold)
        cd[1]++;  // true negative
      else
        er[1]++;  // false positive
    }
  }

  pavg /= (double) (cd[0]+er[0]);
  navg /= (double) (cd[1]+er[1]);

  std::cout << "TP:" << cd[0] << ", TN:" << cd[1] << ", FN:" << er[0] <<", FP:" << er[1] << '\n';
  std::cout << "Correct Detection: " << cd[0]+cd[1] << " (" << (100*(cd[0]+cd[1]))/(cd[0]+cd[1]+er[0]+er[1]) <<"%), Errors: " << er[0]+er[1] << '\n';
  std::cout << "Accuracy: " << (float)(cd[0]+cd[1])/(float)(cd[0]+cd[1]+er[0]+er[1])
            << " Error Rate: " << (float)(er[0]+er[1])/(float)(cd[0]+cd[1]+er[0]+er[1])
            << " Precision: " << (float)(cd[0])/(float)(er[1]+cd[0])
            << " Recall: " << (float)(cd[0])/(float)(er[0]+cd[0]) << std::endl;

  std::cout << "Average: " << pavg << "(+) " << navg << "(-)\n";

  if(pmargin > nmargin)
    std::cout << "Margin: " << pmargin-nmargin << '/' << 2*c.max_response() << std::endl;
  else
  {
    // the classes overlap: count the patterns falling inside the margin
    int pm, nm;
    pm = nm = 0;
    for(unsigned int i = 0; i < data.templates.size(); i++)
    {
      if(data.templates[i].category == 1 && r[i] < nmargin)
        pm++;
      if(data.templates[i].category == -1 && r[i] > pmargin)
        nm++;
    }

    std::cout << "No separation: " << pmargin-nmargin << '/' << 2*c.max_response() << " (" << pm+nm << " pattern inside margin: " << pm << " +," << nm << " -)" << std::endl;
  }

  return (er[0]+er[1]) == 0; // no error
}
243 
namespace detail {

/// Worker routine used by TestAndExportStat: evaluates the classifier on the
/// sample range [s0, s1) and stores each response into r[index].
template<class DataType, class ClassifierType>
void compute_response(double *r, const ClassifierType * c, const DataType * data, int s0, int s1)
{
  for(int idx = s0; idx < s1; ++idx)
  {
    // templates[idx].data points at byte (-1,-1)
    r[idx] = (*c)( getData1(data->templates[idx], *data), getData2(data->templates[idx], *data) );
  }
}

} // namespace detail
259 
260 
/// Evaluate classifier @p c on every sample of @p data, print the statistics
/// to std::cout and return them as a ReportTest.
///
/// @param c                    classifier, invoked as c(getData1(t, data), getData2(t, data))
/// @param data                 sample container exposing Size() and
///                             templates[i].category (+1 positive, -1 negative)
/// @param threshold            decision threshold applied to the response
/// @param max_concurrent_jobs  when built with _MULTITHREAD and > 1, the
///                             responses are computed by this many worker threads
/// @return the filled ReportTest (confusion counters and margins)
template<class DataType, class ClassifierType>
ReportTest TestAndExportStat(const ClassifierType & c, const DataType & data, double threshold, int max_concurrent_jobs)
{
  unsigned int cd[2],er[2]; // correct detections / errors: [0] positives, [1] negatives
  ReportTest report;
  // compute margin:
  // pmargin holds the smallest response among the positives,
  // nmargin the largest response among the negatives
  double pmargin = 1000000000.0; // a large positive number
  double nmargin = -1000000000.0; // a large negative number
  cd[0] = er[0] = cd[1] = er[1] = 0;

  double pavg,navg; // response averages for positives / negatives
  pavg=navg = 0.0;

  int n_samples = data.Size();

  // store the response to count the number of pattern inside the margin
  double *r = new double[ n_samples ];

  // TODO: estimate the number of samples useful
#ifdef _MULTITHREAD
  if(max_concurrent_jobs > 1)
  {
    sprint::thread_group thread_pool_;

    // split [0, n_samples) into max_concurrent_jobs contiguous slices,
    // one worker thread per slice, each filling its part of r[]
    for(int k=0; k<max_concurrent_jobs; ++k)
    {
      int s0 = (k * n_samples) / max_concurrent_jobs;
      int s1 = ((k+1) * n_samples) / max_concurrent_jobs;

      thread_pool_.create_thread(sprint::thread_bind(&detail::compute_response<DataType,ClassifierType>, r, &c, &data, s0, s1));
    }
    thread_pool_.join_all();

    // sequential pass over the precomputed responses
    for(int i = 0; i<n_samples; i++)
    {
      double ret = r[i];

      if(data.templates[i].category == 1)
      {
        // gt is positive
        pavg += ret;

        if(ret < pmargin)
          pmargin = ret;

        if(ret>threshold)
        {
          cd[0]++; // true positive
        }
        else
        {
          er[0]++; // false negative
        }
      }
      else
      {
        // gt is negative
        navg += ret;
        if(ret > nmargin)
          nmargin = ret;
        if(ret<threshold)
        {
          cd[1]++; // true negative
        }
        else
        {
          er[1]++; // false positive
        }
      }
    }
  }
  else
#endif // #ifdef _MULTITHREAD
  {
    // single-threaded path: compute the response and classify in one pass
    for(int i = 0; i<n_samples; i++)
    {
      // templates[i].data points at byte (-1,-1)
      double ret = c( getData1(data.templates[i], data), getData2(data.templates[i], data) );
      r[i] = ret;

      if(data.templates[i].category == 1)
      {
        // gt is positive
        pavg += ret;

        if(ret < pmargin)
          pmargin = ret;

        if(ret>threshold)
        {
          cd[0]++; // true positive
        }
        else
        {
          er[0]++; // false negative
        }
      }
      else
      {
        // gt is negative
        navg += ret;
        if(ret > nmargin)
          nmargin = ret;
        if(ret<threshold)
        {
          cd[1]++; // true negative
        }
        else
        {
          er[1]++; // false positive
        }
      }
    }
  }

  pavg /= (double) (cd[0]+er[0]);
  navg /= (double) (cd[1]+er[1]);
  report.TP = cd[0];
  report.TN = cd[1];
  report.FN = er[0];
  report.FP = er[1];

  report.pmargin = pmargin;
  report.nmargin = nmargin;

  report.print(std::cout);
  std::cout << "Average: " << pavg << "(+) " << navg << "(-)\n";

  if(pmargin>nmargin)
    std::cout << "Margin: " << pmargin-nmargin << '/' << 2*c.max_response() << std::endl;
  else
  {
    // the classes overlap: count the patterns falling inside the margin
    int pm, nm;
    pm = nm = 0;
    for(unsigned int i = 0; i<data.templates.size(); i++)
    {
      if(data.templates[i].category == 1 && r[i]<nmargin)
        pm++;
      if(data.templates[i].category == -1 && r[i]>pmargin)
        nm++;
    }

    std::cout << "No separation: " << pmargin-nmargin << '/' << 2*c.max_response() << " (" << pm+nm << " pattern inside margin: " << pm << " +," << nm << " -)" << std::endl;
  }

  delete [] r;
  return report;
}
417 
421 ReportTest ExportStat(const double *r, const int *category, int n_samples, double threshold);
422 
423 #endif
void DumpRoc(std::ostream &out, const ClassifierType &classifier, const TrainingSet &train_set)
TODO: suitable only for classifiers exposing response(Template).
Definition: Test.h:81
ReportTest ExportStat(const double *r, const int *category, int n_samples, double threshold)
void TestSimple(const ClassifierType &classifier, const TrainingSet &train_set, double th=0.0)
TODO: suitable only for classifiers exposing response(Template).
Definition: Test.h:126
bool Test(const ClassifierType &c, const DataType &data, double threshold=0.0)
Definition: Test.h:156
ClassifierType
Definition: Types.h:31
Definition: thread_group.h:82
void join_all()
wait all threads terminate
Definition: thread_group.h:114
proposal 1 for thread group
double Recall() const
Recall/TruePositiveRate.
Definition: Test.h:51
method to create function pointer for thread call
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
ReportTest TestAndExportStat(const ClassifierType &c, const DataType &data, double threshold, int max_concurrent_jobs)
Definition: Test.h:265
Definition: Test.h:39