21 #ifndef _SOFTCASCADE_TRAINER_H
22 #define _SOFTCASCADE_TRAINER_H
37 #include "Classifier/SoftCascade.h"
// Builds a per-stage distribution from the shape parameter `alpha`, the amount `r`
// and the number of stages `n`. `v` is the only non-const argument, so it is
// presumably the output; the statements that write it are not visible here — TODO confirm.
40 void computeRejectionDistribution(std::vector<double> & v,
double alpha,
double r,
int n)
// First pass over the n stages.
46 for(
int i =0; i<n; ++i)
// tau is the normalised stage position i/n, so it lies in [0, 1).
48 double tau = (double)i/(
double)n;
// Exponential weight for this stage: exp(alpha * tau) grows with tau when alpha >= 0;
// for negative alpha the weight is exp(|alpha| * (1 - tau)), which shrinks as tau grows.
49 double y = (alpha>=0.0) ? exp( alpha * tau) : exp ( -alpha * (1.0 - tau) );
// Second pass over the n stages; its body is not visible here.
// NOTE(review): presumably rescales the weights so they sum to `r` — confirm.
54 for(
int i =0; i<n; ++i)
// Picks a rejection threshold from the responses of the samples in `data`.
// `response[i]` is read for `data.templates[i]`. The caller in this file passes as
// `target` the number of positive patterns it wants to keep; how `target` is
// consumed is not visible in these lines.
67 template<
class DataSetType>
68 double findBestRejectionThreshold(
const DataSetType & data,
const std::vector<double> & response,
int target)
// Ordered map keyed by response value. NOTE(review): presumably a histogram of
// responses; the lines that fill it are not visible — confirm.
70 std::map<double, int> h;
// Running sums and counts used for the averages printed below (p = positive, n = negative).
71 double psum = 0.0, nsum = 0.0;
72 int pcnt = 0, ncnt = 0;
80 for(
unsigned int i =0; i<data.templates.size(); ++i)
82 double v = response[i];
// category == 1 marks a positive sample.
83 if(data.templates[i].category == 1)
// NOTE(review): pcnt and ncnt start at 0; if either is still 0 here the printed
// average is NaN. The increments are not visible — confirm.
100 std::cout <<
"\tPositive Response Average = " << psum / (double) pcnt <<
" | Negative Response Average = " << nsum / (
double) ncnt <<
'\n';
// NOTE(review): dereferencing h.begin()/h.rbegin() is undefined if h is empty — confirm h is never empty here.
101 std::cout <<
"\tResponses range from " << h.begin()->first <<
" to " << h.rbegin()->first << std::endl;
// Walk the distinct response values from the highest down to the lowest.
104 for(std::map<double, int>::const_reverse_iterator i = h.rbegin(); i != h.rend(); ++i)
110 double th1 = i->first;
// Returns a value 1% of |th1| below th1, so a sample scoring exactly th1 still
// satisfies `response >= threshold`.
114 return th1 - std::abs(th1) * 0.01;
116 double th2 = i->first;
118 std::cout <<
"\tThreshold found: " << th1 <<
" | accum: " << accum <<
" | count: " << count << std::endl;
// Returns the midpoint between the two keys th1 and th2.
119 return (th1 + th2)/2.0;
// Fallback: a value 1% of its magnitude below the smallest response key.
124 return h.begin()->first - std::abs(h.begin()->first) * 0.01;
// Returns a threshold just below `vmin`.
// NOTE(review): the update of vmin is not visible; presumably it tracks the
// smallest response among samples with category == 1 — confirm.
130 template<
class DataSetType>
131 double findMinRejectionThreshold(
const DataSetType & data,
const std::vector<double> & response)
// Large sentinel used as the starting value; if nothing lowers it, the function
// returns roughly this sentinel.
133 double vmin = 100000000000000.0;
136 for(
unsigned int i =0; i<data.templates.size(); ++i)
138 double v = response[i];
// category == 1 marks a positive sample.
139 if(data.templates[i].category == 1)
// Small fixed margin below vmin, so a sample scoring exactly vmin still
// satisfies `response >= threshold`.
146 return vmin - 0.00001;
// Rebuilds `data.templates` and `response` in lock-step, keeping only the samples
// pushed into the new lists, so index i of one still corresponds to index i of the other.
// NOTE(review): the test involving `th` is not visible in these lines — confirm the keep condition.
177 template<
class DataSetType>
178 void rejectSamples(DataSetType & data, std::vector<double> & response,
double th)
181 std::vector<typename DataSetType::PatternType> newList;
182 std::vector<double> newResponse;
// Reserve the current sizes up front: the filtered lists can never be longer than the originals.
184 newResponse.reserve(response.size());
185 newList.reserve(data.templates.size());
187 for(
unsigned int i = 0; i<data.templates.size(); i++)
// category == 1 marks a positive sample.
191 if(data.templates[i].category==1)
// Sample and its response are appended together to keep both lists aligned.
200 newList.push_back(data.templates[i]);
201 newResponse.push_back(response[i]);
// Swap the filtered lists into place; the old contents are released when the locals go out of scope.
206 std::swap(data.templates, newList);
207 std::swap(response, newResponse);
// Filters `data.templates`: a sample is kept when its response is >= `th` or when
// its category is -1. Samples of any other category scoring below `th` are dropped.
// NOTE(review): `response` is const and is not filtered here, so after this call
// response[i] no longer lines up with data.templates[i] whenever something was removed.
214 template<
class DataSetType>
215 void rejectPositiveSamples(DataSetType & data,
const std::vector<double> & response,
double th)
217 std::vector<typename DataSetType::PatternType> newList;
// The filtered list can never be longer than the original.
218 newList.reserve(data.templates.size());
220 for(
unsigned int i = 0; i<data.templates.size(); i++)
222 if((response[i]>=th) || (data.templates[i].category==-1))
223 newList.push_back(data.templates[i]);
// Swap the filtered list into place.
226 std::swap(data.templates, newList);
// Scores how well `response` separates the two classes in `data`.
// NOTE(review): the accumulation of pos/neg/npos/nneg is not visible; the return
// expression reads as (mean positive response) - (mean negative response) — confirm.
232 template<
class DataSetType>
233 double computeEdge(
const DataSetType & data,
const std::vector<double> & response)
239 for(
unsigned int i = 0; i<data.templates.size(); i++)
// category == 1 marks a positive sample.
241 if(data.templates[i].category == 1)
// NOTE(review): if npos or nneg is zero this divides by zero — confirm both classes are non-empty.
254 return (pos/npos) - (neg/nneg);
// Small inline helper taking and returning a double; its body is not visible in these lines.
// NOTE(review): the name suggests it squares x — confirm.
262 inline double quad(
double x)
// Variant of the class-separation score that divides the difference between the
// positive and negative terms by a pooled spread `sigma`.
268 template<
class DataSetType>
269 double computeEdge2(
const DataSetType & data,
const std::vector<double> & response)
277 for(
unsigned int i = 0; i<data.templates.size(); i++)
// category == 1 marks a positive sample.
279 if(data.templates[i].category == 1)
// Sum of squared responses of the positive branch.
282 pos2 += response[i]*response[i];
// Sum of squared responses of the other branch.
288 neg2 += response[i]*response[i];
// NOTE(review): the formula matches (sum of squares - n * mean^2) per class, i.e. it
// assumes pos and neg already hold the class means at this point; the lines that
// would turn the sums into means are not visible — confirm.
313 double sigma = std::sqrt( (pos2 + neg2 - pos*pos*npos - neg*neg*nneg) / (
double) data.templates.size() );
// NOTE(review): sigma is 0 when the numerator above is 0, making this division
// produce inf or NaN.
315 return (pos - neg) / sigma;
// Worker for one slice of the data set: for every index i in [s0, s1) it stores
// classifier.raw(...) of sample i into response[i], overwriting the previous value.
// `response` must point to a buffer with at least s1 elements.
324 template<
class Classifier,
class DataSetType>
325 void inner_compute_raw_response(
const DataSetType & data,
double* response,
const Classifier & classifier,
int s0,
int s1)
327 for(
int i = s0; i<s1; i++)
// getData1/getData2 are declared elsewhere; they produce the two arguments the classifier takes for this sample.
329 response[i] = classifier.raw(getData1(data.templates[i],data),getData2(data.templates[i],data));
// Worker for one slice of the data set: for every index i in [s0, s1) it stores
// the classifier's call-operator result for sample i into response[i], overwriting
// the previous value. `response` must point to a buffer with at least s1 elements.
333 template<
class Classifier,
class DataSetType>
334 void inner_compute_response(
const DataSetType & data,
double * response,
const Classifier & classifier,
int s0,
int s1)
336 for(
int i = s0; i<s1; i++)
// getData1/getData2 are declared elsewhere; they produce the two arguments the classifier takes for this sample.
338 response[i] = classifier(getData1(data.templates[i],data),getData2(data.templates[i],data));
// Worker for one slice of the data set: for every index i in [s0, s1) it ADDS the
// classifier's call-operator result for sample i to response[i], so the buffer
// must already hold the running score. `response` must have at least s1 elements.
342 template<
class Classifier,
class DataSetType>
343 void inner_update_response(
const DataSetType & data,
double * response,
const Classifier & classifier,
int s0,
int s1)
345 for(
int i = s0; i<s1; i++)
// getData1/getData2 are declared elsewhere; they produce the two arguments the classifier takes for this sample.
347 response[i] += classifier(getData1(data.templates[i],data),getData2(data.templates[i],data));
// Driver that fills `response` with raw classifier outputs for the whole data set.
// The function's signature line is not visible in these lines.
365 template<
class WeakClassifier,
class DataSetType>
// One response slot per sample.
368 response.resize( data.Size() );
// With more than one job the index range is split into slices, one per job.
371 if(max_concurrent_jobs>1)
374 int n_jobs = data.Size();
375 for(
int ii=0; ii<max_concurrent_jobs; ii++)
// Slice ii covers [s0, s1); consecutive slices are contiguous and together cover [0, n_jobs).
377 int s0 = (ii*n_jobs)/max_concurrent_jobs;
378 int s1 = ((ii+1)*n_jobs)/max_concurrent_jobs;
// Whole range in a single call (presumably the single-job branch — the `else` is not visible).
// NOTE(review): &response[0] is undefined when response is empty — confirm data.Size() > 0.
388 inner_compute_raw_response(data, &response[0], classifier, 0, data.Size());
// Fills `response` with the output of the boosted classifier for every sample in
// `data`, optionally splitting the work into `max_concurrent_jobs` slices.
395 template<
class WeakClassifier,
class DataSetType>
396 static void computeResponse(
const DataSetType & data, std::vector<double> & response,
const BoostClassifier<WeakClassifier> & classifier,
int max_concurrent_jobs)
// One response slot per sample.
398 response.resize( data.Size() );
// With more than one job the index range is split into slices, one per job.
401 if(max_concurrent_jobs>1)
404 int n_jobs = data.Size();
405 for(
int ii=0; ii<max_concurrent_jobs; ii++)
// Slice ii covers [s0, s1); consecutive slices are contiguous and together cover [0, n_jobs).
407 int s0 = (ii*n_jobs)/max_concurrent_jobs;
408 int s1 = ((ii+1)*n_jobs)/max_concurrent_jobs;
// Whole range in a single call (presumably the single-job branch — the `else` is not visible).
// NOTE(review): &response[0] is undefined when response is empty — confirm data.Size() > 0.
418 inner_compute_response(data, &response[0], classifier, 0, data.Size());
// Resizes `response` to one slot per sample and then walks every sample.
// NOTE(review): the loop body is not visible; the name suggests each slot is reset
// to an initial value — confirm.
424 template<
class DataSetType>
425 static void resetResponse(
const DataSetType & data, std::vector<double> & response)
427 response.resize( data.Size() );
// NOTE(review): signed `int i` is compared against the unsigned result of size(); the
// sibling loops in this file use `unsigned int`. The buffer is sized with data.Size()
// while this bound uses data.templates.size() — presumably equal, confirm.
429 for(
int i = 0; i<data.templates.size(); i++)
// Adds the output of one weak classifier to the running scores in `response`,
// optionally splitting the work into `max_concurrent_jobs` slices run on threads.
// No resize is visible here, so `response` is expected to already hold one entry
// per sample.
437 template<
class WeakClassifier,
class DataSetType>
438 static void updateResponse(
const DataSetType & data, std::vector<double> & response,
const WeakClassifier & classifier,
int max_concurrent_jobs)
442 if(max_concurrent_jobs>1)
445 int n_jobs = data.Size();
446 for(
int ii=0; ii<max_concurrent_jobs; ii++)
// Slice ii covers [s0, s1); consecutive slices are contiguous and together cover [0, n_jobs).
448 int s0 = (ii*n_jobs)/max_concurrent_jobs;
449 int s1 = ((ii+1)*n_jobs)/max_concurrent_jobs;
// One thread per slice; each thread writes only response[s0..s1), so slices do not overlap.
// NOTE(review): the declaration of thread_pool_ and the wait for the threads are not
// visible in these lines — confirm the threads are joined before `response` is read.
451 thread_pool_.
create_thread(sprint::thread_bind(&inner_update_response< WeakClassifier, DataSetType>, sprint::c_ref(data), &response[0], sprint::c_ref(classifier), s0, s1));
// Whole range in a single call (presumably the single-job branch — the `else` is not visible).
459 inner_update_response(data, &response[0], classifier, 0, data.Size());
// Human-readable names of the ranking metrics. TrainSoftCascade indexes this table
// with its `sort_algo` argument, so the order must match the numeric values of
// SoftCascadeRankingAlgo and the table has exactly four entries.
465 static const char *str_metric[] = {
"none",
"weight",
"edge",
"mahalanobis"};
// Selects how TrainSoftCascade ranks the remaining weak classifiers at each stage.
// The enumerator list is not visible in these lines; Ranking_Mahalanobis is one of
// the values used by the trainer.
468 enum SoftCascadeRankingAlgo {
// Builds the soft cascade `dest` from the weak classifiers of the boosted classifier
// `_source`. At each stage one weak classifier is selected, a rejection threshold is
// chosen for it, it is appended to `dest`, and samples of `training_set` are rejected.
//
//   training_set        samples; modified in place (samples are removed)
//   _source             boosted classifier supplying the weak classifiers
//   dest                output cascade
//   max_stages          upper bound on the number of stages; < 1 means use all
//   sort_algo           ranking metric used to select the next weak classifier
//   ratio               fraction of positives allowed to be rejected
//   alpha               shape parameter forwarded to computeRejectionDistribution
//   max_concurrent_jobs forwarded to the response helpers
// NOTE(review): the uses of `dbp`, `blind_complete` and `verbose` are not visible in
// these lines — confirm their meaning against the full source.
483 template<
class WeakClassifier,
class DataSetType>
484 void TrainSoftCascade(DataSetType & training_set,
const BoostClassifier<WeakClassifier> & _source,
SoftCascadeClassifier<WeakClassifier> & dest,
int max_stages, SoftCascadeRankingAlgo sort_algo,
bool dbp,
double ratio,
double alpha,
bool blind_complete,
int max_concurrent_jobs,
bool verbose =
true)
// NOTE(review): `source` (no underscore) used below is not declared in the visible
// lines; since entries are erased from it later, it is presumably a mutable local
// copy of `_source` — confirm.
487 double p = training_set.n_patternP;
488 std::vector<double> v;
// Number of stages: every weak classifier when max_stages < 1, otherwise capped at max_stages.
490 int n = (max_stages < 1) ? (source.
size()) : ( std::min<int>(source.
size(), max_stages) );
// Initial class counts, kept for the percentages and the final average printed below.
493 int initialNegative = training_set.n_patternN;
494 int initialPositive = training_set.n_patternP;
496 std::vector<double> response;
499 std::cout <<
"[+] using " << n <<
" features of " << source.
size() <<
" | metric = " << sort_algo <<
" ("<< str_metric[sort_algo] <<
")" << std::endl;
// Score every sample with the complete boosted classifier.
506 computeResponse(training_set, response, source, max_concurrent_jobs);
// Number of positives to keep after giving up the fraction `ratio`; the floating-point
// product is truncated when stored in the int.
508 int p0 = (1.0-ratio) * training_set.n_patternP;
509 double r = findBestRejectionThreshold(training_set, response, p0);
511 std::cout <<
"Using threshold " << r <<
" to remove too difficult patterns (try to keep " << p0 <<
" patterns)" << std::endl;
// Drop the positives scoring below r; `response` itself is not filtered by this call.
514 rejectPositiveSamples(training_set, response, r);
518 std::cout << training_set.n_patternP <<
"/" << initialPositive <<
" patterns survived" << std::endl;
// Spread ratio * n_patternP over the n stages into v.
524 computeRejectionDistribution(v, alpha, ratio * training_set.n_patternP, n );
// One iteration per cascade stage.
528 for(
int t =0; t<n; ++t)
530 int curPositive, curNegative;
// Class counts at the start of this stage, used to print the per-stage change.
533 curPositive = training_set.n_patternP;
534 curNegative = training_set.n_patternN;
539 std::cout <<
'#' << t << std::endl;
547 std::cout <<
'#' << t <<
" | Positive requested = " << p <<
"/" << training_set.n_patternP << std::endl;
// Score the surviving samples with the cascade built so far.
553 computeResponse(training_set, response, dest, max_concurrent_jobs);
555 double bestEdge = 0.0;
558 std::vector<double> bestResponse;
566 bestResponse = response;
// Keep the candidate with the largest alpha.
569 if(j->alpha > jbest->alpha)
// Response of the cascade with the selected candidate added.
574 updateResponse(training_set, bestResponse, *jbest, max_concurrent_jobs);
// Trial copy: add candidate j's contribution and score the result with computeEdge.
584 std::vector<double> test = response;
587 updateResponse(training_set, test, *j, max_concurrent_jobs);
595 double edge = computeEdge(training_set, test);
// The first candidate is always accepted; later ones only if they improve the edge.
596 if(j == source.
list().begin() || (edge > bestEdge))
606 std::cout <<
"\tavg edge = " << bestEdge <<
'\n';
// Same search as above, scored with computeEdge2 instead of computeEdge.
609 case Ranking_Mahalanobis:
616 std::vector<double> test = response;
619 updateResponse(training_set, test, *j, max_concurrent_jobs);
628 double edge = computeEdge2(training_set, test);
629 if(j == source.
list().begin() || (edge > bestEdge))
639 std::cout <<
"\tavg edge = " << bestEdge <<
'\n';
646 bestResponse = response;
648 updateResponse(training_set, bestResponse, *jbest, max_concurrent_jobs);
657 std::cout <<
"\talpha = " << jbest->alpha << std::endl;
// Two ways of choosing the stage threshold; the condition selecting between them is
// not visible in these lines.
665 r = findMinRejectionThreshold(training_set, bestResponse);
671 r = findBestRejectionThreshold(training_set, bestResponse, (
int) p);
675 std::cout <<
"\tthreshold = " << r << std::endl;
// Append the selected weak classifier with its weight and rejection threshold.
678 dest.insert(*jbest, jbest->alpha, r);
// Remove it from the candidate pool so it cannot be selected again.
681 source.
list().erase(jbest);
// Filter the samples (and bestResponse, in lock-step) using the new threshold.
684 rejectSamples(training_set, bestResponse, r);
// Prints the surviving counts, their percentage of the initial counts, and the change
// during this stage.
// NOTE(review): divides by initialPositive and initialNegative — confirm neither is 0.
689 std::cout <<
"\tpositive = " << training_set.n_patternP <<
" (" << (100 * training_set.n_patternP) / initialPositive <<
"%) " << (
int) training_set.n_patternP - curPositive
690 <<
" | negative = " << training_set.n_patternN <<
" (" << (100 * training_set.n_patternN) / initialNegative <<
"%) " << (
int) training_set.n_patternN - curNegative << std::endl;
// Accumulates the negatives still alive after each stage; used for the average printed at the end.
693 sumNegative += training_set.n_patternN;
// Stop early once one of the two classes has no samples left.
694 if(training_set.n_patternN == 0 || training_set.n_patternP == 0)
697 std::cout <<
"no pattern lefts. terminated." << std::endl;
708 std::cout <<
"Insert additional " << source.
list().size() <<
" features..." << std::endl;
// Remaining weak classifiers are appended with a fixed threshold of -100000.0.
711 dest.insert(*j, j->alpha, -100000.0);
717 std::cout <<
"[+] Average number of stages evaluated for negative samples = " << (double) sumNegative / (
double) initialNegative << std::endl;
ClassifierListType & list()
Return the inner list of classifier.
Definition: BoostClassifier.h:125
Definition: thread_group.h:82
void join_all()
wait for all threads to terminate
Definition: thread_group.h:114
proposal 1 for thread group
int size() const
return the number of weak classifiers
Definition: BoostClassifier.h:119
method to create function pointer for thread call
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
Definition: SoftCascade.h:76
a voting for majority classifier.