21 #ifndef _ORACLE_DECISION_TREE_H
22 #define _ORACLE_DECISION_TREE_H
33 #include "detail/BinaryClassifierOptimize.h"
36 template<
class DataType>
41 const int n_samples = indexes.size();
43 double wp = 0.0, wn = 0.0;
44 for(
int i=0; i<n_samples; ++i)
54 std::cout << max_depth <<
"-- build decision tree with " << n_samples <<
" elements. wp:" << wp <<
", wn:" << wn <<
", wp+wn:" << wp+wn << std::endl;
62 std::cout <<
"\tmax depth reached: positive with " << wp <<
"\n";
70 std::cout <<
"\tmax depth reached: negative with " << wn <<
"\n";
80 std::pair<DataType, double> * store =
new std::pair<DataType, double> [n_samples];
86 for(
int j=0; j< n_samples; ++j)
90 store[j].second = src.
weights[k];
94 std::sort(&store[0], &store[n_samples]);
99 std::pair<DataType, int> ret;
101 Optimize(ret, store, n_samples, wp, wn,
false);
109 for(
int j=0; j< n_samples; ++j)
117 w_left += std::abs(src.
weights[k]);
122 std::cout <<
"\t" << i <<
" | th: " << ret.first <<
", polarity:" << ret.second <<
", w:" << w <<
", left:" << left_size <<
"(" << w_left <<
"), right:" << n_samples - left_size <<
"(" << wn+wp-w_left <<
")"<< std::endl;
125 if((left_size == 0) || (left_size == n_samples ))
129 std::cout <<
"\t invalid solution" << std::endl;
136 std::vector<int> left_resp, right_resp;
138 left_resp.reserve(left_size);
139 right_resp.reserve(n_samples - left_size);
142 for(
int j=0; j< n_samples; ++j)
147 left_resp.push_back(k);
151 right_resp.push_back(k);
161 std::cout << max_depth <<
"-- left --\n";
165 std::cout << max_depth <<
"-- right --\n";
170 std::cout << max_depth <<
"-- w:" << w << std::endl;
199 std::cout <<
"no solution found:";
204 std::cout <<
"\tpositive with w:" << wp <<
"\n";
212 std::cout <<
"\tnegative with w:" << wn <<
"\n";
222 std::cout <<
"solution found with w:" << bestw << std::endl;
230 template<
class DataType>
231 void build_decision_tree_heuristic(
double *outw,
IndirectDecisionTree<DataType> * root,
const PrecomputedPatternResponse<DataType> * src,
int f0,
int f1,
int max_depth,
const int * indexes,
int n_samples,
int histo_bin_size,
float wp,
float wn,
bool optimize)
235 float *store =
new float[histo_bin_size];
237 for(
int i = f0; i< f1; ++i)
243 for(
int j = 0; j<histo_bin_size; ++j)
249 for(
int j=0; j<n_samples; ++j)
259 std::pair<DataType, int> ret;
261 SimpleOptimize(ret, store, histo_bin_size, wp, wn);
268 for(
int j=0; j<n_samples; ++j)
277 w_left += std::abs(src.
weights[k]);
282 if((left_size == 0) || (left_size == n_samples ))
292 std::vector<int> left_resp, right_resp;
294 left_resp.reserve(left_size);
295 right_resp.reserve(n_samples - left_size);
298 for(
int j=0; j<n_samples; ++j)
303 left_resp.push_back(k);
307 right_resp.push_back(k);
318 w = BuildDecisionTreeHeuristic(*left, *src, max_depth-1, left_resp, 1, histo_bin_size, optimize);
319 w += BuildDecisionTreeHeuristic(*right, *src, max_depth-1, right_resp, 1, histo_bin_size, optimize);
334 DataType delta = (maxr > minr) ? (maxr - minr + 1) : (1);
336 root->
th = minr + ((ret.first * delta) / histo_bin_size);
362 template<
class DataType>
367 const int n_samples = indexes.size();
370 float wp = 0.0, wn = 0.0;
371 for(
int i=0; i<n_samples; ++i)
381 std::cout << max_depth <<
"-- build decision tree with " << n_samples <<
" elements. wp:" << wp <<
", wn:" << wn <<
", wp+wn:" << wp+wn << std::endl;
389 std::cout <<
"\tmax depth reached: positive with " << wp <<
"\n";
397 std::cout <<
"\tmax depth reached: negative with " << wn <<
"\n";
406 if(max_concurrent_jobs>1)
411 double *outw =
new double[max_concurrent_jobs];
413 for(
int i =0; i<max_concurrent_jobs; ++i)
415 int f0 = (src.
n_features * i) / max_concurrent_jobs;
416 int f1 = (src.
n_features * (i+1) ) / max_concurrent_jobs;
418 pool.
create_thread(sprint::thread_bind(&detail::build_decision_tree_heuristic<DataType> , &outw[i], &candidates[i], &src, f0, f1, max_depth, &indexes[0], n_samples, histo_bin_size, wp, wn, reoptimize) );
423 for(
int i=0; i<max_concurrent_jobs; ++i)
428 root = candidates[i];
433 delete [] candidates;
437 detail::build_decision_tree_heuristic(&bestw, &root, &src, 0, src.
n_features, max_depth, &indexes[0], n_samples, histo_bin_size, wp, wn, reoptimize);
446 std::cout <<
"no solution found:";
451 std::cout <<
"\tpositive with w:" << wp <<
"\n";
459 std::cout <<
"\tnegative with w:" << wn <<
"\n";
469 std::cout <<
"solution found with w:" << bestw << std::endl;
476 template<
class DataType>
482 template<
class DataType>
483 void build_decision_tree_heuristic_v2(
double *outw,
IndirectDecisionTree<DataType> * root,
const PrecomputedPatternResponse<DataType> * src,
int f0,
int f1,
int max_depth,
const int * indexes,
int n_samples,
int histo_bin_size,
float wp,
float wn,
bool reoptimize)
486 std::pair<DataType, int> best_ret;
489 float *store =
new float[histo_bin_size];
492 for(
int i = f0; i< f1; ++i)
499 for(
int j = 0; j<histo_bin_size; ++j)
505 for(
int j=0; j<n_samples; ++j)
515 std::pair<DataType, int> ret;
517 double w = SimpleOptimize(ret, store, histo_bin_size, wp, wn);
535 std::pair<DataType, float> * store =
new std::pair<DataType, float> [n_samples];
540 for(
int j=0; j< n_samples; ++j)
544 store[j].second = src->
weights[k];
548 std::sort(&store[0], &store[n_samples]);
553 Optimize(best_ret, store, n_samples, wp, wn,
false);
555 root->
th = best_ret.first;
569 DataType delta = (maxr > minr) ? (maxr - minr + 1) : (1);
571 root->
th = minr + ((best_ret.first * delta) / histo_bin_size);
580 for(
int j=0; j<n_samples; ++j)
589 w_left += std::abs(src.
weights[k]);
594 if((left_size == 0) || (left_size == n_samples ))
605 std::vector<int> left_resp, right_resp;
607 left_resp.reserve(left_size);
608 right_resp.reserve(n_samples - left_size);
611 for(
int j=0; j<n_samples; ++j)
616 left_resp.push_back(k);
620 right_resp.push_back(k);
630 w = BuildDecisionTreeHeuristic_v2(*left, *src, max_depth-1, left_resp, 1, histo_bin_size, reoptimize);
631 w += BuildDecisionTreeHeuristic_v2(*right, *src, max_depth-1, right_resp, 1, histo_bin_size, reoptimize);
646 template<
class DataType>
651 const int n_samples = indexes.size();
654 float wp = 0.0, wn = 0.0;
655 for(
int i=0; i<n_samples; ++i)
680 if(max_concurrent_jobs>1)
685 double *outw =
new double[max_concurrent_jobs];
687 for(
int i =0; i<max_concurrent_jobs; ++i)
689 int f0 = (src.
n_features * i) / max_concurrent_jobs;
690 int f1 = (src.
n_features * (i+1) ) / max_concurrent_jobs;
692 pool.
create_thread(sprint::thread_bind(&detail::build_decision_tree_heuristic_v2<DataType> , &outw[i], &candidates[i], &src, f0, f1, max_depth, &indexes[0], n_samples, histo_bin_size, wp, wn, reoptimize) );
697 for(
int i=0; i<max_concurrent_jobs; ++i)
702 root = candidates[i];
707 delete [] candidates;
711 detail::build_decision_tree_heuristic_v2(&bestw, &root, &src, 0, src.
n_features, max_depth, &indexes[0], n_samples, histo_bin_size, wp, wn, reoptimize);
int * category
category list
Definition: PrecomputedPatternResponse.h:48
IndirectDecisionTree< DataType > * left
sub-nodes (if null is a leaf)
Definition: IndirectDecisionTree.h:45
this header contains PrecomputedPatternResponse class, used to store hash information provided by a m...
double * weights
weights list
Definition: PrecomputedPatternResponse.h:50
int category
if not a node, a category
Definition: IndirectDecisionTree.h:42
bool BuildDecisionTree(DecisionTree< FeatureExtractor > &root, const std::vector< PatternType > &patterns, Param1 param, FeatureGenerator &f, int max_depth)
Definition: DecisionTreeLearner.h:172
int * bin
precomputed bin matrix (rows features, columns samples)
Definition: PrecomputedPatternResponse.h:56
Definition: thread_group.h:82
void join_all()
wait all threads terminate
Definition: thread_group.h:114
proposal 1 for thread group
int n_features
number of feature preallocated
Definition: PrecomputedPatternResponse.h:46
int classifier
index to a Binary Classifier
Definition: IndirectDecisionTree.h:37
int n_samples
number of samples
Definition: PrecomputedPatternResponse.h:44
Definition: PrecomputedPatternResponse.h:41
method to create function pointer for thread call
std::pair< ResponseType, ResponseType > * response_range
precomputed response ranges
Definition: PrecomputedPatternResponse.h:54
bool create_thread(const sprint::thread_function &p)
create an additional thread
Definition: thread_group.h:102
ResponseType * response
response matrix. For performance reason it each row is a feature and columns are samples ...
Definition: PrecomputedPatternResponse.h:52
implement a decision tree with feature stored in a map
DataType th
Classifier Threshold.
Definition: IndirectDecisionTree.h:39
Definition: IndirectDecisionTree.h:33