// X-Boost 2.3.8
// SVM.h
1 /* XBoost: Ada-Boost and Friends on Haar/ICF/HOG Features, Library and ToolBox
2  *
3  * Copyright (c) 2008-2014 Paolo Medici <medici@ce.unipr.it>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 #ifndef _SVM_H
22 #define _SVM_H
23 
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <vector>
29 
//! dot product between two dense vectors
//! @param x1 first vector
//! @param x2 second vector
//! @param m  number of components
//! @return <x1, x2>
inline double dot(const double *x1, const double *x2, int m)
{
  double acc = 0.0;
  const double *stop = x1 + m;
  // sequential accumulation, left to right
  while(x1 != stop)
    acc += (*x1++) * (*x2++);
  return acc;
}
38 
40 inline double evaluate(const double *alpha, const int *y, const double **x, int n, int m)
41 {
42  double s = 0.0;
43  for(int i =0;i<n;++i)
44  s -= alpha[i];
45 
46  for(int i=0;i<n;++i)
47  for(int j=0;j<n;++j)
48  s+= y[i] * y[j] * dot(x[i], x[j], m) * alpha[i] * alpha[j];
49 
50  return s;
51 }
52 
54 inline double linear(const double *x, const double *w, double b, int m)
55 {
56  return dot(x,w,m) - b;
57 }
58 
60 inline double margin(const double *w, int m)
61 {
62  return 1.0 / std::sqrt(dot(w,w,m));
63 }
64 
67 
68 
70  std::vector<double *> m_x;
71 
73  int m_n;
75  int m_m;
76 
77 
78 public:
79 
81  {
82  }
83  ~metric_linear()
84  {
85  }
86 
87  void computeCache(int n, int m, const std::vector<double *> & x)
88  {
89  m_n = n;
90  m_m = m;
91  m_x = x;
92  std::cerr << "Cache completed" << std::endl;
93  }
94 
95  inline double f(int i, int j) const
96  {
97  return dot(m_x[i],m_x[j], m_m);
98  }
99 
100 };
101 
102 
105 
106  double *m_k; // cache
107 
109  int m_n;
111  int m_m;
112 
113 
114 public:
115 
116  metric_cache()
117  {
118  m_k = 0;
119  }
120  ~metric_cache()
121  {
122  delete [] m_k;
123  }
124 
125  void computeCache(int n, int m, const std::vector<double *> & x)
126  {
127  m_n = n;
128  m_m = m;
129 
130  std::cerr << "Expected " << n*n*sizeof(double)/(1024*1024) << " Mb for cache" << std::endl;
131  delete [] m_k;
132  m_k = new double[n*n];
133 
134  std::cerr << "Builindg cache..." << std::endl;
135 
136  for(int j=0;j<n;++j)
137  for(int i=0;i<n;++i)
138  m_k[i + j * n] = dot(x[i], x[j], m_m);
139 
140  std::cerr << "Cache completed" << std::endl;
141  }
142 
143  inline double f(int i, int j) const
144  {
145  return m_k[i + j * m_n];
146  }
147 
148 };
149 
151 template<class Functor>
152 struct svm_train: public Functor {
153 
155  int n;
157  int m;
158 
159 
161  std::vector<double *> x;
162 
164  std::vector<double> alpha;
165 
167  std::vector<int> y;
168 
170  double *w;
172  double bias;
173 
175  double C;
176 
178  double epsilon;
180  double tol;
181 
182 private:
183 
184  double *e; // error cache;
185 
186  int minimum;
187  int maximum;
188 
189 private:
190 
191 double cache(int i,int j) const {
192  return Functor::f(i,j);
193 }
194 
195 double learnedFunc( int k ) const
196 {
197  double s = 0.0;
198  long i;
199  for( i = 0; i < n; i++ ) {
200  if( alpha[i] > 0.0 )
201  s += alpha[i]*y[i]*cache(i,k);
202  }
203  s -= bias;
204  return s;
205 }
206 
207 
208 void updateErrorCache()
209 {
210  for(int i =0;i<n;++i)
211  e[i] = (alpha[i]>0.0 && alpha[i]<C) ? ( linear(x[i], w, bias, m) - y[i] ) : 0.0;
212 }
213 
215 int nonBoundLagrangeMultipliers() const
216 {
217  int result = 0;
218 
219  for (int i = 0; i < n; i++)
220  {
221  if (alpha[i] > 0.0 && alpha[i] < C)
222  {
223  result++;
224  }
225  }
226 
227  return result;
228 }
229 
231 int nonZeroLagrangeMultipliers() const
232 {
233  int result = 0;
234 
235  for (int i = 0; i < n; i++)
236  {
237  if (alpha[i] > 0.0)
238  {
239  result++;
240  }
241  }
242 
243  return result;
244 }
245 
246 
247 bool takeStep(int i1, int i2)
248 {
249 if(i1==i2)
250  return false;
251 
252 // std::cout << "examine " << i1 << ' ' << i2 << std::endl;
253 
254 double L,H;
255 double a1 = alpha[i1];
256 double a2 = alpha[i2];
257 int y1 = y[i1];
258 int y2 = y[i2];
259 int s = y1*y2;
260 
261 if(y1 != y2)
262 {
263  L = std::max<double>(0, a2 - a1);
264  H = std::min<double>(C, C + a2 - a1);
265 }
266 else
267 {
268  L = std::max<double>(0, a2 + a1 - C);
269  H = std::min<double>(C, a2 + a1);
270 }
271 if(L==H)
272 {
273 // std::cout << "\tL:" << L << "==H"<< std::endl;
274  return false;
275 }
276 
277 // std::cout << "L:" << L << " H:" << H << std::endl;
278 
279 /* recompute Lagrange multiplier for pattern i2 */
280 double e1 = (a1 > 0.0 && a1 < C) ? e[i1] : ( learnedFunc(i1) - y1 );
281 double e2 = (a2 > 0.0 && a2 < C) ? e[i2] : ( learnedFunc(i2) - y2 );
282 
283 double k11 = cache(i1,i1);
284 double k22 = cache(i2,i2);
285 double k12 = cache(i1,i2);
286 double eta = k11 + k22 - 2 * k12;
287 
288 // std::cout << "e1:" << e1 << " e2:" << e2 << " eta:" << eta << std::endl;
289 
290 if(eta > 0.0)
291  {
292  double a2_new = a2 + y2 * (e1 - e2) / eta;
293  // clamp inside the box
294  a2_new = (a2_new >= H) ? H : (a2_new <= L) ? L : a2_new;
295 
296  if (std::abs(a2_new-a2) < epsilon*(a2_new+a2+epsilon))
297  {
298 // std::cout << "rej1" << std::endl;
299  return false;
300  }
301 
302 
303  double a1_new = a1 + s * (a2 - a2_new);
304 
305  // update threshold
306  double w1 = y1*(a1_new - a1);
307  double w2 = y2*(a2_new - a2);
308  double b1 = e1 + w1*k11 + w2*k12;
309  double b2 = e2 + w1*k12 + w2*k22;
310  double bold = bias;
311 
312  bias += 0.5*(b1 + b2);
313  // update weight vector
314 
315 // w += y1 * (a1_new - a1) * x[i1] + y2 * (a2_new - a2) * x[i2];
316 
317  // update error cache
318  /* update error cache->*/
319 
320  if (std::abs(b1-b2) < epsilon)
321  {
322  e[i1] = 0.0;
323  e[i2] = 0.0;
324  }
325  else
326  {
327  if (a1 > 0.0 && a1 < C)
328  {
329  e[i1] = learnedFunc(i1) - y1;
330  }
331 
332  if (a2 > 0.0 && a2 < C)
333  {
334  e[i2] = learnedFunc(i2) - y2;
335  }
336  }
337 
338  if (e[i1] > e[i2])
339  {
340  minimum = i2;
341  maximum = i1;
342  }
343  else
344  {
345  minimum = i1;
346  maximum = i2;
347  }
348 
349  for (int i = 0; i < n; i++)
350  {
351  if (alpha[i] > 0.0 && alpha[i] < C && i != i1 && i != i2)
352  {
353  e[i] += w1 * cache(i1,i)
354  + w2 * cache(i2,i)
355  + bold - bias;
356 
357  if (e[i] > e[maximum])
358  {
359  maximum = i;
360  }
361 
362  if (e[i] < e[minimum])
363  {
364  minimum = i;
365  }
366  }
367  }
368 
369 
370 // std::cout << "a1:" << a1_new << " a2:" << a2_new << std::endl;
371 
372  alpha[i1] = a1_new;
373  alpha[i2] = a2_new;
374 
375  return true;
376  }
377 else
378 // std::cout << "\teta:" << eta << std::endl;
379 return false;
380 }
381 
382 bool examineNonBound(int i2)
383 {
384  int i1 = rand() % n;
385 
386  for (int i = 0; i < n; i++)
387  {
388  if (alpha[i1] > 0.0 && alpha[i1] < C)
389  {
390  if (takeStep(i1, i2))
391  {
392  return true;
393  }
394  }
395  i1 = (i1 + 1) % n;
396  }
397 
398  return false;
399 }
400 
401 bool examineBound(int i2)
402 {
403  int i1 = rand() % n;
404 
405  for (int i = 0; i < n; i++)
406  {
407  if (alpha[i1] == 0.0 || alpha[i1] == C)
408  {
409  if (takeStep(i1, i2))
410  {
411  return true;
412  }
413  }
414  i1 = (i1 + 1) % n;
415  }
416 
417  return false;
418 }
419 
420 
421 bool examineFirstChoice(int i2)
422 {
423  if (minimum > -1)
424  {
425  double e2 = e[i2];
426  if (std::abs(e2 - e[minimum]) > std::abs(e2 - e[maximum]))
427  {
428  if (takeStep(minimum, i2))
429  {
430  return true;
431  }
432  }
433  else
434  {
435  if (takeStep(maximum, i2))
436  {
437  return true;
438  }
439  }
440  }
441 
442  return false;
443 }
444 
445 bool examineExample(int i2)
446 {
447  int y2 = y[i2];
448  double a2 = alpha[i2];
449  double e2 = e[i2];
450  double r2 = e2 * y2;
451 
452 // std::cout << "examine " << i2 << " e:" << e2 << " r2:" << r2 << std::endl;
453 
454  if((r2 < - tol && a2 < C) || (r2 > tol && a2 > 0.0))
455  {
456 
457  if(examineFirstChoice(i2))
458  return true;
459 
460  if(examineNonBound(i2))
461  return true;
462 
463  if(examineBound(i2))
464  return true;
465 
466  }
467 
468 return false;
469 }
470 
471 public:
472 
473 svm_train(int _m) : m(_m)
474  {
475  n = 0;
476 
477  w = new double[m];
478  for(int i=0;i<m;++i)
479  w[i] = 0.0;
480  bias = 0.0;
481 
482  minimum = -1;
483  maximum = -1;
484 
485  epsilon = 1e-12;
486  tol = 1e-3;
487  e = 0;
488  }
489 
490 ~svm_train()
491 {
492  for(int i=0;i<n;++i)
493  delete [] x[i];
494  delete [] w;
495  delete [] e;
496 }
497 
498 void insert(int y, double *x)
499 {
500  this->y.push_back(y);
501  this->x.push_back(x);
502  this->alpha.push_back(0.0);
503 
504  ++n;
505 }
506 
507 void train()
508 {
509  double examineAll = true;
510  int numChanged = 0;
511 
512  if(n==0)
513  return;
514 
515  delete [] e;
516  e = new double [n];
517 
518  Functor::computeCache(n,m,x);
519 
520  for (int i = 0; i < n; i++)
521  {
522 // this->C[i] = c*z[i];
523 
524  if (alpha[i] > 0.0 && alpha[i] < C)
525  {
526 
527  // e[i] = learnedFunc(i) - y[i];
528  }
529  else
530  {
531  // e[i] = 0.0;
532  }
533  e[i] = - y[i];
534  }
535 
536  while(numChanged > 0 || examineAll)
537  {
538 
539 // std::cout << "iter...\n";
540  numChanged = 0;
541  if(examineAll)
542  {
543  for(int i =0;i<n;++i)
544  numChanged += examineExample(i) ? 1 : 0;
545  }
546  else
547  {
548  for(int i =0;i<n;++i)
549  if (alpha[i] > 0 && alpha[i] < C)
550  numChanged += examineExample(i) ? 1 : 0;
551  }
552 
553  if(examineAll)
554  examineAll = false;
555  else
556  if(numChanged == 0)
557  examineAll = true;
558  }
559 
560 
561  for(int j =0;j<m;++j)
562  {
563  w[j]=0.0;
564  for(int i =0;i<n;++i)
565  w[j] += y[i]*alpha[i] * x[i][j];
566  }
567 
568  std::cerr << numChanged << std::endl;
569 
570 // int imax = 0;
571 // double amx = alpha[0];
572 //
573 // for(int i =0;i<n;++i)
574 // if(alpha[i]>amx)
575 // {
576 // amx = alpha[i];
577 // imax = i;
578 // }
579 // for(int i=0;i<m;++i)
580 // std::cout << ' ' << w[i];
581 // std::cout << std::endl;
582 // for(int i=0;i<m;++i)
583 // std::cout << ' ' << x[imax][i];
584 // std::cout << std::endl;
585 // std::cout <<"dit"<<dot(w, x[imax],m)<<std::endl;
586  // bias = dot(w, x[imax],m) - y[imax];
587 }
588 
589 };
590 
591 #endif
/* Doxygen cross-reference residue (preserved from the extracted documentation):
 int m — number of features (space size) — Definition: SVM.h:157
 dot — dot product — Definition: SVM.h:66
 svm_train — SVM trainer — Definition: SVM.h:152
 std::vector<double> alpha — alpha weights — Definition: SVM.h:164
 int n — number of samples — Definition: SVM.h:155
 std::vector<double *> x — input patterns — Definition: SVM.h:161
 double tol — tolerance — Definition: SVM.h:180
 std::vector<int> y — train category {-1,+1} — Definition: SVM.h:167
 double epsilon — epsilon — Definition: SVM.h:178
 double * w — hyperplane vector [out] — Definition: SVM.h:170
 double C — C constant of the SVM — Definition: SVM.h:175
 double bias — hyperplane bias [out] — Definition: SVM.h:172
 metric_cache — a metric that precomputes the kernel cache — Definition: SVM.h:104
*/