Medical Imaging Interaction Toolkit  2016.11.0
Medical Imaging Interaction Toolkit
svm.cpp
Go to the documentation of this file.
1 /*===================================================================
2 
3 The Medical Imaging Interaction Toolkit (MITK)
4 
5 Copyright (c) German Cancer Research Center,
6 Division of Medical and Biological Informatics.
7 All rights reserved.
8 
9 This software is distributed WITHOUT ANY WARRANTY; without
10 even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 A PARTICULAR PURPOSE.
12 
13 See LICENSE.txt or http://www.mitk.org for details.
14 
15 ===================================================================*/
16 
17 /*===================================================================
18 
19 Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin
20 All rights reserved.
21 
22 Redistribution and use in source and binary forms, with or without
23 modification, are permitted provided that the following conditions
24 are met:
25 
26 1. Redistributions of source code must retain the above copyright
27 notice, this list of conditions and the following disclaimer.
28 
29 2. Redistributions in binary form must reproduce the above copyright
30 notice, this list of conditions and the following disclaimer in the
31 documentation and/or other materials provided with the distribution.
32 
33 3. Neither name of copyright holders nor the names of its contributors
34 may be used to endorse or promote products derived from this software
35 without specific prior written permission.
36 
37 
38 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
39 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
40 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
41 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
42 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
43 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
44 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
45 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
46 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
47 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
48 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 
50 ===================================================================*/
51 
52 #include <math.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <ctype.h>
56 #include <float.h>
57 #include <string.h>
58 #include <stdarg.h>
59 #include <limits.h>
60 #include <locale.h>
61 #include <mitkLocaleSwitch.h>
62 #include "svm.h"
64 typedef float Qfloat;
65 typedef signed char schar;
66 #ifndef min
67 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
68 #endif
69 #ifndef max
70 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
71 #endif
72 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
73 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
74 {
75  dst = new T[n];
76  memcpy((void *)dst,(void *)src,sizeof(T)*n);
77 }
78 static inline double powi(double base, int times)
79 {
80  double tmp = base, ret = 1.0;
81 
82  for(int t=times; t>0; t/=2)
83  {
84  if(t%2==1) ret*=tmp;
85  tmp = tmp * tmp;
86  }
87  return ret;
88 }
89 #define INF HUGE_VAL
90 #define TAU 1e-12
91 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
92 
93 static void print_string_stdout(const char *s)
94 {
95  fputs(s,stdout);
96  fflush(stdout);
97 }
98 static void (*svm_print_string) (const char *) = &print_string_stdout;
99 #if 1
100 static void info(const char *fmt,...)
101 {
102  char buf[BUFSIZ];
103  va_list ap;
104  va_start(ap,fmt);
105  vsprintf(buf,fmt,ap);
106  va_end(ap);
107  (*svm_print_string)(buf);
108 }
109 #else
110 static void info(const char *fmt,...) {}
111 #endif
112 
113 //
114 // Kernel Cache
115 //
116 // l is the number of total data items
117 // size is the cache size limit in bytes
118 //
119 class Cache
120 {
121 public:
122  Cache(int l,long int size);
123  ~Cache();
124 
125  // request data [0,len)
126  // return some position p where [p,len) need to be filled
127  // (p >= len if nothing needs to be filled)
128  int get_data(const int index, Qfloat **data, int len);
129  void swap_index(int i, int j);
130 private:
131  int l;
132  long int size;
133  struct head_t
134  {
135  head_t *prev, *next; // a circular list
136  Qfloat *data;
137  int len; // data[0,len) is cached in this entry
138  };
139 
140  head_t *head;
141  head_t lru_head;
142  void lru_delete(head_t *h);
143  void lru_insert(head_t *h);
144 };
145 
146 Cache::Cache(int l_,long int size_):l(l_),size(size_)
147 {
148  head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
149  size /= sizeof(Qfloat);
150  size -= l * sizeof(head_t) / sizeof(Qfloat);
151  size = max(size, 2 * (long int) l); // cache must be large enough for two columns
152  lru_head.next = lru_head.prev = &lru_head;
153 }
154 
155 Cache::~Cache()
156 {
157  for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
158  free(h->data);
159  free(head);
160 }
161 
162 void Cache::lru_delete(head_t *h)
163 {
164  // delete from current location
165  h->prev->next = h->next;
166  h->next->prev = h->prev;
167 }
168 
169 void Cache::lru_insert(head_t *h)
170 {
171  // insert to last position
172  h->next = &lru_head;
173  h->prev = lru_head.prev;
174  h->prev->next = h;
175  h->next->prev = h;
176 }
177 
178 int Cache::get_data(const int index, Qfloat **data, int len)
179 {
180  head_t *h = &head[index];
181  if(h->len) lru_delete(h);
182  int more = len - h->len;
183 
184  if(more > 0)
185  {
186  // free old space
187  while(size < more)
188  {
189  head_t *old = lru_head.next;
190  lru_delete(old);
191  free(old->data);
192  size += old->len;
193  old->data = 0;
194  old->len = 0;
195  }
196 
197  // allocate new space
198  h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
199  size -= more;
200  swap(h->len,len);
201  }
202 
203  lru_insert(h);
204  *data = h->data;
205  return len;
206 }
207 
208 void Cache::swap_index(int i, int j)
209 {
210  if(i==j) return;
211 
212  if(head[i].len) lru_delete(&head[i]);
213  if(head[j].len) lru_delete(&head[j]);
214  swap(head[i].data,head[j].data);
215  swap(head[i].len,head[j].len);
216  if(head[i].len) lru_insert(&head[i]);
217  if(head[j].len) lru_insert(&head[j]);
218 
219  if(i>j) swap(i,j);
220  for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
221  {
222  if(h->len > i)
223  {
224  if(h->len > j)
225  swap(h->data[i],h->data[j]);
226  else
227  {
228  // give up
229  lru_delete(h);
230  free(h->data);
231  size += h->len;
232  h->data = 0;
233  h->len = 0;
234  }
235  }
236  }
237 }
238 
239 //
240 // Kernel evaluation
241 //
242 // the static method k_function is for doing single kernel evaluation
243 // the constructor of Kernel prepares to calculate the l*l kernel matrix
244 // the member function get_Q is for getting one column from the Q Matrix
245 //
246 class QMatrix {
247 public:
248  virtual Qfloat *get_Q(int column, int len) const = 0;
249  virtual double *get_QD() const = 0;
250  virtual void swap_index(int i, int j) const = 0;
251  virtual ~QMatrix() {}
252 };
253 
254 class Kernel: public QMatrix {
255 public:
256  Kernel(int l, svm_node * const * x, const svm_parameter& param);
257  virtual ~Kernel();
258 
259  static double k_function(const svm_node *x, const svm_node *y,
260  const svm_parameter& param);
261  virtual Qfloat *get_Q(int column, int len) const = 0;
262  virtual double *get_QD() const = 0;
263  virtual void swap_index(int i, int j) const // no so const...
264  {
265  swap(x[i],x[j]);
266  if(x_square) swap(x_square[i],x_square[j]);
267  }
268 protected:
269 
270  double (Kernel::*kernel_function)(int i, int j) const;
271 
272 private:
273  const svm_node **x;
274  double *x_square;
275 
276  // svm_parameter
277  const int kernel_type;
278  const int degree;
279  const double gamma;
280  const double coef0;
281 
282  static double dot(const svm_node *px, const svm_node *py);
283  double kernel_linear(int i, int j) const
284  {
285  return dot(x[i],x[j]);
286  }
287  double kernel_poly(int i, int j) const
288  {
289  return powi(gamma*dot(x[i],x[j])+coef0,degree);
290  }
291  double kernel_rbf(int i, int j) const
292  {
293  return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
294  }
295  double kernel_sigmoid(int i, int j) const
296  {
297  return tanh(gamma*dot(x[i],x[j])+coef0);
298  }
299  double kernel_precomputed(int i, int j) const
300  {
301  return x[i][(int)(x[j][0].value)].value;
302  }
303 };
304 
305 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
306  :kernel_type(param.kernel_type), degree(param.degree),
307  gamma(param.gamma), coef0(param.coef0)
308 {
309  switch(kernel_type)
310  {
311  case LINEAR:
312  kernel_function = &Kernel::kernel_linear;
313  break;
314  case POLY:
315  kernel_function = &Kernel::kernel_poly;
316  break;
317  case RBF:
318  kernel_function = &Kernel::kernel_rbf;
319  break;
320  case SIGMOID:
321  kernel_function = &Kernel::kernel_sigmoid;
322  break;
323  case PRECOMPUTED:
324  kernel_function = &Kernel::kernel_precomputed;
325  break;
326  }
327 
328  clone(x,x_,l);
329 
330  if(kernel_type == RBF)
331  {
332  x_square = new double[l];
333  for(int i=0;i<l;i++)
334  x_square[i] = dot(x[i],x[i]);
335  }
336  else
337  x_square = 0;
338 }
339 
340 Kernel::~Kernel()
341 {
342  delete[] x;
343  delete[] x_square;
344 }
345 
346 double Kernel::dot(const svm_node *px, const svm_node *py)
347 {
348  double sum = 0;
349  while(px->index != -1 && py->index != -1)
350  {
351  if(px->index == py->index)
352  {
353  sum += px->value * py->value;
354  ++px;
355  ++py;
356  }
357  else
358  {
359  if(px->index > py->index)
360  ++py;
361  else
362  ++px;
363  }
364  }
365  return sum;
366 }
367 
368 double Kernel::k_function(const svm_node *x, const svm_node *y,
369  const svm_parameter& param)
370 {
371  switch(param.kernel_type)
372  {
373  case LINEAR:
374  return dot(x,y);
375  case POLY:
376  return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
377  case RBF:
378  {
379  double sum = 0;
380  while(x->index != -1 && y->index !=-1)
381  {
382  if(x->index == y->index)
383  {
384  double d = x->value - y->value;
385  sum += d*d;
386  ++x;
387  ++y;
388  }
389  else
390  {
391  if(x->index > y->index)
392  {
393  sum += y->value * y->value;
394  ++y;
395  }
396  else
397  {
398  sum += x->value * x->value;
399  ++x;
400  }
401  }
402  }
403 
404  while(x->index != -1)
405  {
406  sum += x->value * x->value;
407  ++x;
408  }
409 
410  while(y->index != -1)
411  {
412  sum += y->value * y->value;
413  ++y;
414  }
415 
416  return exp(-param.gamma*sum);
417  }
418  case SIGMOID:
419  return tanh(param.gamma*dot(x,y)+param.coef0);
420  case PRECOMPUTED: //x: test (validation), y: SV
421  return x[(int)(y->value)].value;
422  default:
423  return 0; // Unreachable
424  }
425 }
426 
427 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
428 // Solves:
429 //
430 // min 0.5(\alpha^T Q \alpha) + p^T \alpha
431 //
432 // y^T \alpha = \delta
433 // y_i = +1 or -1
434 // 0 <= alpha_i <= Cp for y_i = 1
435 // 0 <= alpha_i <= Cn for y_i = -1
436 //
437 // Given:
438 //
439 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
440 // l is the size of vectors and matrices
441 // eps is the stopping tolerance
442 //
443 // solution will be put in \alpha, objective value will be put in obj
444 //
445 class Solver {
446 public:
447  Solver() {};
448  virtual ~Solver() {};
449 
450  struct SolutionInfo {
451  double obj;
452  double rho;
453  double *upper_bound;
454  double r; // for Solver_NU
455  };
456 
457  void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
458  double *alpha_, const double* C_, double eps,
459  SolutionInfo* si, int shrinking);
460 protected:
461  int active_size;
462  schar *y;
463  double *G; // gradient of objective function
464  enum { LOWER_BOUND, UPPER_BOUND, FREE };
465  char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
466  double *alpha;
467  const QMatrix *Q;
468  const double *QD;
469  double eps;
470  double Cp,Cn;
471  double *C;
472  double *p;
473  int *active_set;
474  double *G_bar; // gradient, if we treat free variables as 0
475  int l;
476  bool unshrink; // XXX
477 
478  double get_C(int i)
479  {
480  return C[i];
481  }
482  void update_alpha_status(int i)
483  {
484  if(alpha[i] >= get_C(i))
485  alpha_status[i] = UPPER_BOUND;
486  else if(alpha[i] <= 0)
487  alpha_status[i] = LOWER_BOUND;
488  else alpha_status[i] = FREE;
489  }
490  bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
491  bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
492  bool is_free(int i) { return alpha_status[i] == FREE; }
493  void swap_index(int i, int j);
494  void reconstruct_gradient();
495  virtual int select_working_set(int &i, int &j);
496  virtual double calculate_rho();
497  virtual void do_shrinking();
498 private:
499  bool be_shrunk(int i, double Gmax1, double Gmax2);
500 };
501 
502 void Solver::swap_index(int i, int j)
503 {
504  Q->swap_index(i,j);
505  swap(y[i],y[j]);
506  swap(G[i],G[j]);
507  swap(alpha_status[i],alpha_status[j]);
508  swap(alpha[i],alpha[j]);
509  swap(p[i],p[j]);
510  swap(active_set[i],active_set[j]);
511  swap(G_bar[i],G_bar[j]);
512  swap(C[i],C[j]);
513 }
514 
515 void Solver::reconstruct_gradient()
516 {
517  // reconstruct inactive elements of G from G_bar and free variables
518 
519  if(active_size == l) return;
520 
521  int i,j;
522  int nr_free = 0;
523 
524  for(j=active_size;j<l;j++)
525  G[j] = G_bar[j] + p[j];
526 
527  for(j=0;j<active_size;j++)
528  if(is_free(j))
529  nr_free++;
530 
531  if(2*nr_free < active_size)
532  info("\nWARNING: using -h 0 may be faster\n");
533 
534  if (nr_free*l > 2*active_size*(l-active_size))
535  {
536  for(i=active_size;i<l;i++)
537  {
538  const Qfloat *Q_i = Q->get_Q(i,active_size);
539  for(j=0;j<active_size;j++)
540  if(is_free(j))
541  G[i] += alpha[j] * Q_i[j];
542  }
543  }
544  else
545  {
546  for(i=0;i<active_size;i++)
547  if(is_free(i))
548  {
549  const Qfloat *Q_i = Q->get_Q(i,l);
550  double alpha_i = alpha[i];
551  for(j=active_size;j<l;j++)
552  G[j] += alpha_i * Q_i[j];
553  }
554  }
555 }
556 
557 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
558  double *alpha_, const double* C_, double eps,
559  SolutionInfo* si, int shrinking)
560 {
561  this->l = l;
562  this->Q = &Q;
563  QD=Q.get_QD();
564  clone(p, p_,l);
565  clone(y, y_,l);
566  clone(alpha,alpha_,l);
567  clone(C,C_,l);
568  this->eps = eps;
569  unshrink = false;
570 
571  // initialize alpha_status
572  {
573  alpha_status = new char[l];
574  for(int i=0;i<l;i++)
575  update_alpha_status(i);
576  }
577 
578  // initialize active set (for shrinking)
579  {
580  active_set = new int[l];
581  for(int i=0;i<l;i++)
582  active_set[i] = i;
583  active_size = l;
584  }
585 
586  // initialize gradient
587  {
588  G = new double[l];
589  G_bar = new double[l];
590  int i;
591  for(i=0;i<l;i++)
592  {
593  G[i] = p[i];
594  G_bar[i] = 0;
595  }
596  for(i=0;i<l;i++)
597  if(!is_lower_bound(i))
598  {
599  const Qfloat *Q_i = Q.get_Q(i,l);
600  double alpha_i = alpha[i];
601  int j;
602  for(j=0;j<l;j++)
603  G[j] += alpha_i*Q_i[j];
604  if(is_upper_bound(i))
605  for(j=0;j<l;j++)
606  G_bar[j] += get_C(i) * Q_i[j];
607  }
608  }
609 
610  // optimization step
611 
612  int iter = 0;
613  int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
614  int counter = min(l,1000)+1;
615 
616  while(iter < max_iter)
617  {
618  // show progress and do shrinking
619 
620  if(--counter == 0)
621  {
622  counter = min(l,1000);
623  if(shrinking) do_shrinking();
624  info(".");
625  }
626 
627  int i,j;
628  if(select_working_set(i,j)!=0)
629  {
630  // reconstruct the whole gradient
631  reconstruct_gradient();
632  // reset active set size and check
633  active_size = l;
634  info("*");
635  if(select_working_set(i,j)!=0)
636  break;
637  else
638  counter = 1; // do shrinking next iteration
639  }
640 
641  ++iter;
642 
643  // update alpha[i] and alpha[j], handle bounds carefully
644 
645  const Qfloat *Q_i = Q.get_Q(i,active_size);
646  const Qfloat *Q_j = Q.get_Q(j,active_size);
647 
648  double C_i = get_C(i);
649  double C_j = get_C(j);
650 
651  double old_alpha_i = alpha[i];
652  double old_alpha_j = alpha[j];
653 
654  if(y[i]!=y[j])
655  {
656  double quad_coef = QD[i]+QD[j]+2*Q_i[j];
657  if (quad_coef <= 0)
658  quad_coef = TAU;
659  double delta = (-G[i]-G[j])/quad_coef;
660  double diff = alpha[i] - alpha[j];
661  alpha[i] += delta;
662  alpha[j] += delta;
663 
664  if(diff > 0)
665  {
666  if(alpha[j] < 0)
667  {
668  alpha[j] = 0;
669  alpha[i] = diff;
670  }
671  }
672  else
673  {
674  if(alpha[i] < 0)
675  {
676  alpha[i] = 0;
677  alpha[j] = -diff;
678  }
679  }
680  if(diff > C_i - C_j)
681  {
682  if(alpha[i] > C_i)
683  {
684  alpha[i] = C_i;
685  alpha[j] = C_i - diff;
686  }
687  }
688  else
689  {
690  if(alpha[j] > C_j)
691  {
692  alpha[j] = C_j;
693  alpha[i] = C_j + diff;
694  }
695  }
696  }
697  else
698  {
699  double quad_coef = QD[i]+QD[j]-2*Q_i[j];
700  if (quad_coef <= 0)
701  quad_coef = TAU;
702  double delta = (G[i]-G[j])/quad_coef;
703  double sum = alpha[i] + alpha[j];
704  alpha[i] -= delta;
705  alpha[j] += delta;
706 
707  if(sum > C_i)
708  {
709  if(alpha[i] > C_i)
710  {
711  alpha[i] = C_i;
712  alpha[j] = sum - C_i;
713  }
714  }
715  else
716  {
717  if(alpha[j] < 0)
718  {
719  alpha[j] = 0;
720  alpha[i] = sum;
721  }
722  }
723  if(sum > C_j)
724  {
725  if(alpha[j] > C_j)
726  {
727  alpha[j] = C_j;
728  alpha[i] = sum - C_j;
729  }
730  }
731  else
732  {
733  if(alpha[i] < 0)
734  {
735  alpha[i] = 0;
736  alpha[j] = sum;
737  }
738  }
739  }
740 
741  // update G
742 
743  double delta_alpha_i = alpha[i] - old_alpha_i;
744  double delta_alpha_j = alpha[j] - old_alpha_j;
745 
746  for(int k=0;k<active_size;k++)
747  {
748  G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
749  }
750 
751  // update alpha_status and G_bar
752 
753  {
754  bool ui = is_upper_bound(i);
755  bool uj = is_upper_bound(j);
756  update_alpha_status(i);
757  update_alpha_status(j);
758  int k;
759  if(ui != is_upper_bound(i))
760  {
761  Q_i = Q.get_Q(i,l);
762  if(ui)
763  for(k=0;k<l;k++)
764  G_bar[k] -= C_i * Q_i[k];
765  else
766  for(k=0;k<l;k++)
767  G_bar[k] += C_i * Q_i[k];
768  }
769 
770  if(uj != is_upper_bound(j))
771  {
772  Q_j = Q.get_Q(j,l);
773  if(uj)
774  for(k=0;k<l;k++)
775  G_bar[k] -= C_j * Q_j[k];
776  else
777  for(k=0;k<l;k++)
778  G_bar[k] += C_j * Q_j[k];
779  }
780  }
781  }
782 
783  if(iter >= max_iter)
784  {
785  if(active_size < l)
786  {
787  // reconstruct the whole gradient to calculate objective value
788  reconstruct_gradient();
789  active_size = l;
790  info("*");
791  }
792  fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
793  }
794 
795  // calculate rho
796 
797  si->rho = calculate_rho();
798 
799  // calculate objective value
800  {
801  double v = 0;
802  int i;
803  for(i=0;i<l;i++)
804  v += alpha[i] * (G[i] + p[i]);
805 
806  si->obj = v/2;
807  }
808 
809  // put back the solution
810  {
811  for(int i=0;i<l;i++)
812  alpha_[active_set[i]] = alpha[i];
813  }
814 
815  // juggle everything back
816  /*{
817  for(int i=0;i<l;i++)
818  while(active_set[i] != i)
819  swap_index(i,active_set[i]);
820  // or Q.swap_index(i,active_set[i]);
821  }*/
822 
823  for(int i=0;i<l;i++)
824  si->upper_bound[i] = C[i];
825 
826  info("\noptimization finished, #iter = %d\n",iter);
827 
828  delete[] p;
829  delete[] y;
830  delete[] C;
831  delete[] alpha;
832  delete[] alpha_status;
833  delete[] active_set;
834  delete[] G;
835  delete[] G_bar;
836 }
837 
838 // return 1 if already optimal, return 0 otherwise
839 int Solver::select_working_set(int &out_i, int &out_j)
840 {
841  // return i,j such that
842  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
843  // j: minimizes the decrease of obj value
844  // (if quadratic coefficeint <= 0, replace it with tau)
845  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
846 
847  double Gmax = -INF;
848  double Gmax2 = -INF;
849  int Gmax_idx = -1;
850  int Gmin_idx = -1;
851  double obj_diff_min = INF;
852 
853  for(int t=0;t<active_size;t++)
854  if(y[t]==+1)
855  {
856  if(!is_upper_bound(t))
857  if(-G[t] >= Gmax)
858  {
859  Gmax = -G[t];
860  Gmax_idx = t;
861  }
862  }
863  else
864  {
865  if(!is_lower_bound(t))
866  if(G[t] >= Gmax)
867  {
868  Gmax = G[t];
869  Gmax_idx = t;
870  }
871  }
872 
873  int i = Gmax_idx;
874  const Qfloat *Q_i = NULL;
875  if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
876  Q_i = Q->get_Q(i,active_size);
877 
878  for(int j=0;j<active_size;j++)
879  {
880  if(y[j]==+1)
881  {
882  if (!is_lower_bound(j))
883  {
884  double grad_diff=Gmax+G[j];
885  if (G[j] >= Gmax2)
886  Gmax2 = G[j];
887  if (grad_diff > 0)
888  {
889  double obj_diff;
890  double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
891  if (quad_coef > 0)
892  obj_diff = -(grad_diff*grad_diff)/quad_coef;
893  else
894  obj_diff = -(grad_diff*grad_diff)/TAU;
895 
896  if (obj_diff <= obj_diff_min)
897  {
898  Gmin_idx=j;
899  obj_diff_min = obj_diff;
900  }
901  }
902  }
903  }
904  else
905  {
906  if (!is_upper_bound(j))
907  {
908  double grad_diff= Gmax-G[j];
909  if (-G[j] >= Gmax2)
910  Gmax2 = -G[j];
911  if (grad_diff > 0)
912  {
913  double obj_diff;
914  double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
915  if (quad_coef > 0)
916  obj_diff = -(grad_diff*grad_diff)/quad_coef;
917  else
918  obj_diff = -(grad_diff*grad_diff)/TAU;
919 
920  if (obj_diff <= obj_diff_min)
921  {
922  Gmin_idx=j;
923  obj_diff_min = obj_diff;
924  }
925  }
926  }
927  }
928  }
929 
930  if(Gmax+Gmax2 < eps)
931  return 1;
932 
933  out_i = Gmax_idx;
934  out_j = Gmin_idx;
935  return 0;
936 }
937 
938 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
939 {
940  if(is_upper_bound(i))
941  {
942  if(y[i]==+1)
943  return(-G[i] > Gmax1);
944  else
945  return(-G[i] > Gmax2);
946  }
947  else if(is_lower_bound(i))
948  {
949  if(y[i]==+1)
950  return(G[i] > Gmax2);
951  else
952  return(G[i] > Gmax1);
953  }
954  else
955  return(false);
956 }
957 
958 void Solver::do_shrinking()
959 {
960  int i;
961  double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
962  double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
963 
964  // find maximal violating pair first
965  for(i=0;i<active_size;i++)
966  {
967  if(y[i]==+1)
968  {
969  if(!is_upper_bound(i))
970  {
971  if(-G[i] >= Gmax1)
972  Gmax1 = -G[i];
973  }
974  if(!is_lower_bound(i))
975  {
976  if(G[i] >= Gmax2)
977  Gmax2 = G[i];
978  }
979  }
980  else
981  {
982  if(!is_upper_bound(i))
983  {
984  if(-G[i] >= Gmax2)
985  Gmax2 = -G[i];
986  }
987  if(!is_lower_bound(i))
988  {
989  if(G[i] >= Gmax1)
990  Gmax1 = G[i];
991  }
992  }
993  }
994 
995  if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
996  {
997  unshrink = true;
998  reconstruct_gradient();
999  active_size = l;
1000  info("*");
1001  }
1002 
1003  for(i=0;i<active_size;i++)
1004  if (be_shrunk(i, Gmax1, Gmax2))
1005  {
1006  active_size--;
1007  while (active_size > i)
1008  {
1009  if (!be_shrunk(active_size, Gmax1, Gmax2))
1010  {
1011  swap_index(i,active_size);
1012  break;
1013  }
1014  active_size--;
1015  }
1016  }
1017 }
1018 
1019 double Solver::calculate_rho()
1020 {
1021  double r;
1022  int nr_free = 0;
1023  double ub = INF, lb = -INF, sum_free = 0;
1024  for(int i=0;i<active_size;i++)
1025  {
1026  double yG = y[i]*G[i];
1027 
1028  if(is_upper_bound(i))
1029  {
1030  if(y[i]==-1)
1031  ub = min(ub,yG);
1032  else
1033  lb = max(lb,yG);
1034  }
1035  else if(is_lower_bound(i))
1036  {
1037  if(y[i]==+1)
1038  ub = min(ub,yG);
1039  else
1040  lb = max(lb,yG);
1041  }
1042  else
1043  {
1044  ++nr_free;
1045  sum_free += yG;
1046  }
1047  }
1048 
1049  if(nr_free>0)
1050  r = sum_free/nr_free;
1051  else
1052  r = (ub+lb)/2;
1053 
1054  return r;
1055 }
1056 
1057 //
1058 // Solver for nu-svm classification and regression
1059 //
1060 // additional constraint: e^T \alpha = constant
1061 //
1062 class Solver_NU: public Solver
1063 {
1064 public:
1065  Solver_NU() {}
1066  void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1067  double *alpha, double* C_, double eps,
1068  SolutionInfo* si, int shrinking)
1069  {
1070  this->si = si;
1071  Solver::Solve(l,Q,p,y,alpha,C_,eps,si,shrinking);
1072  }
1073 private:
1074  SolutionInfo *si;
1075  int select_working_set(int &i, int &j);
1076  double calculate_rho();
1077  bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1078  void do_shrinking();
1079 };
1080 
1081 // return 1 if already optimal, return 0 otherwise
1082 int Solver_NU::select_working_set(int &out_i, int &out_j)
1083 {
1084  // return i,j such that y_i = y_j and
1085  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1086  // j: minimizes the decrease of obj value
1087  // (if quadratic coefficeint <= 0, replace it with tau)
1088  // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1089 
1090  double Gmaxp = -INF;
1091  double Gmaxp2 = -INF;
1092  int Gmaxp_idx = -1;
1093 
1094  double Gmaxn = -INF;
1095  double Gmaxn2 = -INF;
1096  int Gmaxn_idx = -1;
1097 
1098  int Gmin_idx = -1;
1099  double obj_diff_min = INF;
1100 
1101  for(int t=0;t<active_size;t++)
1102  if(y[t]==+1)
1103  {
1104  if(!is_upper_bound(t))
1105  if(-G[t] >= Gmaxp)
1106  {
1107  Gmaxp = -G[t];
1108  Gmaxp_idx = t;
1109  }
1110  }
1111  else
1112  {
1113  if(!is_lower_bound(t))
1114  if(G[t] >= Gmaxn)
1115  {
1116  Gmaxn = G[t];
1117  Gmaxn_idx = t;
1118  }
1119  }
1120 
1121  int ip = Gmaxp_idx;
1122  int in = Gmaxn_idx;
1123  const Qfloat *Q_ip = NULL;
1124  const Qfloat *Q_in = NULL;
1125  if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1126  Q_ip = Q->get_Q(ip,active_size);
1127  if(in != -1)
1128  Q_in = Q->get_Q(in,active_size);
1129 
1130  for(int j=0;j<active_size;j++)
1131  {
1132  if(y[j]==+1)
1133  {
1134  if (!is_lower_bound(j))
1135  {
1136  double grad_diff=Gmaxp+G[j];
1137  if (G[j] >= Gmaxp2)
1138  Gmaxp2 = G[j];
1139  if (grad_diff > 0)
1140  {
1141  double obj_diff;
1142  double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1143  if (quad_coef > 0)
1144  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1145  else
1146  obj_diff = -(grad_diff*grad_diff)/TAU;
1147 
1148  if (obj_diff <= obj_diff_min)
1149  {
1150  Gmin_idx=j;
1151  obj_diff_min = obj_diff;
1152  }
1153  }
1154  }
1155  }
1156  else
1157  {
1158  if (!is_upper_bound(j))
1159  {
1160  double grad_diff=Gmaxn-G[j];
1161  if (-G[j] >= Gmaxn2)
1162  Gmaxn2 = -G[j];
1163  if (grad_diff > 0)
1164  {
1165  double obj_diff;
1166  double quad_coef = QD[in]+QD[j]-2*Q_in[j];
1167  if (quad_coef > 0)
1168  obj_diff = -(grad_diff*grad_diff)/quad_coef;
1169  else
1170  obj_diff = -(grad_diff*grad_diff)/TAU;
1171 
1172  if (obj_diff <= obj_diff_min)
1173  {
1174  Gmin_idx=j;
1175  obj_diff_min = obj_diff;
1176  }
1177  }
1178  }
1179  }
1180  }
1181 
1182  if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1183  return 1;
1184 
1185  if (y[Gmin_idx] == +1)
1186  out_i = Gmaxp_idx;
1187  else
1188  out_i = Gmaxn_idx;
1189  out_j = Gmin_idx;
1190 
1191  return 0;
1192 }
1193 
1194 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1195 {
1196  if(is_upper_bound(i))
1197  {
1198  if(y[i]==+1)
1199  return(-G[i] > Gmax1);
1200  else
1201  return(-G[i] > Gmax4);
1202  }
1203  else if(is_lower_bound(i))
1204  {
1205  if(y[i]==+1)
1206  return(G[i] > Gmax2);
1207  else
1208  return(G[i] > Gmax3);
1209  }
1210  else
1211  return(false);
1212 }
1213 
1214 void Solver_NU::do_shrinking()
1215 {
1216  double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1217  double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1218  double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1219  double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1220 
1221  // find maximal violating pair first
1222  int i;
1223  for(i=0;i<active_size;i++)
1224  {
1225  if(!is_upper_bound(i))
1226  {
1227  if(y[i]==+1)
1228  {
1229  if(-G[i] > Gmax1) Gmax1 = -G[i];
1230  }
1231  else if(-G[i] > Gmax4) Gmax4 = -G[i];
1232  }
1233  if(!is_lower_bound(i))
1234  {
1235  if(y[i]==+1)
1236  {
1237  if(G[i] > Gmax2) Gmax2 = G[i];
1238  }
1239  else if(G[i] > Gmax3) Gmax3 = G[i];
1240  }
1241  }
1242 
1243  if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1244  {
1245  unshrink = true;
1246  reconstruct_gradient();
1247  active_size = l;
1248  }
1249 
1250  for(i=0;i<active_size;i++)
1251  if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1252  {
1253  active_size--;
1254  while (active_size > i)
1255  {
1256  if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1257  {
1258  swap_index(i,active_size);
1259  break;
1260  }
1261  active_size--;
1262  }
1263  }
1264 }
1265 
1266 double Solver_NU::calculate_rho()
1267 {
1268  int nr_free1 = 0,nr_free2 = 0;
1269  double ub1 = INF, ub2 = INF;
1270  double lb1 = -INF, lb2 = -INF;
1271  double sum_free1 = 0, sum_free2 = 0;
1272 
1273  for(int i=0;i<active_size;i++)
1274  {
1275  if(y[i]==+1)
1276  {
1277  if(is_upper_bound(i))
1278  lb1 = max(lb1,G[i]);
1279  else if(is_lower_bound(i))
1280  ub1 = min(ub1,G[i]);
1281  else
1282  {
1283  ++nr_free1;
1284  sum_free1 += G[i];
1285  }
1286  }
1287  else
1288  {
1289  if(is_upper_bound(i))
1290  lb2 = max(lb2,G[i]);
1291  else if(is_lower_bound(i))
1292  ub2 = min(ub2,G[i]);
1293  else
1294  {
1295  ++nr_free2;
1296  sum_free2 += G[i];
1297  }
1298  }
1299  }
1300 
1301  double r1,r2;
1302  if(nr_free1 > 0)
1303  r1 = sum_free1/nr_free1;
1304  else
1305  r1 = (ub1+lb1)/2;
1306 
1307  if(nr_free2 > 0)
1308  r2 = sum_free2/nr_free2;
1309  else
1310  r2 = (ub2+lb2)/2;
1311 
1312  si->r = (r1+r2)/2;
1313  return (r1-r2)/2;
1314 }
1315 
1316 //
1317 // Q matrices for various formulations
1318 //
1319 class SVC_Q: public Kernel
1320 {
1321 public:
1322  SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1323  :Kernel(prob.l, prob.x, param)
1324  {
1325  clone(y,y_,prob.l);
1326  cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1327  QD = new double[prob.l];
1328  for(int i=0;i<prob.l;i++)
1329  QD[i] = (this->*kernel_function)(i,i);
1330  }
1331 
1332  Qfloat *get_Q(int i, int len) const
1333  {
1334  Qfloat *data;
1335  int start, j;
1336  if((start = cache->get_data(i,&data,len)) < len)
1337  {
1338  for(j=start;j<len;j++)
1339  data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1340  }
1341  return data;
1342  }
1343 
1344  double *get_QD() const
1345  {
1346  return QD;
1347  }
1348 
1349  void swap_index(int i, int j) const
1350  {
1351  cache->swap_index(i,j);
1352  Kernel::swap_index(i,j);
1353  swap(y[i],y[j]);
1354  swap(QD[i],QD[j]);
1355  }
1356 
1357  ~SVC_Q()
1358  {
1359  delete[] y;
1360  delete cache;
1361  delete[] QD;
1362  }
1363 private:
1364  schar *y;
1365  Cache *cache;
1366  double *QD;
1367 };
1368 
1369 class ONE_CLASS_Q: public Kernel
1370 {
1371 public:
1372  ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1373  :Kernel(prob.l, prob.x, param)
1374  {
1375  cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1376  QD = new double[prob.l];
1377  for(int i=0;i<prob.l;i++)
1378  QD[i] = (this->*kernel_function)(i,i);
1379  }
1380 
1381  Qfloat *get_Q(int i, int len) const
1382  {
1383  Qfloat *data;
1384  int start, j;
1385  if((start = cache->get_data(i,&data,len)) < len)
1386  {
1387  for(j=start;j<len;j++)
1388  data[j] = (Qfloat)(this->*kernel_function)(i,j);
1389  }
1390  return data;
1391  }
1392 
1393  double *get_QD() const
1394  {
1395  return QD;
1396  }
1397 
1398  void swap_index(int i, int j) const
1399  {
1400  cache->swap_index(i,j);
1401  Kernel::swap_index(i,j);
1402  swap(QD[i],QD[j]);
1403  }
1404 
1405  ~ONE_CLASS_Q()
1406  {
1407  delete cache;
1408  delete[] QD;
1409  }
1410 private:
1411  Cache *cache;
1412  double *QD;
1413 };
1414 
1415 class SVR_Q: public Kernel
1416 {
1417 public:
1418  SVR_Q(const svm_problem& prob, const svm_parameter& param)
1419  :Kernel(prob.l, prob.x, param)
1420  {
1421  l = prob.l;
1422  cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1423  QD = new double[2*l];
1424  sign = new schar[2*l];
1425  index = new int[2*l];
1426  for(int k=0;k<l;k++)
1427  {
1428  sign[k] = 1;
1429  sign[k+l] = -1;
1430  index[k] = k;
1431  index[k+l] = k;
1432  QD[k] = (this->*kernel_function)(k,k);
1433  QD[k+l] = QD[k];
1434  }
1435  buffer[0] = new Qfloat[2*l];
1436  buffer[1] = new Qfloat[2*l];
1437  next_buffer = 0;
1438  }
1439 
1440  void swap_index(int i, int j) const
1441  {
1442  swap(sign[i],sign[j]);
1443  swap(index[i],index[j]);
1444  swap(QD[i],QD[j]);
1445  }
1446 
1447  Qfloat *get_Q(int i, int len) const
1448  {
1449  Qfloat *data;
1450  int j, real_i = index[i];
1451  if(cache->get_data(real_i,&data,l) < l)
1452  {
1453  for(j=0;j<l;j++)
1454  data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1455  }
1456 
1457  // reorder and copy
1458  Qfloat *buf = buffer[next_buffer];
1459  next_buffer = 1 - next_buffer;
1460  schar si = sign[i];
1461  for(j=0;j<len;j++)
1462  buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1463  return buf;
1464  }
1465 
1466  double *get_QD() const
1467  {
1468  return QD;
1469  }
1470 
1471  ~SVR_Q()
1472  {
1473  delete cache;
1474  delete[] sign;
1475  delete[] index;
1476  delete[] buffer[0];
1477  delete[] buffer[1];
1478  delete[] QD;
1479  }
1480 private:
1481  int l;
1482  Cache *cache;
1483  schar *sign;
1484  int *index;
1485  mutable int next_buffer;
1486  Qfloat *buffer[2];
1487  double *QD;
1488 };
1489 
1490 //
1491 // construct and solve various formulations
1492 //
1493 static void solve_c_svc(
1494  const svm_problem *prob, const svm_parameter* param,
1495  double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1496 {
1497  int l = prob->l;
1498  double *minus_ones = new double[l];
1499  schar *y = new schar[l];
1500  double *C = new double[l];
1501 
1502  int i;
1503 
1504  for(i=0;i<l;i++)
1505  {
1506  alpha[i] = 0;
1507  minus_ones[i] = -1;
1508  if(prob->y[i] > 0)
1509  {
1510  y[i] = +1;
1511  C[i] = prob->W[i]*Cp;
1512  }
1513  else
1514  {
1515  y[i] = -1;
1516  C[i] = prob->W[i]*Cn;
1517  }
1518  }
1519 
1520  Solver s;
1521  s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1522  alpha, C, param->eps, si, param->shrinking);
1523 
1524  /*
1525  double sum_alpha=0;
1526  for(i=0;i<l;i++)
1527  sum_alpha += alpha[i];
1528  if (Cp==Cn)
1529  info("nu = %f\n", sum_alpha/(Cp*prob->l));
1530  */
1531 
1532  for(i=0;i<l;i++)
1533  alpha[i] *= y[i];
1534 
1535  delete[] C;
1536  delete[] minus_ones;
1537  delete[] y;
1538 }
1539 
1540 static void solve_nu_svc(
1541  const svm_problem *prob, const svm_parameter *param,
1542  double *alpha, Solver::SolutionInfo* si)
1543 {
1544  int i;
1545  int l = prob->l;
1546  double nu = param->nu;
1547 
1548  schar *y = new schar[l];
1549  double *C = new double[l];
1550 
1551  for(i=0;i<l;i++)
1552  {
1553  if(prob->y[i]>0)
1554  y[i] = +1;
1555  else
1556  y[i] = -1;
1557  C[i] = prob->W[i];
1558  }
1559 
1560  double nu_l = 0;
1561  for(i=0;i<l;i++) nu_l += nu*C[i];
1562  double sum_pos = nu_l/2;
1563  double sum_neg = nu_l/2;
1564 
1565  for(i=0;i<l;i++)
1566  if(y[i] == +1)
1567  {
1568  alpha[i] = min(C[i],sum_pos);
1569  sum_pos -= alpha[i];
1570  }
1571  else
1572  {
1573  alpha[i] = min(C[i],sum_neg);
1574  sum_neg -= alpha[i];
1575  }
1576 
1577  double *zeros = new double[l];
1578 
1579  for(i=0;i<l;i++)
1580  zeros[i] = 0;
1581 
1582  Solver_NU s;
1583  s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1584  alpha, C, param->eps, si, param->shrinking);
1585  double r = si->r;
1586 
1587  info("C = %f\n",1/r);
1588 
1589  for(i=0;i<l;i++)
1590  {
1591  alpha[i] *= y[i]/r;
1592  si->upper_bound[i] /= r;
1593  }
1594 
1595  si->rho /= r;
1596  si->obj /= (r*r);
1597 
1598  delete[] C;
1599  delete[] y;
1600  delete[] zeros;
1601 }
1602 
1603 static void solve_one_class(
1604  const svm_problem *prob, const svm_parameter *param,
1605  double *alpha, Solver::SolutionInfo* si)
1606 {
1607  int l = prob->l;
1608  double *zeros = new double[l];
1609  schar *ones = new schar[l];
1610  double *C = new double[l];
1611  int i;
1612 
1613  double nu_l = 0;
1614 
1615  for(i=0;i<l;i++)
1616  {
1617  C[i] = prob->W[i];
1618  nu_l += C[i] * param->nu;
1619  }
1620 
1621  i = 0;
1622  while(nu_l > 0)
1623  {
1624  alpha[i] = min(C[i],nu_l);
1625  nu_l -= alpha[i];
1626  ++i;
1627  }
1628  for(;i<l;i++)
1629  alpha[i] = 0;
1630 
1631  for(i=0;i<l;i++)
1632  {
1633  zeros[i] = 0;
1634  ones[i] = 1;
1635  }
1636 
1637  Solver s;
1638  s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1639  alpha, C, param->eps, si, param->shrinking);
1640 
1641  delete[] C;
1642  delete[] zeros;
1643  delete[] ones;
1644 }
1645 
1646 static void solve_epsilon_svr(
1647  const svm_problem *prob, const svm_parameter *param,
1648  double *alpha, Solver::SolutionInfo* si)
1649 {
1650  int l = prob->l;
1651  double *alpha2 = new double[2*l];
1652  double *linear_term = new double[2*l];
1653  double *C = new double[2*l];
1654  schar *y = new schar[2*l];
1655  int i;
1656 
1657  for(i=0;i<l;i++)
1658  {
1659  alpha2[i] = 0;
1660  linear_term[i] = param->p - prob->y[i];
1661  y[i] = 1;
1662  C[i] = prob->W[i]*param->C;
1663 
1664  alpha2[i+l] = 0;
1665  linear_term[i+l] = param->p + prob->y[i];
1666  y[i+l] = -1;
1667  C[i+l] = prob->W[i]*param->C;
1668  }
1669 
1670  Solver s;
1671  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1672  alpha2, C, param->eps, si, param->shrinking);
1673  double sum_alpha = 0;
1674  for(i=0;i<l;i++)
1675  {
1676  alpha[i] = alpha2[i] - alpha2[i+l];
1677  sum_alpha += fabs(alpha[i]);
1678  }
1679  //info("nu = %f\n",sum_alpha/(param->C*l));
1680  delete[] alpha2;
1681  delete[] linear_term;
1682  delete[] C;
1683  delete[] y;
1684 }
1685 
1686 static void solve_nu_svr(
1687  const svm_problem *prob, const svm_parameter *param,
1688  double *alpha, Solver::SolutionInfo* si)
1689 {
1690  int l = prob->l;
1691  double *C = new double[2*l];
1692  double *alpha2 = new double[2*l];
1693  double *linear_term = new double[2*l];
1694  schar *y = new schar[2*l];
1695  int i;
1696 
1697  double sum = 0;
1698  for(i=0;i<l;i++)
1699  {
1700  C[i] = C[i+l] = prob->W[i]*param->C;
1701  sum += C[i] * param->nu;
1702  }
1703  sum /= 2;
1704 
1705  for(i=0;i<l;i++)
1706  {
1707  alpha2[i] = alpha2[i+l] = min(sum,C[i]);
1708  sum -= alpha2[i];
1709 
1710  linear_term[i] = - prob->y[i];
1711  y[i] = 1;
1712 
1713  linear_term[i+l] = prob->y[i];
1714  y[i+l] = -1;
1715  }
1716 
1717  Solver_NU s;
1718  s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1719  alpha2, C, param->eps, si, param->shrinking);
1720 
1721  info("epsilon = %f\n",-si->r);
1722 
1723  for(i=0;i<l;i++)
1724  alpha[i] = alpha2[i] - alpha2[i+l];
1725 
1726  delete[] alpha2;
1727  delete[] linear_term;
1728  delete[] C;
1729  delete[] y;
1730 }
1731 
1732 //
1733 // decision_function
1734 //
1735 struct decision_function
1736 {
1737  double *alpha;
1738  double rho;
1739 };
1740 
1741 static decision_function svm_train_one(
1742  const svm_problem *prob, const svm_parameter *param,
1743  double Cp, double Cn)
1744 {
1745  double *alpha = Malloc(double,prob->l);
1746  Solver::SolutionInfo si;
1747  switch(param->svm_type)
1748  {
1749  case C_SVC:
1750  si.upper_bound = Malloc(double,prob->l);
1751  solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1752  break;
1753  case NU_SVC:
1754  si.upper_bound = Malloc(double,prob->l);
1755  solve_nu_svc(prob,param,alpha,&si);
1756  break;
1757  case ONE_CLASS:
1758  si.upper_bound = Malloc(double,prob->l);
1759  solve_one_class(prob,param,alpha,&si);
1760  break;
1761  case EPSILON_SVR:
1762  si.upper_bound = Malloc(double,2*prob->l);
1763  solve_epsilon_svr(prob,param,alpha,&si);
1764  break;
1765  case NU_SVR:
1766  si.upper_bound = Malloc(double,2*prob->l);
1767  solve_nu_svr(prob,param,alpha,&si);
1768  break;
1769  }
1770 
1771  info("obj = %f, rho = %f\n",si.obj,si.rho);
1772 
1773  // output SVs
1774 
1775  int nSV = 0;
1776  int nBSV = 0;
1777  for(int i=0;i<prob->l;i++)
1778  {
1779  if(fabs(alpha[i]) > 0)
1780  {
1781  ++nSV;
1782  if(prob->y[i] > 0)
1783  {
1784  if(fabs(alpha[i]) >= si.upper_bound[i])
1785  ++nBSV;
1786  }
1787  else
1788  {
1789  if(fabs(alpha[i]) >= si.upper_bound[i])
1790  ++nBSV;
1791  }
1792  }
1793  }
1794 
1795  free(si.upper_bound);
1796 
1797  info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1798 
1799  decision_function f;
1800  f.alpha = alpha;
1801  f.rho = si.rho;
1802  return f;
1803 }
1804 
1805 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
1806 static void sigmoid_train(
1807  int l, const double *dec_values, const double *labels,
1808  double& A, double& B)
1809 {
1810  double prior1=0, prior0 = 0;
1811  int i;
1812 
1813  for (i=0;i<l;i++)
1814  if (labels[i] > 0) prior1+=1;
1815  else prior0+=1;
1816 
1817  int max_iter=100; // Maximal number of iterations
1818  double min_step=1e-10; // Minimal step taken in line search
1819  double sigma=1e-12; // For numerically strict PD of Hessian
1820  double eps=1e-5;
1821  double hiTarget=(prior1+1.0)/(prior1+2.0);
1822  double loTarget=1/(prior0+2.0);
1823  double *t=Malloc(double,l);
1824  double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1825  double newA,newB,newf,d1,d2;
1826  int iter;
1827 
1828  // Initial Point and Initial Fun Value
1829  A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1830  double fval = 0.0;
1831 
1832  for (i=0;i<l;i++)
1833  {
1834  if (labels[i]>0) t[i]=hiTarget;
1835  else t[i]=loTarget;
1836  fApB = dec_values[i]*A+B;
1837  if (fApB>=0)
1838  fval += t[i]*fApB + log(1+exp(-fApB));
1839  else
1840  fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1841  }
1842  for (iter=0;iter<max_iter;iter++)
1843  {
1844  // Update Gradient and Hessian (use H' = H + sigma I)
1845  h11=sigma; // numerically ensures strict PD
1846  h22=sigma;
1847  h21=0.0;g1=0.0;g2=0.0;
1848  for (i=0;i<l;i++)
1849  {
1850  fApB = dec_values[i]*A+B;
1851  if (fApB >= 0)
1852  {
1853  p=exp(-fApB)/(1.0+exp(-fApB));
1854  q=1.0/(1.0+exp(-fApB));
1855  }
1856  else
1857  {
1858  p=1.0/(1.0+exp(fApB));
1859  q=exp(fApB)/(1.0+exp(fApB));
1860  }
1861  d2=p*q;
1862  h11+=dec_values[i]*dec_values[i]*d2;
1863  h22+=d2;
1864  h21+=dec_values[i]*d2;
1865  d1=t[i]-p;
1866  g1+=dec_values[i]*d1;
1867  g2+=d1;
1868  }
1869 
1870  // Stopping Criteria
1871  if (fabs(g1)<eps && fabs(g2)<eps)
1872  break;
1873 
1874  // Finding Newton direction: -inv(H') * g
1875  det=h11*h22-h21*h21;
1876  dA=-(h22*g1 - h21 * g2) / det;
1877  dB=-(-h21*g1+ h11 * g2) / det;
1878  gd=g1*dA+g2*dB;
1879 
1880 
1881  stepsize = 1; // Line Search
1882  while (stepsize >= min_step)
1883  {
1884  newA = A + stepsize * dA;
1885  newB = B + stepsize * dB;
1886 
1887  // New function value
1888  newf = 0.0;
1889  for (i=0;i<l;i++)
1890  {
1891  fApB = dec_values[i]*newA+newB;
1892  if (fApB >= 0)
1893  newf += t[i]*fApB + log(1+exp(-fApB));
1894  else
1895  newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1896  }
1897  // Check sufficient decrease
1898  if (newf<fval+0.0001*stepsize*gd)
1899  {
1900  A=newA;B=newB;fval=newf;
1901  break;
1902  }
1903  else
1904  stepsize = stepsize / 2.0;
1905  }
1906 
1907  if (stepsize < min_step)
1908  {
1909  info("Line search fails in two-class probability estimates\n");
1910  break;
1911  }
1912  }
1913 
1914  if (iter>=max_iter)
1915  info("Reaching maximal iterations in two-class probability estimates\n");
1916  free(t);
1917 }
1918 
1919 static double sigmoid_predict(double decision_value, double A, double B)
1920 {
1921  double fApB = decision_value*A+B;
1922  // 1-p used later; avoid catastrophic cancellation
1923  if (fApB >= 0)
1924  return exp(-fApB)/(1.0+exp(-fApB));
1925  else
1926  return 1.0/(1+exp(fApB)) ;
1927 }
1928 
1929 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1930 static void multiclass_probability(int k, double **r, double *p)
1931 {
1932  int t,j;
1933  int iter = 0, max_iter=max(100,k);
1934  double **Q=Malloc(double *,k);
1935  double *Qp=Malloc(double,k);
1936  double pQp, eps=0.005/k;
1937 
1938  for (t=0;t<k;t++)
1939  {
1940  p[t]=1.0/k; // Valid if k = 1
1941  Q[t]=Malloc(double,k);
1942  Q[t][t]=0;
1943  for (j=0;j<t;j++)
1944  {
1945  Q[t][t]+=r[j][t]*r[j][t];
1946  Q[t][j]=Q[j][t];
1947  }
1948  for (j=t+1;j<k;j++)
1949  {
1950  Q[t][t]+=r[j][t]*r[j][t];
1951  Q[t][j]=-r[j][t]*r[t][j];
1952  }
1953  }
1954  for (iter=0;iter<max_iter;iter++)
1955  {
1956  // stopping condition, recalculate QP,pQP for numerical accuracy
1957  pQp=0;
1958  for (t=0;t<k;t++)
1959  {
1960  Qp[t]=0;
1961  for (j=0;j<k;j++)
1962  Qp[t]+=Q[t][j]*p[j];
1963  pQp+=p[t]*Qp[t];
1964  }
1965  double max_error=0;
1966  for (t=0;t<k;t++)
1967  {
1968  double error=fabs(Qp[t]-pQp);
1969  if (error>max_error)
1970  max_error=error;
1971  }
1972  if (max_error<eps) break;
1973 
1974  for (t=0;t<k;t++)
1975  {
1976  double diff=(-Qp[t]+pQp)/Q[t][t];
1977  p[t]+=diff;
1978  pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1979  for (j=0;j<k;j++)
1980  {
1981  Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1982  p[j]/=(1+diff);
1983  }
1984  }
1985  }
1986  if (iter>=max_iter)
1987  info("Exceeds max_iter in multiclass_prob\n");
1988  for(t=0;t<k;t++) free(Q[t]);
1989  free(Q);
1990  free(Qp);
1991 }
1992 
1993 // Cross-validation decision values for probability estimates
1995  const svm_problem *prob, const svm_parameter *param,
1996  double Cp, double Cn, double& probA, double& probB)
1997 {
1998  int i;
1999  int nr_fold = 5;
2000  int *perm = Malloc(int,prob->l);
2001  double *dec_values = Malloc(double,prob->l);
2002 
2003  // random shuffle
2004  for(i=0;i<prob->l;i++) perm[i]=i;
2005  for(i=0;i<prob->l;i++)
2006  {
2007  int j = i+rand()%(prob->l-i);
2008  swap(perm[i],perm[j]);
2009  }
2010  for(i=0;i<nr_fold;i++)
2011  {
2012  int begin = i*prob->l/nr_fold;
2013  int end = (i+1)*prob->l/nr_fold;
2014  int j,k;
2015  struct svm_problem subprob;
2016 
2017  subprob.l = prob->l-(end-begin);
2018  subprob.x = Malloc(struct svm_node*,subprob.l);
2019  subprob.y = Malloc(double,subprob.l);
2020  subprob.W = Malloc(double,subprob.l);
2021 
2022  k=0;
2023  for(j=0;j<begin;j++)
2024  {
2025  subprob.x[k] = prob->x[perm[j]];
2026  subprob.y[k] = prob->y[perm[j]];
2027  subprob.W[k] = prob->W[perm[j]];
2028  ++k;
2029  }
2030  for(j=end;j<prob->l;j++)
2031  {
2032  subprob.x[k] = prob->x[perm[j]];
2033  subprob.y[k] = prob->y[perm[j]];
2034  subprob.W[k] = prob->W[perm[j]];
2035  ++k;
2036  }
2037  int p_count=0,n_count=0;
2038  for(j=0;j<k;j++)
2039  if(subprob.y[j]>0)
2040  p_count++;
2041  else
2042  n_count++;
2043 
2044  if(p_count==0 && n_count==0)
2045  for(j=begin;j<end;j++)
2046  dec_values[perm[j]] = 0;
2047  else if(p_count > 0 && n_count == 0)
2048  for(j=begin;j<end;j++)
2049  dec_values[perm[j]] = 1;
2050  else if(p_count == 0 && n_count > 0)
2051  for(j=begin;j<end;j++)
2052  dec_values[perm[j]] = -1;
2053  else
2054  {
2055  svm_parameter subparam = *param;
2056  subparam.probability=0;
2057  subparam.C=1.0;
2058  subparam.nr_weight=2;
2059  subparam.weight_label = Malloc(int,2);
2060  subparam.weight = Malloc(double,2);
2061  subparam.weight_label[0]=+1;
2062  subparam.weight_label[1]=-1;
2063  subparam.weight[0]=Cp;
2064  subparam.weight[1]=Cn;
2065  struct svm_model *submodel = svm_train(&subprob,&subparam);
2066  for(j=begin;j<end;j++)
2067  {
2068  svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
2069  // ensure +1 -1 order; reason not using CV subroutine
2070  dec_values[perm[j]] *= submodel->label[0];
2071  }
2072  svm_free_and_destroy_model(&submodel);
2073  svm_destroy_param(&subparam);
2074  }
2075  free(subprob.x);
2076  free(subprob.y);
2077  free(subprob.W);
2078  }
2079  sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
2080  free(dec_values);
2081  free(perm);
2082 }
2083 
2084 // Return parameter of a Laplace distribution
2085 static double svm_svr_probability(
2086  const svm_problem *prob, const svm_parameter *param)
2087 {
2088  int i;
2089  int nr_fold = 5;
2090  double *ymv = Malloc(double,prob->l);
2091  double mae = 0;
2092 
2093  svm_parameter newparam = *param;
2094  newparam.probability = 0;
2095  svm_cross_validation(prob,&newparam,nr_fold,ymv);
2096  for(i=0;i<prob->l;i++)
2097  {
2098  ymv[i]=prob->y[i]-ymv[i];
2099  mae += fabs(ymv[i]);
2100  }
2101  mae /= prob->l;
2102  double std=sqrt(2*mae*mae);
2103  int count=0;
2104  mae=0;
2105  for(i=0;i<prob->l;i++)
2106  if (fabs(ymv[i]) > 5*std)
2107  count=count+1;
2108  else
2109  mae+=fabs(ymv[i]);
2110  mae /= (prob->l-count);
2111  info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2112  free(ymv);
2113  return mae;
2114 }
2115 
2116 
2117 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2118 // perm, length l, must be allocated before calling this subroutine
2119 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2120 {
2121  int l = prob->l;
2122  int max_nr_class = 16;
2123  int nr_class = 0;
2124  int *label = Malloc(int,max_nr_class);
2125  int *count = Malloc(int,max_nr_class);
2126  int *data_label = Malloc(int,l);
2127  int i;
2128 
2129  for(i=0;i<l;i++)
2130  {
2131  int this_label = (int)prob->y[i];
2132  int j;
2133  for(j=0;j<nr_class;j++)
2134  {
2135  if(this_label == label[j])
2136  {
2137  ++count[j];
2138  break;
2139  }
2140  }
2141  data_label[i] = j;
2142  if(j == nr_class)
2143  {
2144  if(nr_class == max_nr_class)
2145  {
2146  max_nr_class *= 2;
2147  label = (int *)realloc(label,max_nr_class*sizeof(int));
2148  count = (int *)realloc(count,max_nr_class*sizeof(int));
2149  }
2150  label[nr_class] = this_label;
2151  count[nr_class] = 1;
2152  ++nr_class;
2153  }
2154  }
2155 
2156  //
2157  // Labels are ordered by their first occurrence in the training set.
2158  // However, for two-class sets with -1/+1 labels and -1 appears first,
2159  // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
2160  //
2161  if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2162  {
2163  swap(label[0],label[1]);
2164  swap(count[0],count[1]);
2165  for(i=0;i<l;i++)
2166  {
2167  if(data_label[i] == 0)
2168  data_label[i] = 1;
2169  else
2170  data_label[i] = 0;
2171  }
2172  }
2173 
2174  int *start = Malloc(int,nr_class);
2175  start[0] = 0;
2176  for(i=1;i<nr_class;i++)
2177  start[i] = start[i-1]+count[i-1];
2178  for(i=0;i<l;i++)
2179  {
2180  perm[start[data_label[i]]] = i;
2181  ++start[data_label[i]];
2182  }
2183  start[0] = 0;
2184  for(i=1;i<nr_class;i++)
2185  start[i] = start[i-1]+count[i-1];
2186 
2187  *nr_class_ret = nr_class;
2188  *label_ret = label;
2189  *start_ret = start;
2190  *count_ret = count;
2191  free(data_label);
2192 }
2193 
2194 //
2195 // Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
2196 //
2197 static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)
2198 {
2199  int i;
2200  int l = 0;
2201  for(i=0;i<prob->l;i++)
2202  if(prob->W[i] > 0) l++;
2203  *newprob = *prob;
2204  newprob->l = l;
2205  newprob->x = Malloc(svm_node*,l);
2206  newprob->y = Malloc(double,l);
2207  newprob->W = Malloc(double,l);
2208 
2209  int j = 0;
2210  for(i=0;i<prob->l;i++)
2211  if(prob->W[i] > 0)
2212  {
2213  newprob->x[j] = prob->x[i];
2214  newprob->y[j] = prob->y[i];
2215  newprob->W[j] = prob->W[i];
2216  j++;
2217  }
2218 }
2219 
2220 //
2221 // Interface functions
2222 //
2223 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2224 {
2225  svm_problem newprob;
2226  remove_zero_weight(&newprob, prob);
2227  prob = &newprob;
2228 
2229  svm_model *model = Malloc(svm_model,1);
2230  model->param = *param;
2231  model->free_sv = 0; // XXX
2232 
2233  if(param->svm_type == ONE_CLASS ||
2234  param->svm_type == EPSILON_SVR ||
2235  param->svm_type == NU_SVR)
2236  {
2237  // regression or one-class-svm
2238  model->nr_class = 2;
2239  model->label = NULL;
2240  model->nSV = NULL;
2241  model->probA = NULL; model->probB = NULL;
2242  model->sv_coef = Malloc(double *,1);
2243 
2244  if(param->probability &&
2245  (param->svm_type == EPSILON_SVR ||
2246  param->svm_type == NU_SVR))
2247  {
2248  model->probA = Malloc(double,1);
2249  model->probA[0] = svm_svr_probability(prob,param);
2250  }
2251 
2252  decision_function f = svm_train_one(prob,param,0,0);
2253  model->rho = Malloc(double,1);
2254  model->rho[0] = f.rho;
2255 
2256  int nSV = 0;
2257  int i;
2258  for(i=0;i<prob->l;i++)
2259  if(fabs(f.alpha[i]) > 0) ++nSV;
2260  model->l = nSV;
2261  model->SV = Malloc(svm_node *,nSV);
2262  model->sv_coef[0] = Malloc(double,nSV);
2263  model->sv_indices = Malloc(int,nSV);
2264  int j = 0;
2265  for(i=0;i<prob->l;i++)
2266  if(fabs(f.alpha[i]) > 0)
2267  {
2268  model->SV[j] = prob->x[i];
2269  model->sv_coef[0][j] = f.alpha[i];
2270  model->sv_indices[j] = i+1;
2271  ++j;
2272  }
2273 
2274  free(f.alpha);
2275  }
2276  else
2277  {
2278  // classification
2279  int l = prob->l;
2280  int nr_class;
2281  int *label = NULL;
2282  int *start = NULL;
2283  int *count = NULL;
2284  int *perm = Malloc(int,l);
2285 
2286  // group training data of the same class
2287  svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2288  if(nr_class == 1)
2289  info("WARNING: training data in only one class. See README for details.\n");
2290 
2291  svm_node **x = Malloc(svm_node *,l);
2292  double *W;
2293  W = Malloc(double,l);
2294 
2295  int i;
2296  for(i=0;i<l;i++)
2297  {
2298  x[i] = prob->x[perm[i]];
2299  W[i] = prob->W[perm[i]];
2300  }
2301 
2302  // calculate weighted C
2303 
2304  double *weighted_C = Malloc(double, nr_class);
2305  for(i=0;i<nr_class;i++)
2306  weighted_C[i] = param->C;
2307  for(i=0;i<param->nr_weight;i++)
2308  {
2309  int j;
2310  for(j=0;j<nr_class;j++)
2311  if(param->weight_label[i] == label[j])
2312  break;
2313  if(j == nr_class)
2314  fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2315  else
2316  weighted_C[j] *= param->weight[i];
2317  }
2318 
2319  // train k*(k-1)/2 models
2320 
2321  bool *nonzero = Malloc(bool,l);
2322  for(i=0;i<l;i++)
2323  nonzero[i] = false;
2324  decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2325 
2326  double *probA=NULL,*probB=NULL;
2327  if (param->probability)
2328  {
2329  probA=Malloc(double,nr_class*(nr_class-1)/2);
2330  probB=Malloc(double,nr_class*(nr_class-1)/2);
2331  }
2332 
2333  int p = 0;
2334  for(i=0;i<nr_class;i++)
2335  for(int j=i+1;j<nr_class;j++)
2336  {
2337  svm_problem sub_prob;
2338  int si = start[i], sj = start[j];
2339  int ci = count[i], cj = count[j];
2340  sub_prob.l = ci+cj;
2341  sub_prob.x = Malloc(svm_node *,sub_prob.l);
2342  sub_prob.y = Malloc(double,sub_prob.l);
2343  sub_prob.W = Malloc(double,sub_prob.l);
2344  int k;
2345  for(k=0;k<ci;k++)
2346  {
2347  sub_prob.x[k] = x[si+k];
2348  sub_prob.y[k] = +1;
2349  sub_prob.W[k] = W[si+k];
2350  }
2351  for(k=0;k<cj;k++)
2352  {
2353  sub_prob.x[ci+k] = x[sj+k];
2354  sub_prob.y[ci+k] = -1;
2355  sub_prob.W[ci+k] = W[sj+k];
2356  }
2357 
2358  if(param->probability)
2359  svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2360 
2361  f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2362  for(k=0;k<ci;k++)
2363  if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2364  nonzero[si+k] = true;
2365  for(k=0;k<cj;k++)
2366  if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2367  nonzero[sj+k] = true;
2368  free(sub_prob.x);
2369  free(sub_prob.y);
2370  free(sub_prob.W);
2371  ++p;
2372  }
2373 
2374  // build output
2375 
2376  model->nr_class = nr_class;
2377 
2378  model->label = Malloc(int,nr_class);
2379  for(i=0;i<nr_class;i++)
2380  model->label[i] = label[i];
2381 
2382  model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2383  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2384  model->rho[i] = f[i].rho;
2385 
2386  if(param->probability)
2387  {
2388  model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2389  model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2390  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2391  {
2392  model->probA[i] = probA[i];
2393  model->probB[i] = probB[i];
2394  }
2395  }
2396  else
2397  {
2398  model->probA=NULL;
2399  model->probB=NULL;
2400  }
2401 
2402  int total_sv = 0;
2403  int *nz_count = Malloc(int,nr_class);
2404  model->nSV = Malloc(int,nr_class);
2405  for(i=0;i<nr_class;i++)
2406  {
2407  int nSV = 0;
2408  for(int j=0;j<count[i];j++)
2409  if(nonzero[start[i]+j])
2410  {
2411  ++nSV;
2412  ++total_sv;
2413  }
2414  model->nSV[i] = nSV;
2415  nz_count[i] = nSV;
2416  }
2417 
2418  info("Total nSV = %d\n",total_sv);
2419 
2420  model->l = total_sv;
2421  model->SV = Malloc(svm_node *,total_sv);
2422  model->sv_indices = Malloc(int,total_sv);
2423  p = 0;
2424  for(i=0;i<l;i++)
2425  if(nonzero[i])
2426  {
2427  model->SV[p] = x[i];
2428  model->sv_indices[p++] = perm[i] + 1;
2429  }
2430 
2431  int *nz_start = Malloc(int,nr_class);
2432  nz_start[0] = 0;
2433  for(i=1;i<nr_class;i++)
2434  nz_start[i] = nz_start[i-1]+nz_count[i-1];
2435 
2436  model->sv_coef = Malloc(double *,nr_class-1);
2437  for(i=0;i<nr_class-1;i++)
2438  model->sv_coef[i] = Malloc(double,total_sv);
2439 
2440  p = 0;
2441  for(i=0;i<nr_class;i++)
2442  for(int j=i+1;j<nr_class;j++)
2443  {
2444  // classifier (i,j): coefficients with
2445  // i are in sv_coef[j-1][nz_start[i]...],
2446  // j are in sv_coef[i][nz_start[j]...]
2447 
2448  int si = start[i];
2449  int sj = start[j];
2450  int ci = count[i];
2451  int cj = count[j];
2452 
2453  int q = nz_start[i];
2454  int k;
2455  for(k=0;k<ci;k++)
2456  if(nonzero[si+k])
2457  model->sv_coef[j-1][q++] = f[p].alpha[k];
2458  q = nz_start[j];
2459  for(k=0;k<cj;k++)
2460  if(nonzero[sj+k])
2461  model->sv_coef[i][q++] = f[p].alpha[ci+k];
2462  ++p;
2463  }
2464 
2465  free(label);
2466  free(probA);
2467  free(probB);
2468  free(count);
2469  free(perm);
2470  free(start);
2471  free(W);
2472  free(x);
2473  free(weighted_C);
2474  free(nonzero);
2475  for(i=0;i<nr_class*(nr_class-1)/2;i++)
2476  free(f[i].alpha);
2477  free(f);
2478  free(nz_count);
2479  free(nz_start);
2480  }
2481  free(newprob.x);
2482  free(newprob.y);
2483  free(newprob.W);
2484  return model;
2485 }
2486 
2487 // Stratified cross validation
2488 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2489 {
2490  int i;
2491  int *fold_start;
2492  int l = prob->l;
2493  int *perm = Malloc(int,l);
2494  int nr_class;
2495  if (nr_fold > l)
2496  {
2497  nr_fold = l;
2498  fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2499  }
2500  fold_start = Malloc(int,nr_fold+1);
2501  // stratified cv may not give leave-one-out rate
2502  // Each class to l folds -> some folds may have zero elements
2503  if((param->svm_type == C_SVC ||
2504  param->svm_type == NU_SVC) && nr_fold < l)
2505  {
2506  int *start = NULL;
2507  int *label = NULL;
2508  int *count = NULL;
2509  svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2510 
2511  // random shuffle and then data grouped by fold using the array perm
2512  int *fold_count = Malloc(int,nr_fold);
2513  int c;
2514  int *index = Malloc(int,l);
2515  for(i=0;i<l;i++)
2516  index[i]=perm[i];
2517  for (c=0; c<nr_class; c++)
2518  for(i=0;i<count[c];i++)
2519  {
2520  int j = i+rand()%(count[c]-i);
2521  swap(index[start[c]+j],index[start[c]+i]);
2522  }
2523  for(i=0;i<nr_fold;i++)
2524  {
2525  fold_count[i] = 0;
2526  for (c=0; c<nr_class;c++)
2527  fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2528  }
2529  fold_start[0]=0;
2530  for (i=1;i<=nr_fold;i++)
2531  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2532  for (c=0; c<nr_class;c++)
2533  for(i=0;i<nr_fold;i++)
2534  {
2535  int begin = start[c]+i*count[c]/nr_fold;
2536  int end = start[c]+(i+1)*count[c]/nr_fold;
2537  for(int j=begin;j<end;j++)
2538  {
2539  perm[fold_start[i]] = index[j];
2540  fold_start[i]++;
2541  }
2542  }
2543  fold_start[0]=0;
2544  for (i=1;i<=nr_fold;i++)
2545  fold_start[i] = fold_start[i-1]+fold_count[i-1];
2546  free(start);
2547  free(label);
2548  free(count);
2549  free(index);
2550  free(fold_count);
2551  }
2552  else
2553  {
2554  for(i=0;i<l;i++) perm[i]=i;
2555  for(i=0;i<l;i++)
2556  {
2557  int j = i+rand()%(l-i);
2558  swap(perm[i],perm[j]);
2559  }
2560  for(i=0;i<=nr_fold;i++)
2561  fold_start[i]=i*l/nr_fold;
2562  }
2563 
2564  for(i=0;i<nr_fold;i++)
2565  {
2566  int begin = fold_start[i];
2567  int end = fold_start[i+1];
2568  int j,k;
2569  struct svm_problem subprob;
2570 
2571  subprob.l = l-(end-begin);
2572  subprob.x = Malloc(struct svm_node*,subprob.l);
2573  subprob.y = Malloc(double,subprob.l);
2574 
2575  subprob.W = Malloc(double,subprob.l);
2576  k=0;
2577  for(j=0;j<begin;j++)
2578  {
2579  subprob.x[k] = prob->x[perm[j]];
2580  subprob.y[k] = prob->y[perm[j]];
2581  subprob.W[k] = prob->W[perm[j]];
2582  ++k;
2583  }
2584  for(j=end;j<l;j++)
2585  {
2586  subprob.x[k] = prob->x[perm[j]];
2587  subprob.y[k] = prob->y[perm[j]];
2588  subprob.W[k] = prob->W[perm[j]];
2589  ++k;
2590  }
2591  struct svm_model *submodel = svm_train(&subprob,param);
2592  if(param->probability &&
2593  (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2594  {
2595  double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2596  for(j=begin;j<end;j++)
2597  target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2598  free(prob_estimates);
2599  }
2600  else
2601  for(j=begin;j<end;j++)
2602  target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2603  svm_free_and_destroy_model(&submodel);
2604  free(subprob.x);
2605  free(subprob.y);
2606  free(subprob.W);
2607  }
2608  free(fold_start);
2609  free(perm);
2610 }
2611 
2612 
2613 int svm_get_svm_type(const svm_model *model)
2614 {
2615  return model->param.svm_type;
2616 }
2617 
2618 int svm_get_nr_class(const svm_model *model)
2619 {
2620  return model->nr_class;
2621 }
2622 
2623 void svm_get_labels(const svm_model *model, int* label)
2624 {
2625  if (model->label != NULL)
2626  for(int i=0;i<model->nr_class;i++)
2627  label[i] = model->label[i];
2628 }
2629 
2630 void svm_get_sv_indices(const svm_model *model, int* indices)
2631 {
2632  if (model->sv_indices != NULL)
2633  for(int i=0;i<model->l;i++)
2634  indices[i] = model->sv_indices[i];
2635 }
2636 
2637 int svm_get_nr_sv(const svm_model *model)
2638 {
2639  return model->l;
2640 }
2641 
2643 {
2644  if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2645  model->probA!=NULL)
2646  return model->probA[0];
2647  else
2648  {
2649  fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2650  return 0;
2651  }
2652 }
2653 
2654 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2655 {
2656  int i;
2657  if(model->param.svm_type == ONE_CLASS ||
2658  model->param.svm_type == EPSILON_SVR ||
2659  model->param.svm_type == NU_SVR)
2660  {
2661  double *sv_coef = model->sv_coef[0];
2662  double sum = 0;
2663  for(i=0;i<model->l;i++)
2664  sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2665  sum -= model->rho[0];
2666  *dec_values = sum;
2667 
2668  if(model->param.svm_type == ONE_CLASS)
2669  return (sum>0)?1:-1;
2670  else
2671  return sum;
2672  }
2673  else
2674  {
2675  int nr_class = model->nr_class;
2676  int l = model->l;
2677 
2678  double *kvalue = Malloc(double,l);
2679  for(i=0;i<l;i++)
2680  kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2681 
2682  int *start = Malloc(int,nr_class);
2683  start[0] = 0;
2684  for(i=1;i<nr_class;i++)
2685  start[i] = start[i-1]+model->nSV[i-1];
2686 
2687  int *vote = Malloc(int,nr_class);
2688  for(i=0;i<nr_class;i++)
2689  vote[i] = 0;
2690 
2691  int p=0;
2692  for(i=0;i<nr_class;i++)
2693  for(int j=i+1;j<nr_class;j++)
2694  {
2695  double sum = 0;
2696  int si = start[i];
2697  int sj = start[j];
2698  int ci = model->nSV[i];
2699  int cj = model->nSV[j];
2700 
2701  int k;
2702  double *coef1 = model->sv_coef[j-1];
2703  double *coef2 = model->sv_coef[i];
2704  for(k=0;k<ci;k++)
2705  sum += coef1[si+k] * kvalue[si+k];
2706  for(k=0;k<cj;k++)
2707  sum += coef2[sj+k] * kvalue[sj+k];
2708  sum -= model->rho[p];
2709  dec_values[p] = sum;
2710 
2711  if(dec_values[p] > 0)
2712  ++vote[i];
2713  else
2714  ++vote[j];
2715  p++;
2716  }
2717 
2718  int vote_max_idx = 0;
2719  for(i=1;i<nr_class;i++)
2720  if(vote[i] > vote[vote_max_idx])
2721  vote_max_idx = i;
2722 
2723  free(kvalue);
2724  free(start);
2725  free(vote);
2726  return model->label[vote_max_idx];
2727  }
2728 }
2729 
2730 double svm_predict(const svm_model *model, const svm_node *x)
2731 {
2732  int nr_class = model->nr_class;
2733  double *dec_values;
2734  if(model->param.svm_type == ONE_CLASS ||
2735  model->param.svm_type == EPSILON_SVR ||
2736  model->param.svm_type == NU_SVR)
2737  dec_values = Malloc(double, 1);
2738  else
2739  dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2740  double pred_result = svm_predict_values(model, x, dec_values);
2741  free(dec_values);
2742  return pred_result;
2743 }
2744 
2746  const svm_model *model, const svm_node *x, double *prob_estimates)
2747 {
2748  if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2749  model->probA!=NULL && model->probB!=NULL)
2750  {
2751  int i;
2752  int nr_class = model->nr_class;
2753  double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2754  svm_predict_values(model, x, dec_values);
2755 
2756  double min_prob=1e-7;
2757  double **pairwise_prob=Malloc(double *,nr_class);
2758  for(i=0;i<nr_class;i++)
2759  pairwise_prob[i]=Malloc(double,nr_class);
2760  int k=0;
2761  for(i=0;i<nr_class;i++)
2762  for(int j=i+1;j<nr_class;j++)
2763  {
2764  pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2765  pairwise_prob[j][i]=1-pairwise_prob[i][j];
2766  k++;
2767  }
2768  multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2769 
2770  int prob_max_idx = 0;
2771  for(i=1;i<nr_class;i++)
2772  if(prob_estimates[i] > prob_estimates[prob_max_idx])
2773  prob_max_idx = i;
2774  for(i=0;i<nr_class;i++)
2775  free(pairwise_prob[i]);
2776  free(dec_values);
2777  free(pairwise_prob);
2778  return model->label[prob_max_idx];
2779  }
2780  else
2781  return svm_predict(model, x);
2782 }
2783 
2784 static const char *svm_type_table[] =
2785 {
2786  "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2787 };
2788 
2789 static const char *kernel_type_table[]=
2790 {
2791  "linear","polynomial","rbf","sigmoid","precomputed",NULL
2792 };
2793 
2794 int svm_save_model(const char *model_file_name, const svm_model *model)
2795 {
2796  FILE *fp = fopen(model_file_name,"w");
2797  if(fp==NULL) return -1;
2798 
2799  mitk::LocaleSwitch localeSwitch("C");
2800 
2801  const svm_parameter& param = model->param;
2802 
2803  fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2804  fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2805 
2806  if(param.kernel_type == POLY)
2807  fprintf(fp,"degree %d\n", param.degree);
2808 
2809  if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2810  fprintf(fp,"gamma %g\n", param.gamma);
2811 
2812  if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2813  fprintf(fp,"coef0 %g\n", param.coef0);
2814 
2815  int nr_class = model->nr_class;
2816  int l = model->l;
2817  fprintf(fp, "nr_class %d\n", nr_class);
2818  fprintf(fp, "total_sv %d\n",l);
2819 
2820  {
2821  fprintf(fp, "rho");
2822  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2823  fprintf(fp," %g",model->rho[i]);
2824  fprintf(fp, "\n");
2825  }
2826 
2827  if(model->label)
2828  {
2829  fprintf(fp, "label");
2830  for(int i=0;i<nr_class;i++)
2831  fprintf(fp," %d",model->label[i]);
2832  fprintf(fp, "\n");
2833  }
2834 
2835  if(model->probA) // regression has probA only
2836  {
2837  fprintf(fp, "probA");
2838  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2839  fprintf(fp," %g",model->probA[i]);
2840  fprintf(fp, "\n");
2841  }
2842  if(model->probB)
2843  {
2844  fprintf(fp, "probB");
2845  for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2846  fprintf(fp," %g",model->probB[i]);
2847  fprintf(fp, "\n");
2848  }
2849 
2850  if(model->nSV)
2851  {
2852  fprintf(fp, "nr_sv");
2853  for(int i=0;i<nr_class;i++)
2854  fprintf(fp," %d",model->nSV[i]);
2855  fprintf(fp, "\n");
2856  }
2857 
2858  fprintf(fp, "SV\n");
2859  const double * const *sv_coef = model->sv_coef;
2860  const svm_node * const *SV = model->SV;
2861 
2862  for(int i=0;i<l;i++)
2863  {
2864  for(int j=0;j<nr_class-1;j++)
2865  fprintf(fp, "%.16g ",sv_coef[j][i]);
2866 
2867  const svm_node *p = SV[i];
2868 
2869  if(param.kernel_type == PRECOMPUTED)
2870  fprintf(fp,"0:%d ",(int)(p->value));
2871  else
2872  while(p->index != -1)
2873  {
2874  fprintf(fp,"%d:%.8g ",p->index,p->value);
2875  p++;
2876  }
2877  fprintf(fp, "\n");
2878  }
2879 
2880  if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2881  else return 0;
2882 }
2883 
2884 static char *line = NULL;
2885 static int max_line_len;
2886 
2887 static char* readline(FILE *input)
2888 {
2889  int len;
2890 
2891  if(fgets(line,max_line_len,input) == NULL)
2892  return NULL;
2893 
2894  while(strrchr(line,'\n') == NULL)
2895  {
2896  max_line_len *= 2;
2897  line = (char *) realloc(line,max_line_len);
2898  len = (int) strlen(line);
2899  if(fgets(line+len,max_line_len-len,input) == NULL)
2900  break;
2901  }
2902  return line;
2903 }
2904 
2905 //
2906 // FSCANF helps to handle fscanf failures.
2907 // Its do-while block avoids the ambiguity when
2908 // if (...)
2909 // FSCANF();
2910 // is used
2911 //
2912 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
2913 bool read_model_header(FILE *fp, svm_model* model)
2914 {
2915  svm_parameter& param = model->param;
2916  char cmd[81];
2917  while(1)
2918  {
2919  FSCANF(fp,"%80s",cmd);
2920 
2921  if(strcmp(cmd,"svm_type")==0)
2922  {
2923  FSCANF(fp,"%80s",cmd);
2924  int i;
2925  for(i=0;svm_type_table[i];i++)
2926  {
2927  if(strcmp(svm_type_table[i],cmd)==0)
2928  {
2929  param.svm_type=i;
2930  break;
2931  }
2932  }
2933  if(svm_type_table[i] == NULL)
2934  {
2935  fprintf(stderr,"unknown svm type.\n");
2936  return false;
2937  }
2938  }
2939  else if(strcmp(cmd,"kernel_type")==0)
2940  {
2941  FSCANF(fp,"%80s",cmd);
2942  int i;
2943  for(i=0;kernel_type_table[i];i++)
2944  {
2945  if(strcmp(kernel_type_table[i],cmd)==0)
2946  {
2947  param.kernel_type=i;
2948  break;
2949  }
2950  }
2951  if(kernel_type_table[i] == NULL)
2952  {
2953  fprintf(stderr,"unknown kernel function.\n");
2954  return false;
2955  }
2956  }
2957  else if(strcmp(cmd,"degree")==0)
2958  FSCANF(fp,"%d",&param.degree);
2959  else if(strcmp(cmd,"gamma")==0)
2960  FSCANF(fp,"%lf",&param.gamma);
2961  else if(strcmp(cmd,"coef0")==0)
2962  FSCANF(fp,"%lf",&param.coef0);
2963  else if(strcmp(cmd,"nr_class")==0)
2964  FSCANF(fp,"%d",&model->nr_class);
2965  else if(strcmp(cmd,"total_sv")==0)
2966  FSCANF(fp,"%d",&model->l);
2967  else if(strcmp(cmd,"rho")==0)
2968  {
2969  int n = model->nr_class * (model->nr_class-1)/2;
2970  model->rho = Malloc(double,n);
2971  for(int i=0;i<n;i++)
2972  FSCANF(fp,"%lf",&model->rho[i]);
2973  }
2974  else if(strcmp(cmd,"label")==0)
2975  {
2976  int n = model->nr_class;
2977  model->label = Malloc(int,n);
2978  for(int i=0;i<n;i++)
2979  FSCANF(fp,"%d",&model->label[i]);
2980  }
2981  else if(strcmp(cmd,"probA")==0)
2982  {
2983  int n = model->nr_class * (model->nr_class-1)/2;
2984  model->probA = Malloc(double,n);
2985  for(int i=0;i<n;i++)
2986  FSCANF(fp,"%lf",&model->probA[i]);
2987  }
2988  else if(strcmp(cmd,"probB")==0)
2989  {
2990  int n = model->nr_class * (model->nr_class-1)/2;
2991  model->probB = Malloc(double,n);
2992  for(int i=0;i<n;i++)
2993  FSCANF(fp,"%lf",&model->probB[i]);
2994  }
2995  else if(strcmp(cmd,"nr_sv")==0)
2996  {
2997  int n = model->nr_class;
2998  model->nSV = Malloc(int,n);
2999  for(int i=0;i<n;i++)
3000  FSCANF(fp,"%d",&model->nSV[i]);
3001  }
3002  else if(strcmp(cmd,"SV")==0)
3003  {
3004  while(1)
3005  {
3006  int c = getc(fp);
3007  if(c==EOF || c=='\n') break;
3008  }
3009  break;
3010  }
3011  else
3012  {
3013  fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
3014  return false;
3015  }
3016  }
3017 
3018  return true;
3019 
3020 }
3021 
3022 svm_model *svm_load_model(const char *model_file_name)
3023 {
3024  FILE *fp = fopen(model_file_name,"rb");
3025  if(fp==NULL) return NULL;
3026 
3027  mitk::LocaleSwitch localeSwitch("C");
3028 
3029  // read parameters
3030 
3031  svm_model *model = Malloc(svm_model,1);
3032  model->rho = NULL;
3033  model->probA = NULL;
3034  model->probB = NULL;
3035  model->sv_indices = NULL;
3036  model->label = NULL;
3037  model->nSV = NULL;
3038 
3039  // read header
3040  if (!read_model_header(fp, model))
3041  {
3042  fprintf(stderr, "ERROR: fscanf failed to read model\n");
3043  free(model->rho);
3044  free(model->label);
3045  free(model->nSV);
3046  free(model);
3047  return NULL;
3048  }
3049 
3050  // read sv_coef and SV
3051 
3052  int elements = 0;
3053  long pos = ftell(fp);
3054 
3055  max_line_len = 1024;
3056  line = Malloc(char,max_line_len);
3057  char *p,*endptr,*idx,*val;
3058 
3059  while(readline(fp)!=NULL)
3060  {
3061  p = strtok(line,":");
3062  while(1)
3063  {
3064  p = strtok(NULL,":");
3065  if(p == NULL)
3066  break;
3067  ++elements;
3068  }
3069  }
3070  elements += model->l;
3071 
3072  fseek(fp,pos,SEEK_SET);
3073 
3074  int m = model->nr_class - 1;
3075  int l = model->l;
3076  model->sv_coef = Malloc(double *,m);
3077  int i;
3078  for(i=0;i<m;i++)
3079  model->sv_coef[i] = Malloc(double,l);
3080  model->SV = Malloc(svm_node*,l);
3081  svm_node *x_space = NULL;
3082  if(l>0) x_space = Malloc(svm_node,elements);
3083 
3084  int j=0;
3085  for(i=0;i<l;i++)
3086  {
3087  readline(fp);
3088  model->SV[i] = &x_space[j];
3089 
3090  p = strtok(line, " \t");
3091  model->sv_coef[0][i] = strtod(p,&endptr);
3092  for(int k=1;k<m;k++)
3093  {
3094  p = strtok(NULL, " \t");
3095  model->sv_coef[k][i] = strtod(p,&endptr);
3096  }
3097 
3098  while(1)
3099  {
3100  idx = strtok(NULL, ":");
3101  val = strtok(NULL, " \t");
3102 
3103  if(val == NULL)
3104  break;
3105  x_space[j].index = (int) strtol(idx,&endptr,10);
3106  x_space[j].value = strtod(val,&endptr);
3107 
3108  ++j;
3109  }
3110  x_space[j++].index = -1;
3111  }
3112  free(line);
3113 
3114  if (ferror(fp) != 0 || fclose(fp) != 0)
3115  return NULL;
3116 
3117  model->free_sv = 1; // XXX
3118  return model;
3119 }
3120 
3122 {
3123  if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
3124  free((void *)(model_ptr->SV[0]));
3125  if(model_ptr->sv_coef)
3126  {
3127  for(int i=0;i<model_ptr->nr_class-1;i++)
3128  free(model_ptr->sv_coef[i]);
3129  }
3130 
3131  free(model_ptr->SV);
3132  model_ptr->SV = NULL;
3133 
3134  free(model_ptr->sv_coef);
3135  model_ptr->sv_coef = NULL;
3136 
3137  free(model_ptr->rho);
3138  model_ptr->rho = NULL;
3139 
3140  free(model_ptr->label);
3141  model_ptr->label= NULL;
3142 
3143  free(model_ptr->probA);
3144  model_ptr->probA = NULL;
3145 
3146  free(model_ptr->probB);
3147  model_ptr->probB= NULL;
3148 
3149  free(model_ptr->sv_indices);
3150  model_ptr->sv_indices = NULL;
3151 
3152  free(model_ptr->nSV);
3153  model_ptr->nSV = NULL;
3154 }
3155 
3157 {
3158  if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3159  {
3160  svm_free_model_content(*model_ptr_ptr);
3161  free(*model_ptr_ptr);
3162  *model_ptr_ptr = NULL;
3163  }
3164 }
3165 
3167 {
3168  free(param->weight_label);
3169  free(param->weight);
3170 }
3171 
3172 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
3173 {
3174  // svm_type
3175 
3176  int svm_type = param->svm_type;
3177  if(svm_type != C_SVC &&
3178  svm_type != NU_SVC &&
3179  svm_type != ONE_CLASS &&
3180  svm_type != EPSILON_SVR &&
3181  svm_type != NU_SVR)
3182  return "unknown svm type";
3183 
3184  // kernel_type, degree
3185 
3186  int kernel_type = param->kernel_type;
3187  if(kernel_type != LINEAR &&
3188  kernel_type != POLY &&
3189  kernel_type != RBF &&
3190  kernel_type != SIGMOID &&
3191  kernel_type != PRECOMPUTED)
3192  return "unknown kernel type";
3193 
3194  if(param->gamma < 0)
3195  return "gamma < 0";
3196 
3197  if(param->degree < 0)
3198  return "degree of polynomial kernel < 0";
3199 
3200  // cache_size,eps,C,nu,p,shrinking
3201 
3202  if(param->cache_size <= 0)
3203  return "cache_size <= 0";
3204 
3205  if(param->eps <= 0)
3206  return "eps <= 0";
3207 
3208  if(svm_type == C_SVC ||
3209  svm_type == EPSILON_SVR ||
3210  svm_type == NU_SVR)
3211  if(param->C <= 0)
3212  return "C <= 0";
3213 
3214  if(svm_type == NU_SVC ||
3215  svm_type == ONE_CLASS ||
3216  svm_type == NU_SVR)
3217  if(param->nu <= 0 || param->nu > 1)
3218  return "nu <= 0 or nu > 1";
3219 
3220  if(svm_type == EPSILON_SVR)
3221  if(param->p < 0)
3222  return "p < 0";
3223 
3224  if(param->shrinking != 0 &&
3225  param->shrinking != 1)
3226  return "shrinking != 0 and shrinking != 1";
3227 
3228  if(param->probability != 0 &&
3229  param->probability != 1)
3230  return "probability != 0 and probability != 1";
3231 
3232  if(param->probability == 1 &&
3233  svm_type == ONE_CLASS)
3234  return "one-class SVM probability output not supported yet";
3235 
3236 
3237  // check whether nu-svc is feasible
3238 
3239  if(svm_type == NU_SVC)
3240  {
3241  int l = prob->l;
3242  int max_nr_class = 16;
3243  int nr_class = 0;
3244  int *label = Malloc(int,max_nr_class);
3245  double *count = Malloc(double,max_nr_class);
3246 
3247  int i;
3248  for(i=0;i<l;i++)
3249  {
3250  int this_label = (int)prob->y[i];
3251  int j;
3252  for(j=0;j<nr_class;j++)
3253  if(this_label == label[j])
3254  {
3255  count[j] += prob->W[i];
3256  break;
3257  }
3258  if(j == nr_class)
3259  {
3260  if(nr_class == max_nr_class)
3261  {
3262  max_nr_class *= 2;
3263  label = (int *)realloc(label,max_nr_class*sizeof(int));
3264  count = (double *)realloc(count,max_nr_class*sizeof(double));
3265  }
3266  label[nr_class] = this_label;
3267  count[nr_class] = prob->W[i];
3268  ++nr_class;
3269  }
3270  }
3271 
3272  for(i=0;i<nr_class;i++)
3273  {
3274  double n1 = count[i];
3275  for(int j=i+1;j<nr_class;j++)
3276  {
3277  double n2 = count[j];
3278  if(param->nu*(n1+n2)/2 > min(n1,n2))
3279  {
3280  free(label);
3281  free(count);
3282  return "specified nu is infeasible";
3283  }
3284  }
3285  }
3286  free(label);
3287  free(count);
3288  }
3289 
3290  return NULL;
3291 }
3292 
3294 {
3295  return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3296  model->probA!=NULL && model->probB!=NULL) ||
3297  ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3298  model->probA!=NULL);
3299 }
3300 
3301 void svm_set_print_string_function(void (*print_func)(const char *))
3302 {
3303  if(print_func == NULL)
3305  else
3306  svm_print_string = print_func;
3307 }
signed char schar
Definition: svm.cpp:65
int svm_save_model(const char *model_file_name, const svm_model *model)
Definition: svm.cpp:2794
svm_model * svm_train(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:2223
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
Definition: svm.cpp:2119
static void solve_one_class(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1603
static char * line
Definition: svm.cpp:2884
int * nSV
Definition: svm.h:119
void svm_free_model_content(svm_model *model_ptr)
Definition: svm.cpp:3121
const char * svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:3172
double * W
Definition: svm.h:74
static double sigmoid_predict(double decision_value, double A, double B)
Definition: svm.cpp:1919
double value
Definition: svm.h:66
static void solve_epsilon_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1646
int svm_get_nr_sv(const svm_model *model)
Definition: svm.cpp:2637
int l
Definition: svm.h:108
int nr_weight
Definition: svm.h:92
Definition: svm.h:78
STL namespace.
static const char * kernel_type_table[]
Definition: svm.cpp:2789
Definition: svm.h:78
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
Definition: svm.cpp:2488
struct svm_node ** SV
Definition: svm.h:109
double * probB
Definition: svm.h:113
int * weight_label
Definition: svm.h:93
static void sigmoid_train(int l, const double *dec_values, const double *labels, double &A, double &B)
Definition: svm.cpp:1806
double svm_get_svr_probability(const svm_model *model)
Definition: svm.cpp:2642
Definition: svm.h:77
static int max_line_len
Definition: svm.cpp:2885
double * probA
Definition: svm.h:112
int nr_class
Definition: svm.h:107
double svm_predict(const svm_model *model, const svm_node *x)
Definition: svm.cpp:2730
Definition: svm.h:104
static void info(const char *fmt,...)
Definition: svm.cpp:100
static void print_string_stdout(const char *s)
Definition: svm.cpp:93
#define LIBSVM_VERSION
Definition: svm.h:55
double p
Definition: svm.h:96
float Qfloat
Definition: svm.cpp:64
double cache_size
Definition: svm.h:89
int svm_get_nr_class(const svm_model *model)
Definition: svm.cpp:2618
int svm_check_probability_model(const svm_model *model)
Definition: svm.cpp:3293
static void solve_nu_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1686
Convenience class to temporarily change the current locale.
#define INF
Definition: svm.cpp:89
static void clone(T *&dst, S *src, int n)
Definition: svm.cpp:73
double eps
Definition: svm.h:90
static void(* svm_print_string)(const char *)
Definition: svm.cpp:98
static void svm_binary_svc_probability(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn, double &probA, double &probB)
Definition: svm.cpp:1994
static char * readline(FILE *input)
Definition: svm.cpp:2887
bool read_model_header(FILE *fp, svm_model *model)
Definition: svm.cpp:2913
static double powi(double base, int times)
Definition: svm.cpp:78
int shrinking
Definition: svm.h:97
struct svm_node ** x
Definition: svm.h:73
int * label
Definition: svm.h:118
void svm_free_and_destroy_model(svm_model **model_ptr_ptr)
Definition: svm.cpp:3156
double svm_predict_values(const svm_model *model, const svm_node *x, double *dec_values)
Definition: svm.cpp:2654
double svm_predict_probability(const svm_model *model, const svm_node *x, double *prob_estimates)
Definition: svm.cpp:2745
struct svm_parameter param
Definition: svm.h:106
static T max(T x, T y)
Definition: svm.cpp:70
int * sv_indices
Definition: svm.h:114
static double svm_svr_probability(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:2085
double * rho
Definition: svm.h:111
#define Malloc(type, n)
Definition: svm.cpp:91
void svm_get_sv_indices(const svm_model *model, int *indices)
Definition: svm.cpp:2630
int index
Definition: svm.h:65
#define FSCANF(_stream, _format, _var)
Definition: svm.cpp:2912
static T min(T x, T y)
Definition: svm.cpp:67
static bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4)
Definition: jsoncpp.cpp:244
double ** sv_coef
Definition: svm.h:110
static decision_function svm_train_one(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn)
Definition: svm.cpp:1741
static void multiclass_probability(int k, double **r, double *p)
Definition: svm.cpp:1930
static void swap(T &x, T &y)
Definition: svm.cpp:72
int svm_get_svm_type(const svm_model *model)
Definition: svm.cpp:2613
int probability
Definition: svm.h:98
int degree
Definition: svm.h:84
int free_sv
Definition: svm.h:122
#define TAU
Definition: svm.cpp:90
int libsvm_version
Definition: svm.cpp:63
static void solve_nu_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Definition: svm.cpp:1540
MITKCORE_EXPORT const ScalarType eps
Definition: svm.h:63
static const char * svm_type_table[]
Definition: svm.cpp:2784
Definition: svm.h:78
double * y
Definition: svm.h:72
double gamma
Definition: svm.h:85
int l
Definition: svm.h:71
double * weight
Definition: svm.h:94
Definition: svm.h:77
double C
Definition: svm.h:91
Definition: svm.h:77
int svm_type
Definition: svm.h:82
double nu
Definition: svm.h:95
static void solve_c_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si, double Cp, double Cn)
Definition: svm.cpp:1493
svm_model * svm_load_model(const char *model_file_name)
Definition: svm.cpp:3022
Definition: svm.h:77
double coef0
Definition: svm.h:86
void svm_destroy_param(svm_parameter *param)
Definition: svm.cpp:3166
int kernel_type
Definition: svm.h:83
void svm_get_labels(const svm_model *model, int *label)
Definition: svm.cpp:2623
Definition: svm.h:78
void svm_set_print_string_function(void(*print_func)(const char *))
Definition: svm.cpp:3301
static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)
Definition: svm.cpp:2197