67 template <
class T>
static inline T
min(T x,T y) {
return (x<y)?x:y; }
70 template <
class T>
static inline T
max(T x,T y) {
return (x>y)?x:y; }
72 template <
class T>
static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
73 template <
class S,
class T>
static inline void clone(T*& dst, S* src,
int n)
76 memcpy((
void *)dst,(
void *)src,
sizeof(T)*n);
78 static inline double powi(
double base,
int times)
80 double tmp = base, ret = 1.0;
82 for(
int t=times; t>0; t/=2)
91 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
100 static void info(
const char *fmt,...)
105 vsprintf(buf,fmt,ap);
107 (*svm_print_string)(buf);
110 static void info(
const char *fmt,...) {}
122 Cache(
int l,
long int size);
128 int get_data(
const int index,
Qfloat **data,
int len);
129 void swap_index(
int i,
int j);
142 void lru_delete(head_t *h);
143 void lru_insert(head_t *h);
146 Cache::Cache(
int l_,
long int size_):l(l_),size(size_)
148 head = (head_t *)calloc(l,
sizeof(head_t));
150 size -= l *
sizeof(head_t) /
sizeof(
Qfloat);
151 size =
max(size, 2 * (
long int) l);
152 lru_head.next = lru_head.prev = &lru_head;
157 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
162 void Cache::lru_delete(head_t *h)
165 h->prev->next = h->next;
166 h->next->prev = h->prev;
169 void Cache::lru_insert(head_t *h)
173 h->prev = lru_head.prev;
178 int Cache::get_data(
const int index,
Qfloat **data,
int len)
180 head_t *h = &head[index];
181 if(h->len) lru_delete(h);
182 int more = len - h->len;
189 head_t *old = lru_head.next;
198 h->data = (
Qfloat *)realloc(h->data,
sizeof(
Qfloat)*len);
208 void Cache::swap_index(
int i,
int j)
212 if(head[i].len) lru_delete(&head[i]);
213 if(head[j].len) lru_delete(&head[j]);
214 swap(head[i].data,head[j].data);
215 swap(head[i].len,head[j].len);
216 if(head[i].len) lru_insert(&head[i]);
217 if(head[j].len) lru_insert(&head[j]);
220 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
225 swap(h->data[i],h->data[j]);
248 virtual Qfloat *get_Q(
int column,
int len)
const = 0;
249 virtual double *get_QD()
const = 0;
250 virtual void swap_index(
int i,
int j)
const = 0;
251 virtual ~QMatrix() {}
254 class Kernel:
public QMatrix {
261 virtual Qfloat *get_Q(
int column,
int len)
const = 0;
262 virtual double *get_QD()
const = 0;
263 virtual void swap_index(
int i,
int j)
const
266 if(x_square)
swap(x_square[i],x_square[j]);
270 double (Kernel::*kernel_function)(
int i,
int j)
const;
277 const int kernel_type;
283 double kernel_linear(
int i,
int j)
const
285 return dot(x[i],x[j]);
287 double kernel_poly(
int i,
int j)
const
289 return powi(gamma*dot(x[i],x[j])+coef0,degree);
291 double kernel_rbf(
int i,
int j)
const
293 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
295 double kernel_sigmoid(
int i,
int j)
const
297 return tanh(gamma*dot(x[i],x[j])+coef0);
299 double kernel_precomputed(
int i,
int j)
const
301 return x[i][(int)(x[j][0].value)].
value;
306 :kernel_type(param.kernel_type), degree(param.degree),
307 gamma(param.gamma), coef0(param.coef0)
312 kernel_function = &Kernel::kernel_linear;
315 kernel_function = &Kernel::kernel_poly;
318 kernel_function = &Kernel::kernel_rbf;
321 kernel_function = &Kernel::kernel_sigmoid;
324 kernel_function = &Kernel::kernel_precomputed;
330 if(kernel_type ==
RBF)
332 x_square =
new double[l];
334 x_square[i] = dot(x[i],x[i]);
404 while(x->
index != -1)
410 while(y->
index != -1)
416 return exp(-param.
gamma*sum);
419 return tanh(param.
gamma*dot(x,y)+param.
coef0);
448 virtual ~Solver() {};
450 struct SolutionInfo {
457 void Solve(
int l,
const QMatrix& Q,
const double *p_,
const schar *y_,
458 double *alpha_,
const double* C_,
double eps,
459 SolutionInfo* si,
int shrinking);
464 enum { LOWER_BOUND, UPPER_BOUND, FREE };
482 void update_alpha_status(
int i)
484 if(alpha[i] >= get_C(i))
485 alpha_status[i] = UPPER_BOUND;
486 else if(alpha[i] <= 0)
487 alpha_status[i] = LOWER_BOUND;
488 else alpha_status[i] = FREE;
490 bool is_upper_bound(
int i) {
return alpha_status[i] == UPPER_BOUND; }
491 bool is_lower_bound(
int i) {
return alpha_status[i] == LOWER_BOUND; }
492 bool is_free(
int i) {
return alpha_status[i] == FREE; }
493 void swap_index(
int i,
int j);
494 void reconstruct_gradient();
495 virtual int select_working_set(
int &i,
int &j);
496 virtual double calculate_rho();
497 virtual void do_shrinking();
499 bool be_shrunk(
int i,
double Gmax1,
double Gmax2);
502 void Solver::swap_index(
int i,
int j)
507 swap(alpha_status[i],alpha_status[j]);
508 swap(alpha[i],alpha[j]);
510 swap(active_set[i],active_set[j]);
511 swap(G_bar[i],G_bar[j]);
515 void Solver::reconstruct_gradient()
519 if(active_size == l)
return;
524 for(j=active_size;j<l;j++)
525 G[j] = G_bar[j] + p[j];
527 for(j=0;j<active_size;j++)
531 if(2*nr_free < active_size)
532 info(
"\nWARNING: using -h 0 may be faster\n");
534 if (nr_free*l > 2*active_size*(l-active_size))
536 for(i=active_size;i<l;i++)
538 const Qfloat *Q_i = Q->get_Q(i,active_size);
539 for(j=0;j<active_size;j++)
541 G[i] += alpha[j] * Q_i[j];
546 for(i=0;i<active_size;i++)
549 const Qfloat *Q_i = Q->get_Q(i,l);
550 double alpha_i = alpha[i];
551 for(j=active_size;j<l;j++)
552 G[j] += alpha_i * Q_i[j];
557 void Solver::Solve(
int l,
const QMatrix& Q,
const double *p_,
const schar *y_,
558 double *alpha_,
const double* C_,
double eps,
559 SolutionInfo* si,
int shrinking)
566 clone(alpha,alpha_,l);
573 alpha_status =
new char[l];
575 update_alpha_status(i);
580 active_set =
new int[l];
589 G_bar =
new double[l];
597 if(!is_lower_bound(i))
599 const Qfloat *Q_i = Q.get_Q(i,l);
600 double alpha_i = alpha[i];
603 G[j] += alpha_i*Q_i[j];
604 if(is_upper_bound(i))
606 G_bar[j] += get_C(i) * Q_i[j];
613 int max_iter =
max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
614 int counter =
min(l,1000)+1;
616 while(iter < max_iter)
622 counter =
min(l,1000);
623 if(shrinking) do_shrinking();
628 if(select_working_set(i,j)!=0)
631 reconstruct_gradient();
635 if(select_working_set(i,j)!=0)
645 const Qfloat *Q_i = Q.get_Q(i,active_size);
646 const Qfloat *Q_j = Q.get_Q(j,active_size);
648 double C_i = get_C(i);
649 double C_j = get_C(j);
651 double old_alpha_i = alpha[i];
652 double old_alpha_j = alpha[j];
656 double quad_coef = QD[i]+QD[j]+2*Q_i[j];
659 double delta = (-G[i]-G[j])/quad_coef;
660 double diff = alpha[i] - alpha[j];
685 alpha[j] = C_i - diff;
693 alpha[i] = C_j + diff;
699 double quad_coef = QD[i]+QD[j]-2*Q_i[j];
702 double delta = (G[i]-G[j])/quad_coef;
703 double sum = alpha[i] + alpha[j];
712 alpha[j] = sum - C_i;
728 alpha[i] = sum - C_j;
743 double delta_alpha_i = alpha[i] - old_alpha_i;
744 double delta_alpha_j = alpha[j] - old_alpha_j;
746 for(
int k=0;k<active_size;k++)
748 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
754 bool ui = is_upper_bound(i);
755 bool uj = is_upper_bound(j);
756 update_alpha_status(i);
757 update_alpha_status(j);
759 if(ui != is_upper_bound(i))
764 G_bar[k] -= C_i * Q_i[k];
767 G_bar[k] += C_i * Q_i[k];
770 if(uj != is_upper_bound(j))
775 G_bar[k] -= C_j * Q_j[k];
778 G_bar[k] += C_j * Q_j[k];
788 reconstruct_gradient();
792 fprintf(stderr,
"\nWARNING: reaching max number of iterations\n");
797 si->rho = calculate_rho();
804 v += alpha[i] * (G[i] + p[i]);
812 alpha_[active_set[i]] = alpha[i];
824 si->upper_bound[i] = C[i];
826 info(
"\noptimization finished, #iter = %d\n",iter);
832 delete[] alpha_status;
839 int Solver::select_working_set(
int &out_i,
int &out_j)
851 double obj_diff_min =
INF;
853 for(
int t=0;t<active_size;t++)
856 if(!is_upper_bound(t))
865 if(!is_lower_bound(t))
876 Q_i = Q->get_Q(i,active_size);
878 for(
int j=0;j<active_size;j++)
882 if (!is_lower_bound(j))
884 double grad_diff=Gmax+G[j];
890 double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
892 obj_diff = -(grad_diff*grad_diff)/quad_coef;
894 obj_diff = -(grad_diff*grad_diff)/
TAU;
896 if (obj_diff <= obj_diff_min)
899 obj_diff_min = obj_diff;
906 if (!is_upper_bound(j))
908 double grad_diff= Gmax-G[j];
914 double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
916 obj_diff = -(grad_diff*grad_diff)/quad_coef;
918 obj_diff = -(grad_diff*grad_diff)/
TAU;
920 if (obj_diff <= obj_diff_min)
923 obj_diff_min = obj_diff;
938 bool Solver::be_shrunk(
int i,
double Gmax1,
double Gmax2)
940 if(is_upper_bound(i))
943 return(-G[i] > Gmax1);
945 return(-G[i] > Gmax2);
947 else if(is_lower_bound(i))
950 return(G[i] > Gmax2);
952 return(G[i] > Gmax1);
958 void Solver::do_shrinking()
965 for(i=0;i<active_size;i++)
969 if(!is_upper_bound(i))
974 if(!is_lower_bound(i))
982 if(!is_upper_bound(i))
987 if(!is_lower_bound(i))
995 if(unshrink ==
false && Gmax1 + Gmax2 <= eps*10)
998 reconstruct_gradient();
1003 for(i=0;i<active_size;i++)
1004 if (be_shrunk(i, Gmax1, Gmax2))
1007 while (active_size > i)
1009 if (!be_shrunk(active_size, Gmax1, Gmax2))
1011 swap_index(i,active_size);
1019 double Solver::calculate_rho()
1023 double ub =
INF, lb = -
INF, sum_free = 0;
1024 for(
int i=0;i<active_size;i++)
1026 double yG = y[i]*G[i];
1028 if(is_upper_bound(i))
1035 else if(is_lower_bound(i))
1050 r = sum_free/nr_free;
1062 class Solver_NU:
public Solver
1066 void Solve(
int l,
const QMatrix& Q,
const double *p,
const schar *y,
1067 double *alpha,
double* C_,
double eps,
1068 SolutionInfo* si,
int shrinking)
1071 Solver::Solve(l,Q,p,y,alpha,C_,eps,si,shrinking);
1075 int select_working_set(
int &i,
int &j);
1076 double calculate_rho();
1077 bool be_shrunk(
int i,
double Gmax1,
double Gmax2,
double Gmax3,
double Gmax4);
1078 void do_shrinking();
1082 int Solver_NU::select_working_set(
int &out_i,
int &out_j)
1090 double Gmaxp = -
INF;
1091 double Gmaxp2 = -
INF;
1094 double Gmaxn = -
INF;
1095 double Gmaxn2 = -
INF;
1099 double obj_diff_min =
INF;
1101 for(
int t=0;t<active_size;t++)
1104 if(!is_upper_bound(t))
1113 if(!is_lower_bound(t))
1123 const Qfloat *Q_ip = NULL;
1124 const Qfloat *Q_in = NULL;
1126 Q_ip = Q->get_Q(ip,active_size);
1128 Q_in = Q->get_Q(in,active_size);
1130 for(
int j=0;j<active_size;j++)
1134 if (!is_lower_bound(j))
1136 double grad_diff=Gmaxp+G[j];
1142 double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1144 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1146 obj_diff = -(grad_diff*grad_diff)/
TAU;
1148 if (obj_diff <= obj_diff_min)
1151 obj_diff_min = obj_diff;
1158 if (!is_upper_bound(j))
1160 double grad_diff=Gmaxn-G[j];
1161 if (-G[j] >= Gmaxn2)
1166 double quad_coef = QD[
in]+QD[j]-2*Q_in[j];
1168 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1170 obj_diff = -(grad_diff*grad_diff)/
TAU;
1172 if (obj_diff <= obj_diff_min)
1175 obj_diff_min = obj_diff;
1182 if(
max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) <
eps)
1185 if (y[Gmin_idx] == +1)
1194 bool Solver_NU::be_shrunk(
int i,
double Gmax1,
double Gmax2,
double Gmax3,
double Gmax4)
1196 if(is_upper_bound(i))
1199 return(-G[i] > Gmax1);
1201 return(-G[i] > Gmax4);
1203 else if(is_lower_bound(i))
1206 return(G[i] > Gmax2);
1208 return(G[i] > Gmax3);
1214 void Solver_NU::do_shrinking()
1216 double Gmax1 = -
INF;
1217 double Gmax2 = -
INF;
1218 double Gmax3 = -
INF;
1219 double Gmax4 = -
INF;
1223 for(i=0;i<active_size;i++)
1225 if(!is_upper_bound(i))
1229 if(-G[i] > Gmax1) Gmax1 = -G[i];
1231 else if(-G[i] > Gmax4) Gmax4 = -G[i];
1233 if(!is_lower_bound(i))
1237 if(G[i] > Gmax2) Gmax2 = G[i];
1239 else if(G[i] > Gmax3) Gmax3 = G[i];
1243 if(unshrink ==
false &&
max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1246 reconstruct_gradient();
1250 for(i=0;i<active_size;i++)
1251 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1254 while (active_size > i)
1256 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1258 swap_index(i,active_size);
1266 double Solver_NU::calculate_rho()
1268 int nr_free1 = 0,nr_free2 = 0;
1269 double ub1 =
INF, ub2 =
INF;
1270 double lb1 = -
INF, lb2 = -
INF;
1271 double sum_free1 = 0, sum_free2 = 0;
1273 for(
int i=0;i<active_size;i++)
1277 if(is_upper_bound(i))
1278 lb1 =
max(lb1,G[i]);
1279 else if(is_lower_bound(i))
1280 ub1 =
min(ub1,G[i]);
1289 if(is_upper_bound(i))
1290 lb2 =
max(lb2,G[i]);
1291 else if(is_lower_bound(i))
1292 ub2 =
min(ub2,G[i]);
1303 r1 = sum_free1/nr_free1;
1308 r2 = sum_free2/nr_free2;
1319 class SVC_Q:
public Kernel
1323 :Kernel(prob.l, prob.x, param)
1326 cache =
new Cache(prob.
l,(
long int)(param.
cache_size*(1<<20)));
1327 QD =
new double[prob.
l];
1328 for(
int i=0;i<prob.
l;i++)
1329 QD[i] = (this->*kernel_function)(i,i);
1332 Qfloat *get_Q(
int i,
int len)
const
1336 if((start = cache->get_data(i,&data,len)) < len)
1338 for(j=start;j<len;j++)
1339 data[j] = (
Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1344 double *get_QD()
const
1349 void swap_index(
int i,
int j)
const
1351 cache->swap_index(i,j);
1352 Kernel::swap_index(i,j);
1369 class ONE_CLASS_Q:
public Kernel
1373 :Kernel(prob.l, prob.x, param)
1375 cache =
new Cache(prob.
l,(
long int)(param.
cache_size*(1<<20)));
1376 QD =
new double[prob.
l];
1377 for(
int i=0;i<prob.
l;i++)
1378 QD[i] = (this->*kernel_function)(i,i);
1381 Qfloat *get_Q(
int i,
int len)
const
1385 if((start = cache->get_data(i,&data,len)) < len)
1387 for(j=start;j<len;j++)
1388 data[j] = (
Qfloat)(this->*kernel_function)(i,j);
1393 double *get_QD()
const
1398 void swap_index(
int i,
int j)
const
1400 cache->swap_index(i,j);
1401 Kernel::swap_index(i,j);
1415 class SVR_Q:
public Kernel
1419 :Kernel(prob.l, prob.x, param)
1422 cache =
new Cache(l,(
long int)(param.
cache_size*(1<<20)));
1423 QD =
new double[2*l];
1424 sign =
new schar[2*l];
1425 index =
new int[2*l];
1426 for(
int k=0;k<l;k++)
1432 QD[k] = (this->*kernel_function)(k,k);
1435 buffer[0] =
new Qfloat[2*l];
1436 buffer[1] =
new Qfloat[2*l];
1440 void swap_index(
int i,
int j)
const
1442 swap(sign[i],sign[j]);
1443 swap(index[i],index[j]);
1447 Qfloat *get_Q(
int i,
int len)
const
1450 int j, real_i = index[i];
1451 if(cache->get_data(real_i,&data,l) < l)
1454 data[j] = (
Qfloat)(this->*kernel_function)(real_i,j);
1458 Qfloat *buf = buffer[next_buffer];
1459 next_buffer = 1 - next_buffer;
1462 buf[j] = (
Qfloat) si * (
Qfloat) sign[j] * data[index[j]];
1466 double *get_QD()
const
1485 mutable int next_buffer;
1495 double *alpha, Solver::SolutionInfo* si,
double Cp,
double Cn)
1498 double *minus_ones =
new double[l];
1500 double *C =
new double[l];
1511 C[i] = prob->
W[i]*Cp;
1516 C[i] = prob->
W[i]*Cn;
1521 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1536 delete[] minus_ones;
1542 double *alpha, Solver::SolutionInfo* si)
1546 double nu = param->
nu;
1549 double *C =
new double[l];
1561 for(i=0;i<l;i++) nu_l += nu*C[i];
1562 double sum_pos = nu_l/2;
1563 double sum_neg = nu_l/2;
1568 alpha[i] =
min(C[i],sum_pos);
1569 sum_pos -= alpha[i];
1573 alpha[i] =
min(C[i],sum_neg);
1574 sum_neg -= alpha[i];
1577 double *zeros =
new double[l];
1583 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1587 info(
"C = %f\n",1/r);
1592 si->upper_bound[i] /= r;
1605 double *alpha, Solver::SolutionInfo* si)
1608 double *zeros =
new double[l];
1610 double *C =
new double[l];
1618 nu_l += C[i] * param->
nu;
1624 alpha[i] =
min(C[i],nu_l);
1638 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1648 double *alpha, Solver::SolutionInfo* si)
1651 double *alpha2 =
new double[2*l];
1652 double *linear_term =
new double[2*l];
1653 double *C =
new double[2*l];
1660 linear_term[i] = param->
p - prob->
y[i];
1662 C[i] = prob->
W[i]*param->
C;
1665 linear_term[i+l] = param->
p + prob->
y[i];
1667 C[i+l] = prob->
W[i]*param->
C;
1671 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1673 double sum_alpha = 0;
1676 alpha[i] = alpha2[i] - alpha2[i+l];
1677 sum_alpha += fabs(alpha[i]);
1681 delete[] linear_term;
1688 double *alpha, Solver::SolutionInfo* si)
1691 double *C =
new double[2*l];
1692 double *alpha2 =
new double[2*l];
1693 double *linear_term =
new double[2*l];
1700 C[i] = C[i+l] = prob->
W[i]*param->
C;
1701 sum += C[i] * param->
nu;
1707 alpha2[i] = alpha2[i+l] =
min(sum,C[i]);
1710 linear_term[i] = - prob->
y[i];
1713 linear_term[i+l] = prob->
y[i];
1718 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1721 info(
"epsilon = %f\n",-si->r);
1724 alpha[i] = alpha2[i] - alpha2[i+l];
1727 delete[] linear_term;
1735 struct decision_function
1743 double Cp,
double Cn)
1745 double *alpha =
Malloc(
double,prob->
l);
1746 Solver::SolutionInfo si;
1750 si.upper_bound =
Malloc(
double,prob->
l);
1754 si.upper_bound =
Malloc(
double,prob->
l);
1758 si.upper_bound =
Malloc(
double,prob->
l);
1762 si.upper_bound =
Malloc(
double,2*prob->
l);
1766 si.upper_bound =
Malloc(
double,2*prob->
l);
1771 info(
"obj = %f, rho = %f\n",si.obj,si.rho);
1777 for(
int i=0;i<prob->
l;i++)
1779 if(fabs(alpha[i]) > 0)
1784 if(fabs(alpha[i]) >= si.upper_bound[i])
1789 if(fabs(alpha[i]) >= si.upper_bound[i])
1795 free(si.upper_bound);
1797 info(
"nSV = %d, nBSV = %d\n",nSV,nBSV);
1799 decision_function f;
1807 int l,
const double *dec_values,
const double *labels,
1808 double& A,
double& B)
1810 double prior1=0, prior0 = 0;
1814 if (labels[i] > 0) prior1+=1;
1818 double min_step=1e-10;
1821 double hiTarget=(prior1+1.0)/(prior1+2.0);
1822 double loTarget=1/(prior0+2.0);
1823 double *t=
Malloc(
double,l);
1824 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1825 double newA,newB,newf,d1,d2;
1829 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1834 if (labels[i]>0) t[i]=hiTarget;
1836 fApB = dec_values[i]*A+B;
1838 fval += t[i]*fApB + log(1+exp(-fApB));
1840 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1842 for (iter=0;iter<max_iter;iter++)
1847 h21=0.0;g1=0.0;g2=0.0;
1850 fApB = dec_values[i]*A+B;
1853 p=exp(-fApB)/(1.0+exp(-fApB));
1854 q=1.0/(1.0+exp(-fApB));
1858 p=1.0/(1.0+exp(fApB));
1859 q=exp(fApB)/(1.0+exp(fApB));
1862 h11+=dec_values[i]*dec_values[i]*d2;
1864 h21+=dec_values[i]*d2;
1866 g1+=dec_values[i]*d1;
1871 if (fabs(g1)<eps && fabs(g2)<
eps)
1875 det=h11*h22-h21*h21;
1876 dA=-(h22*g1 - h21 * g2) / det;
1877 dB=-(-h21*g1+ h11 * g2) / det;
1882 while (stepsize >= min_step)
1884 newA = A + stepsize * dA;
1885 newB = B + stepsize * dB;
1891 fApB = dec_values[i]*newA+newB;
1893 newf += t[i]*fApB + log(1+exp(-fApB));
1895 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1898 if (newf<fval+0.0001*stepsize*gd)
1900 A=newA;B=newB;fval=newf;
1904 stepsize = stepsize / 2.0;
1907 if (stepsize < min_step)
1909 info(
"Line search fails in two-class probability estimates\n");
1915 info(
"Reaching maximal iterations in two-class probability estimates\n");
1921 double fApB = decision_value*A+B;
1924 return exp(-fApB)/(1.0+exp(-fApB));
1926 return 1.0/(1+exp(fApB)) ;
1933 int iter = 0, max_iter=
max(100,k);
1934 double **Q=
Malloc(
double *,k);
1935 double *Qp=
Malloc(
double,k);
1936 double pQp, eps=0.005/k;
1945 Q[t][t]+=r[j][t]*r[j][t];
1950 Q[t][t]+=r[j][t]*r[j][t];
1951 Q[t][j]=-r[j][t]*r[t][j];
1954 for (iter=0;iter<max_iter;iter++)
1962 Qp[t]+=Q[t][j]*p[j];
1968 double error=fabs(Qp[t]-pQp);
1969 if (error>max_error)
1972 if (max_error<eps)
break;
1976 double diff=(-Qp[t]+pQp)/Q[t][t];
1978 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1981 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1987 info(
"Exceeds max_iter in multiclass_prob\n");
1988 for(t=0;t<k;t++) free(Q[t]);
1996 double Cp,
double Cn,
double& probA,
double& probB)
2000 int *perm =
Malloc(
int,prob->
l);
2001 double *dec_values =
Malloc(
double,prob->
l);
2004 for(i=0;i<prob->
l;i++) perm[i]=i;
2005 for(i=0;i<prob->
l;i++)
2007 int j = i+rand()%(prob->
l-i);
2008 swap(perm[i],perm[j]);
2010 for(i=0;i<nr_fold;i++)
2012 int begin = i*prob->
l/nr_fold;
2013 int end = (i+1)*prob->
l/nr_fold;
2017 subprob.
l = prob->
l-(end-begin);
2019 subprob.y =
Malloc(
double,subprob.l);
2020 subprob.W =
Malloc(
double,subprob.l);
2023 for(j=0;j<begin;j++)
2025 subprob.x[k] = prob->
x[perm[j]];
2026 subprob.y[k] = prob->
y[perm[j]];
2027 subprob.W[k] = prob->
W[perm[j]];
2030 for(j=end;j<prob->
l;j++)
2032 subprob.x[k] = prob->
x[perm[j]];
2033 subprob.y[k] = prob->
y[perm[j]];
2034 subprob.W[k] = prob->
W[perm[j]];
2037 int p_count=0,n_count=0;
2044 if(p_count==0 && n_count==0)
2045 for(j=begin;j<end;j++)
2046 dec_values[perm[j]] = 0;
2047 else if(p_count > 0 && n_count == 0)
2048 for(j=begin;j<end;j++)
2049 dec_values[perm[j]] = 1;
2050 else if(p_count == 0 && n_count > 0)
2051 for(j=begin;j<end;j++)
2052 dec_values[perm[j]] = -1;
2066 for(j=begin;j<end;j++)
2070 dec_values[perm[j]] *= submodel->
label[0];
2090 double *ymv =
Malloc(
double,prob->
l);
2096 for(i=0;i<prob->
l;i++)
2098 ymv[i]=prob->
y[i]-ymv[i];
2099 mae += fabs(ymv[i]);
2102 double std=sqrt(2*mae*mae);
2105 for(i=0;i<prob->
l;i++)
2106 if (fabs(ymv[i]) > 5*std)
2110 mae /= (prob->
l-count);
2111 info(
"Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2122 int max_nr_class = 16;
2125 int *count =
Malloc(
int,max_nr_class);
2126 int *data_label =
Malloc(
int,l);
2131 int this_label = (int)prob->
y[i];
2133 for(j=0;j<nr_class;j++)
2135 if(this_label == label[j])
2144 if(nr_class == max_nr_class)
2147 label = (
int *)realloc(label,max_nr_class*
sizeof(
int));
2148 count = (
int *)realloc(count,max_nr_class*
sizeof(
int));
2161 if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2163 swap(label[0],label[1]);
2164 swap(count[0],count[1]);
2167 if(data_label[i] == 0)
2174 int *start =
Malloc(
int,nr_class);
2177 start[i] = start[i-1]+count[i-1];
2180 perm[start[data_label[i]]] = i;
2181 ++start[data_label[i]];
2185 start[i] = start[i-1]+count[i-1];
2201 for(i=0;i<prob->
l;i++)
2202 if(prob->
W[i] > 0) l++;
2206 newprob->
y =
Malloc(
double,l);
2207 newprob->
W =
Malloc(
double,l);
2210 for(i=0;i<prob->
l;i++)
2213 newprob->
x[j] = prob->
x[i];
2214 newprob->
y[j] = prob->
y[i];
2215 newprob->
W[j] = prob->
W[i];
2239 model->
label = NULL;
2254 model->
rho[0] = f.rho;
2258 for(i=0;i<prob->
l;i++)
2259 if(fabs(f.alpha[i]) > 0) ++
nSV;
2265 for(i=0;i<prob->
l;i++)
2266 if(fabs(f.alpha[i]) > 0)
2268 model->
SV[j] = prob->
x[i];
2269 model->
sv_coef[0][j] = f.alpha[i];
2284 int *perm =
Malloc(
int,l);
2289 info(
"WARNING: training data in only one class. See README for details.\n");
2298 x[i] = prob->
x[perm[i]];
2299 W[i] = prob->
W[perm[i]];
2304 double *weighted_C =
Malloc(
double, nr_class);
2306 weighted_C[i] = param->
C;
2307 for(i=0;i<param->nr_weight;i++)
2314 fprintf(stderr,
"WARNING: class label %d specified in weight is not found\n", param->
weight_label[i]);
2316 weighted_C[j] *= param->
weight[i];
2321 bool *nonzero =
Malloc(
bool,l);
2324 decision_function *f =
Malloc(decision_function,nr_class*(nr_class-1)/2);
2329 probA=
Malloc(
double,nr_class*(nr_class-1)/2);
2330 probB=
Malloc(
double,nr_class*(nr_class-1)/2);
2338 int si = start[i], sj = start[j];
2339 int ci = count[i], cj = count[j];
2342 sub_prob.
y =
Malloc(
double,sub_prob.
l);
2343 sub_prob.
W =
Malloc(
double,sub_prob.
l);
2347 sub_prob.
x[k] = x[si+k];
2349 sub_prob.
W[k] = W[si+k];
2353 sub_prob.
x[ci+k] = x[sj+k];
2354 sub_prob.
y[ci+k] = -1;
2355 sub_prob.
W[ci+k] = W[sj+k];
2361 f[p] =
svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2363 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2364 nonzero[si+k] =
true;
2366 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2367 nonzero[sj+k] =
true;
2380 model->
label[i] = label[i];
2382 model->
rho =
Malloc(
double,nr_class*(nr_class-1)/2);
2383 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2384 model->
rho[i] = f[i].rho;
2388 model->
probA =
Malloc(
double,nr_class*(nr_class-1)/2);
2389 model->
probB =
Malloc(
double,nr_class*(nr_class-1)/2);
2390 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2392 model->
probA[i] = probA[i];
2393 model->
probB[i] = probB[i];
2403 int *nz_count =
Malloc(
int,nr_class);
2408 for(
int j=0;j<count[i];j++)
2409 if(nonzero[start[i]+j])
2418 info(
"Total nSV = %d\n",total_sv);
2420 model->
l = total_sv;
2427 model->
SV[p] = x[i];
2431 int *nz_start =
Malloc(
int,nr_class);
2434 nz_start[i] = nz_start[i-1]+nz_count[i-1];
2437 for(i=0;i<nr_class-1;i++)
2453 int q = nz_start[i];
2457 model->
sv_coef[j-1][q++] = f[p].alpha[k];
2461 model->
sv_coef[i][q++] = f[p].alpha[ci+k];
2475 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2493 int *perm =
Malloc(
int,l);
2498 fprintf(stderr,
"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2500 fold_start =
Malloc(
int,nr_fold+1);
2512 int *fold_count =
Malloc(
int,nr_fold);
2514 int *index =
Malloc(
int,l);
2518 for(i=0;i<count[c];i++)
2520 int j = i+rand()%(count[c]-i);
2521 swap(index[start[c]+j],index[start[c]+i]);
2523 for(i=0;i<nr_fold;i++)
2527 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2530 for (i=1;i<=nr_fold;i++)
2531 fold_start[i] = fold_start[i-1]+fold_count[i-1];
2533 for(i=0;i<nr_fold;i++)
2535 int begin = start[c]+i*count[c]/nr_fold;
2536 int end = start[c]+(i+1)*count[c]/nr_fold;
2537 for(
int j=begin;j<end;j++)
2539 perm[fold_start[i]] = index[j];
2544 for (i=1;i<=nr_fold;i++)
2545 fold_start[i] = fold_start[i-1]+fold_count[i-1];
2554 for(i=0;i<
l;i++) perm[i]=i;
2557 int j = i+rand()%(l-i);
2558 swap(perm[i],perm[j]);
2560 for(i=0;i<=nr_fold;i++)
2561 fold_start[i]=i*l/nr_fold;
2564 for(i=0;i<nr_fold;i++)
2566 int begin = fold_start[i];
2567 int end = fold_start[i+1];
2571 subprob.
l = l-(end-begin);
2573 subprob.
y =
Malloc(
double,subprob.
l);
2575 subprob.
W =
Malloc(
double,subprob.
l);
2577 for(j=0;j<begin;j++)
2579 subprob.
x[k] = prob->
x[perm[j]];
2580 subprob.
y[k] = prob->
y[perm[j]];
2581 subprob.
W[k] = prob->
W[perm[j]];
2586 subprob.
x[k] = prob->
x[perm[j]];
2587 subprob.
y[k] = prob->
y[perm[j]];
2588 subprob.
W[k] = prob->
W[perm[j]];
2596 for(j=begin;j<end;j++)
2598 free(prob_estimates);
2601 for(j=begin;j<end;j++)
2602 target[perm[j]] =
svm_predict(submodel,prob->
x[perm[j]]);
2625 if (model->
label != NULL)
2627 label[i] = model->
label[i];
2633 for(
int i=0;i<model->
l;i++)
2646 return model->
probA[0];
2649 fprintf(stderr,
"Model doesn't contain information for SVR probability inference\n");
2663 for(i=0;i<model->
l;i++)
2664 sum += sv_coef[i] * Kernel::k_function(x,model->
SV[i],model->
param);
2665 sum -= model->
rho[0];
2669 return (sum>0)?1:-1;
2678 double *kvalue =
Malloc(
double,l);
2680 kvalue[i] = Kernel::k_function(x,model->
SV[i],model->
param);
2682 int *start =
Malloc(
int,nr_class);
2685 start[i] = start[i-1]+model->
nSV[i-1];
2687 int *vote =
Malloc(
int,nr_class);
2698 int ci = model->
nSV[i];
2699 int cj = model->
nSV[j];
2702 double *coef1 = model->
sv_coef[j-1];
2703 double *coef2 = model->
sv_coef[i];
2705 sum += coef1[si+k] * kvalue[si+k];
2707 sum += coef2[sj+k] * kvalue[sj+k];
2708 sum -= model->
rho[p];
2709 dec_values[p] = sum;
2711 if(dec_values[p] > 0)
2718 int vote_max_idx = 0;
2720 if(vote[i] > vote[vote_max_idx])
2726 return model->
label[vote_max_idx];
2737 dec_values =
Malloc(
double, 1);
2739 dec_values =
Malloc(
double, nr_class*(nr_class-1)/2);
2753 double *dec_values =
Malloc(
double, nr_class*(nr_class-1)/2);
2756 double min_prob=1e-7;
2757 double **pairwise_prob=
Malloc(
double *,nr_class);
2759 pairwise_prob[i]=
Malloc(
double,nr_class);
2765 pairwise_prob[j][i]=1-pairwise_prob[i][j];
2770 int prob_max_idx = 0;
2772 if(prob_estimates[i] > prob_estimates[prob_max_idx])
2775 free(pairwise_prob[i]);
2777 free(pairwise_prob);
2778 return model->
label[prob_max_idx];
2786 "c_svc",
"nu_svc",
"one_class",
"epsilon_svr",
"nu_svr",NULL
2791 "linear",
"polynomial",
"rbf",
"sigmoid",
"precomputed",NULL
2796 FILE *fp = fopen(model_file_name,
"w");
2797 if(fp==NULL)
return -1;
2803 fprintf(fp,
"svm_type %s\n", svm_type_table[param.
svm_type]);
2804 fprintf(fp,
"kernel_type %s\n", kernel_type_table[param.
kernel_type]);
2807 fprintf(fp,
"degree %d\n", param.
degree);
2810 fprintf(fp,
"gamma %g\n", param.
gamma);
2813 fprintf(fp,
"coef0 %g\n", param.
coef0);
2817 fprintf(fp,
"nr_class %d\n", nr_class);
2818 fprintf(fp,
"total_sv %d\n",l);
2822 for(
int i=0;i<nr_class*(nr_class-1)/2;i++)
2823 fprintf(fp,
" %g",model->
rho[i]);
2829 fprintf(fp,
"label");
2831 fprintf(fp,
" %d",model->
label[i]);
2837 fprintf(fp,
"probA");
2838 for(
int i=0;i<nr_class*(nr_class-1)/2;i++)
2839 fprintf(fp,
" %g",model->
probA[i]);
2844 fprintf(fp,
"probB");
2845 for(
int i=0;i<nr_class*(nr_class-1)/2;i++)
2846 fprintf(fp,
" %g",model->
probB[i]);
2852 fprintf(fp,
"nr_sv");
2854 fprintf(fp,
" %d",model->
nSV[i]);
2858 fprintf(fp,
"SV\n");
2862 for(
int i=0;i<
l;i++)
2864 for(
int j=0;j<nr_class-1;j++)
2865 fprintf(fp,
"%.16g ",sv_coef[j][i]);
2870 fprintf(fp,
"0:%d ",(
int)(p->
value));
2872 while(p->
index != -1)
2880 if (ferror(fp) != 0 || fclose(fp) != 0)
return -1;
2891 if(fgets(line,max_line_len,input) == NULL)
2894 while(strrchr(line,
'\n') == NULL)
2897 line = (
char *) realloc(line,max_line_len);
2898 len = (int) strlen(line);
2899 if(fgets(line+len,max_line_len-len,input) == NULL)
2912 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
2921 if(strcmp(cmd,
"svm_type")==0)
2925 for(i=0;svm_type_table[i];i++)
2927 if(strcmp(svm_type_table[i],cmd)==0)
2933 if(svm_type_table[i] == NULL)
2935 fprintf(stderr,
"unknown svm type.\n");
2939 else if(strcmp(cmd,
"kernel_type")==0)
2943 for(i=0;kernel_type_table[i];i++)
2945 if(strcmp(kernel_type_table[i],cmd)==0)
2951 if(kernel_type_table[i] == NULL)
2953 fprintf(stderr,
"unknown kernel function.\n");
2957 else if(strcmp(cmd,
"degree")==0)
2959 else if(strcmp(cmd,
"gamma")==0)
2961 else if(strcmp(cmd,
"coef0")==0)
2963 else if(strcmp(cmd,
"nr_class")==0)
2965 else if(strcmp(cmd,
"total_sv")==0)
2967 else if(strcmp(cmd,
"rho")==0)
2971 for(
int i=0;i<n;i++)
2974 else if(strcmp(cmd,
"label")==0)
2978 for(
int i=0;i<n;i++)
2981 else if(strcmp(cmd,
"probA")==0)
2985 for(
int i=0;i<n;i++)
2988 else if(strcmp(cmd,
"probB")==0)
2992 for(
int i=0;i<n;i++)
2995 else if(strcmp(cmd,
"nr_sv")==0)
2999 for(
int i=0;i<n;i++)
3002 else if(strcmp(cmd,
"SV")==0)
3007 if(c==EOF || c==
'\n')
break;
3013 fprintf(stderr,
"unknown text in model file: [%s]\n",cmd);
3024 FILE *fp = fopen(model_file_name,
"rb");
3025 if(fp==NULL)
return NULL;
3033 model->
probA = NULL;
3034 model->
probB = NULL;
3036 model->
label = NULL;
3042 fprintf(stderr,
"ERROR: fscanf failed to read model\n");
3053 long pos = ftell(fp);
3055 max_line_len = 1024;
3056 line =
Malloc(
char,max_line_len);
3057 char *p,*endptr,*idx,*val;
3061 p = strtok(line,
":");
3064 p = strtok(NULL,
":");
3070 elements += model->
l;
3072 fseek(fp,pos,SEEK_SET);
3088 model->
SV[i] = &x_space[j];
3090 p = strtok(line,
" \t");
3091 model->
sv_coef[0][i] = strtod(p,&endptr);
3092 for(
int k=1;k<m;k++)
3094 p = strtok(NULL,
" \t");
3095 model->
sv_coef[k][i] = strtod(p,&endptr);
3100 idx = strtok(NULL,
":");
3101 val = strtok(NULL,
" \t");
3105 x_space[j].
index = (int) strtol(idx,&endptr,10);
3106 x_space[j].
value = strtod(val,&endptr);
3110 x_space[j++].
index = -1;
3114 if (ferror(fp) != 0 || fclose(fp) != 0)
3123 if(model_ptr->
free_sv && model_ptr->
l > 0 && model_ptr->
SV != NULL)
3124 free((
void *)(model_ptr->
SV[0]));
3127 for(
int i=0;i<model_ptr->
nr_class-1;i++)
3131 free(model_ptr->
SV);
3132 model_ptr->
SV = NULL;
3137 free(model_ptr->
rho);
3138 model_ptr->
rho = NULL;
3140 free(model_ptr->
label);
3141 model_ptr->
label= NULL;
3143 free(model_ptr->
probA);
3144 model_ptr->
probA = NULL;
3146 free(model_ptr->
probB);
3147 model_ptr->
probB= NULL;
3152 free(model_ptr->
nSV);
3153 model_ptr->
nSV = NULL;
3158 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3161 free(*model_ptr_ptr);
3162 *model_ptr_ptr = NULL;
3177 if(svm_type !=
C_SVC &&
3182 return "unknown svm type";
3187 if(kernel_type !=
LINEAR &&
3188 kernel_type !=
POLY &&
3189 kernel_type !=
RBF &&
3192 return "unknown kernel type";
3194 if(param->
gamma < 0)
3198 return "degree of polynomial kernel < 0";
3203 return "cache_size <= 0";
3208 if(svm_type ==
C_SVC ||
3217 if(param->
nu <= 0 || param->
nu > 1)
3218 return "nu <= 0 or nu > 1";
3226 return "shrinking != 0 and shrinking != 1";
3230 return "probability != 0 and probability != 1";
3234 return "one-class SVM probability output not supported yet";
3242 int max_nr_class = 16;
3245 double *count =
Malloc(
double,max_nr_class);
3250 int this_label = (int)prob->
y[i];
3252 for(j=0;j<nr_class;j++)
3253 if(this_label == label[j])
3255 count[j] += prob->
W[i];
3260 if(nr_class == max_nr_class)
3263 label = (
int *)realloc(label,max_nr_class*
sizeof(
int));
3264 count = (
double *)realloc(count,max_nr_class*
sizeof(
double));
3274 double n1 = count[i];
3277 double n2 = count[j];
3278 if(param->
nu*(n1+n2)/2 >
min(n1,n2))
3282 return "specified nu is infeasible";
3298 model->
probA!=NULL);
3303 if(print_func == NULL)
int svm_save_model(const char *model_file_name, const svm_model *model)
svm_model * svm_train(const svm_problem *prob, const svm_parameter *param)
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
static void solve_one_class(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
void svm_free_model_content(svm_model *model_ptr)
const char * svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
static double sigmoid_predict(double decision_value, double A, double B)
static void solve_epsilon_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
int svm_get_nr_sv(const svm_model *model)
static const char * kernel_type_table[]
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
static void sigmoid_train(int l, const double *dec_values, const double *labels, double &A, double &B)
double svm_get_svr_probability(const svm_model *model)
double svm_predict(const svm_model *model, const svm_node *x)
static void info(const char *fmt,...)
static void print_string_stdout(const char *s)
int svm_get_nr_class(const svm_model *model)
int svm_check_probability_model(const svm_model *model)
static void solve_nu_svr(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
Convenience class to temporarily change the current locale.
static void clone(T *&dst, S *src, int n)
static void(* svm_print_string)(const char *)
static void svm_binary_svc_probability(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn, double &probA, double &probB)
static char * readline(FILE *input)
bool read_model_header(FILE *fp, svm_model *model)
static double powi(double base, int times)
void svm_free_and_destroy_model(svm_model **model_ptr_ptr)
double svm_predict_values(const svm_model *model, const svm_node *x, double *dec_values)
double svm_predict_probability(const svm_model *model, const svm_node *x, double *prob_estimates)
struct svm_parameter param
static double svm_svr_probability(const svm_problem *prob, const svm_parameter *param)
void svm_get_sv_indices(const svm_model *model, int *indices)
#define FSCANF(_stream, _format, _var)
static bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4)
static decision_function svm_train_one(const svm_problem *prob, const svm_parameter *param, double Cp, double Cn)
static void multiclass_probability(int k, double **r, double *p)
static void swap(T &x, T &y)
int svm_get_svm_type(const svm_model *model)
static void solve_nu_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si)
MITKCORE_EXPORT const ScalarType eps
static const char * svm_type_table[]
static void solve_c_svc(const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo *si, double Cp, double Cn)
svm_model * svm_load_model(const char *model_file_name)
void svm_destroy_param(svm_parameter *param)
void svm_get_labels(const svm_model *model, int *label)
void svm_set_print_string_function(void(*print_func)(const char *))
static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)