svm.cpp

Go to the documentation of this file.
00001 /*
00002 Copyright (c) 2000-2008 Chih-Chung Chang and Chih-Jen Lin
00003 All rights reserved.
00004 
00005 Redistribution and use in source and binary forms, with or without
00006 modification, are permitted provided that the following conditions
00007 are met:
00008 
00009 1. Redistributions of source code must retain the above copyright
00010 notice, this list of conditions and the following disclaimer.
00011 
00012 2. Redistributions in binary form must reproduce the above copyright
00013 notice, this list of conditions and the following disclaimer in the
00014 documentation and/or other materials provided with the distribution.
00015 
00016 3. Neither name of copyright holders nor the names of its contributors
00017 may be used to endorse or promote products derived from this software
00018 without specific prior written permission.
00019 
00020 
00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 
00033 */
00034 
00035 
00036 #include <math.h>
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <ctype.h>
00040 #include <float.h>
00041 #include <string.h>
00042 #include <stdarg.h>
00043 #include <locale.h>
00044 #include "svm.h"
00045 
00046 namespace celeste
00047 {
00048 typedef float Qfloat;
00049 typedef signed char schar;
00050 #ifndef min
00051 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
00052 #endif
00053 #ifndef max
00054 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
00055 #endif
00056 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00057 template <class S, class T> inline void clone(T*& dst, S* src, int n)
00058 {
00059         dst = new T[n];
00060         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00061 }
00062 inline double powi(double base, int times)
00063 {
00064         double tmp = base, ret = 1.0;
00065 
00066         for(int t=times; t>0; t/=2)
00067         {
00068                 if(t%2==1) ret*=tmp;
00069                 tmp = tmp * tmp;
00070         }
00071         return ret;
00072 }
00073 #define INF HUGE_VAL
00074 #define TAU 1e-12
00075 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00076 #if 1
00077 void info(const char *fmt,...)
00078 {
00079         va_list ap;
00080         va_start(ap,fmt);
00081         vprintf(fmt,ap);
00082         va_end(ap);
00083 }
00084 void info_flush()
00085 {
00086         fflush(stdout);
00087 }
00088 #else
00089 void info(char *fmt,...) {}
00090 void info_flush() {}
00091 #endif
00092 
00093 //
00094 // Kernel Cache
00095 //
00096 // l is the number of total data items
00097 // size is the cache size limit in bytes
00098 //
00099 class Cache
00100 {
00101 public:
00102         Cache(int l,long int size);
00103         ~Cache();
00104 
00105         // request data [0,len)
00106         // return some position p where [p,len) need to be filled
00107         // (p >= len if nothing needs to be filled)
00108         int get_data(const int index, Qfloat **data, int len);
00109         void swap_index(int i, int j);  // future_option
00110 private:
00111         int l;
00112         long int size;
00113         struct head_t
00114         {
00115                 head_t *prev, *next;    // a cicular list
00116                 Qfloat *data;
00117                 int len;                // data[0,len) is cached in this entry
00118         };
00119 
00120         head_t *head;
00121         head_t lru_head;
00122         void lru_delete(head_t *h);
00123         void lru_insert(head_t *h);
00124 };
00125 
00126 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00127 {
00128         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00129         size /= sizeof(Qfloat);
00130         size -= l * sizeof(head_t) / sizeof(Qfloat);
00131         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00132         lru_head.next = lru_head.prev = &lru_head;
00133 }
00134 
00135 Cache::~Cache()
00136 {
00137         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00138                 free(h->data);
00139         free(head);
00140 }
00141 
00142 void Cache::lru_delete(head_t *h)
00143 {
00144         // delete from current location
00145         h->prev->next = h->next;
00146         h->next->prev = h->prev;
00147 }
00148 
00149 void Cache::lru_insert(head_t *h)
00150 {
00151         // insert to last position
00152         h->next = &lru_head;
00153         h->prev = lru_head.prev;
00154         h->prev->next = h;
00155         h->next->prev = h;
00156 }
00157 
00158 int Cache::get_data(const int index, Qfloat **data, int len)
00159 {
00160         head_t *h = &head[index];
00161         if(h->len) lru_delete(h);
00162         int more = len - h->len;
00163 
00164         if(more > 0)
00165         {
00166                 // free old space
00167                 while(size < more)
00168                 {
00169                         head_t *old = lru_head.next;
00170                         lru_delete(old);
00171                         free(old->data);
00172                         size += old->len;
00173                         old->data = 0;
00174                         old->len = 0;
00175                 }
00176 
00177                 // allocate new space
00178                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00179                 size -= more;
00180                 swap(h->len,len);
00181         }
00182 
00183         lru_insert(h);
00184         *data = h->data;
00185         return len;
00186 }
00187 
00188 void Cache::swap_index(int i, int j)
00189 {
00190         if(i==j) return;
00191 
00192         if(head[i].len) lru_delete(&head[i]);
00193         if(head[j].len) lru_delete(&head[j]);
00194         swap(head[i].data,head[j].data);
00195         swap(head[i].len,head[j].len);
00196         if(head[i].len) lru_insert(&head[i]);
00197         if(head[j].len) lru_insert(&head[j]);
00198 
00199         if(i>j) swap(i,j);
00200         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00201         {
00202                 if(h->len > i)
00203                 {
00204                         if(h->len > j)
00205                                 swap(h->data[i],h->data[j]);
00206                         else
00207                         {
00208                                 // give up
00209                                 lru_delete(h);
00210                                 free(h->data);
00211                                 size += h->len;
00212                                 h->data = 0;
00213                                 h->len = 0;
00214                         }
00215                 }
00216         }
00217 }
00218 
00219 //
00220 // Kernel evaluation
00221 //
00222 // the static method k_function is for doing single kernel evaluation
00223 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00224 // the member function get_Q is for getting one column from the Q Matrix
00225 //
00226 class QMatrix {
00227 public:
00228         virtual Qfloat *get_Q(int column, int len) const = 0;
00229         virtual Qfloat *get_QD() const = 0;
00230         virtual void swap_index(int i, int j) const = 0;
00231         virtual ~QMatrix() {}
00232 };
00233 
00234 class Kernel: public QMatrix {
00235 public:
00236         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00237         virtual ~Kernel();
00238 
00239         static double k_function(const svm_node *x, const svm_node *y,
00240                                  const svm_parameter& param);
00241         virtual Qfloat *get_Q(int column, int len) const = 0;
00242         virtual Qfloat *get_QD() const = 0;
00243         virtual void swap_index(int i, int j) const     // no so const...
00244         {
00245                 swap(x[i],x[j]);
00246                 if(x_square) swap(x_square[i],x_square[j]);
00247         }
00248 protected:
00249 
00250         double (Kernel::*kernel_function)(int i, int j) const;
00251 
00252 private:
00253         const svm_node **x;
00254         double *x_square;
00255 
00256         // svm_parameter
00257         const int kernel_type;
00258         const int degree;
00259         const double gamma;
00260         const double coef0;
00261 
00262         static double dot(const svm_node *px, const svm_node *py);
00263         double kernel_linear(int i, int j) const
00264         {
00265                 return dot(x[i],x[j]);
00266         }
00267         double kernel_poly(int i, int j) const
00268         {
00269                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00270         }
00271         double kernel_rbf(int i, int j) const
00272         {
00273                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00274         }
00275         double kernel_sigmoid(int i, int j) const
00276         {
00277                 return tanh(gamma*dot(x[i],x[j])+coef0);
00278         }
00279         double kernel_precomputed(int i, int j) const
00280         {
00281                 return x[i][(int)(x[j][0].value)].value;
00282         }
00283 };
00284 
00285 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00286 :kernel_type(param.kernel_type), degree(param.degree),
00287  gamma(param.gamma), coef0(param.coef0)
00288 {
00289         switch(kernel_type)
00290         {
00291                 case LINEAR:
00292                         kernel_function = &Kernel::kernel_linear;
00293                         break;
00294                 case POLY:
00295                         kernel_function = &Kernel::kernel_poly;
00296                         break;
00297                 case RBF:
00298                         kernel_function = &Kernel::kernel_rbf;
00299                         break;
00300                 case SIGMOID:
00301                         kernel_function = &Kernel::kernel_sigmoid;
00302                         break;
00303                 case PRECOMPUTED:
00304                         kernel_function = &Kernel::kernel_precomputed;
00305                         break;
00306         }
00307 
00308         clone(x,x_,l);
00309 
00310         if(kernel_type == RBF)
00311         {
00312                 x_square = new double[l];
00313                 for(int i=0;i<l;i++)
00314                         x_square[i] = dot(x[i],x[i]);
00315         }
00316         else
00317                 x_square = 0;
00318 }
00319 
00320 Kernel::~Kernel()
00321 {
00322         delete[] x;
00323         delete[] x_square;
00324 }
00325 
00326 double Kernel::dot(const svm_node *px, const svm_node *py)
00327 {
00328         double sum = 0;
00329         while(px->index != -1 && py->index != -1)
00330         {
00331                 if(px->index == py->index)
00332                 {
00333                         sum += px->value * py->value;
00334                         ++px;
00335                         ++py;
00336                 }
00337                 else
00338                 {
00339                         if(px->index > py->index)
00340                                 ++py;
00341                         else
00342                                 ++px;
00343                 }                       
00344         }
00345         return sum;
00346 }
00347 
00348 double Kernel::k_function(const svm_node *x, const svm_node *y,
00349                           const svm_parameter& param)
00350 {
00351         switch(param.kernel_type)
00352         {
00353                 case LINEAR:
00354                         return dot(x,y);
00355                 case POLY:
00356                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00357                 case RBF:
00358                 {
00359                         double sum = 0;
00360                         while(x->index != -1 && y->index !=-1)
00361                         {
00362                                 if(x->index == y->index)
00363                                 {
00364                                         double d = x->value - y->value;
00365                                         sum += d*d;
00366                                         ++x;
00367                                         ++y;
00368                                 }
00369                                 else
00370                                 {
00371                                         if(x->index > y->index)
00372                                         {       
00373                                                 sum += y->value * y->value;
00374                                                 ++y;
00375                                         }
00376                                         else
00377                                         {
00378                                                 sum += x->value * x->value;
00379                                                 ++x;
00380                                         }
00381                                 }
00382                         }
00383 
00384                         while(x->index != -1)
00385                         {
00386                                 sum += x->value * x->value;
00387                                 ++x;
00388                         }
00389 
00390                         while(y->index != -1)
00391                         {
00392                                 sum += y->value * y->value;
00393                                 ++y;
00394                         }
00395                         
00396                         return exp(-param.gamma*sum);
00397                 }
00398                 case SIGMOID:
00399                         return tanh(param.gamma*dot(x,y)+param.coef0);
00400                 case PRECOMPUTED:  //x: test (validation), y: SV
00401                         return x[(int)(y->value)].value;
00402                 default:
00403                         return 0;  // Unreachable 
00404         }
00405 }
00406 
00407 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00408 // Solves:
00409 //
00410 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00411 //
00412 //              y^T \alpha = \delta
00413 //              y_i = +1 or -1
00414 //              0 <= alpha_i <= Cp for y_i = 1
00415 //              0 <= alpha_i <= Cn for y_i = -1
00416 //
00417 // Given:
00418 //
00419 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00420 //      l is the size of vectors and matrices
00421 //      eps is the stopping tolerance
00422 //
00423 // solution will be put in \alpha, objective value will be put in obj
00424 //
00425 class Solver {
00426 public:
00427         Solver() {};
00428         virtual ~Solver() {};
00429 
00430         struct SolutionInfo {
00431                 double obj;
00432                 double rho;
00433                 double upper_bound_p;
00434                 double upper_bound_n;
00435                 double r;       // for Solver_NU
00436         };
00437 
00438         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00439                    double *alpha_, double Cp, double Cn, double eps,
00440                    SolutionInfo* si, int shrinking);
00441 protected:
00442         int active_size;
00443         schar *y;
00444         double *G;              // gradient of objective function
00445         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00446         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00447         double *alpha;
00448         const QMatrix *Q;
00449         const Qfloat *QD;
00450         double eps;
00451         double Cp,Cn;
00452         double *p;
00453         int *active_set;
00454         double *G_bar;          // gradient, if we treat free variables as 0
00455         int l;
00456         bool unshrinked;        // XXX
00457 
00458         double get_C(int i)
00459         {
00460                 return (y[i] > 0)? Cp : Cn;
00461         }
00462         void update_alpha_status(int i)
00463         {
00464                 if(alpha[i] >= get_C(i))
00465                         alpha_status[i] = UPPER_BOUND;
00466                 else if(alpha[i] <= 0)
00467                         alpha_status[i] = LOWER_BOUND;
00468                 else alpha_status[i] = FREE;
00469         }
00470         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00471         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00472         bool is_free(int i) { return alpha_status[i] == FREE; }
00473         void swap_index(int i, int j);
00474         void reconstruct_gradient();
00475         virtual int select_working_set(int &i, int &j);
00476         virtual double calculate_rho();
00477         virtual void do_shrinking();
00478 private:
00479         bool be_shrunken(int i, double Gmax1, double Gmax2);    
00480 };
00481 
00482 void Solver::swap_index(int i, int j)
00483 {
00484         Q->swap_index(i,j);
00485         swap(y[i],y[j]);
00486         swap(G[i],G[j]);
00487         swap(alpha_status[i],alpha_status[j]);
00488         swap(alpha[i],alpha[j]);
00489         swap(p[i],p[j]);
00490         swap(active_set[i],active_set[j]);
00491         swap(G_bar[i],G_bar[j]);
00492 }
00493 
00494 void Solver::reconstruct_gradient()
00495 {
00496         // reconstruct inactive elements of G from G_bar and free variables
00497 
00498         if(active_size == l) return;
00499 
00500         int i;
00501         for(i=active_size;i<l;i++)
00502                 G[i] = G_bar[i] + p[i];
00503         
00504         for(i=0;i<active_size;i++)
00505                 if(is_free(i))
00506                 {
00507                         const Qfloat *Q_i = Q->get_Q(i,l);
00508                         double alpha_i = alpha[i];
00509                         for(int j=active_size;j<l;j++)
00510                                 G[j] += alpha_i * Q_i[j];
00511                 }
00512 }
00513 
00514 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00515                    double *alpha_, double Cp, double Cn, double eps,
00516                    SolutionInfo* si, int shrinking)
00517 {
00518         this->l = l;
00519         this->Q = &Q;
00520         QD=Q.get_QD();
00521         clone(p, p_,l);
00522         clone(y, y_,l);
00523         clone(alpha,alpha_,l);
00524         this->Cp = Cp;
00525         this->Cn = Cn;
00526         this->eps = eps;
00527         unshrinked = false;
00528 
00529         // initialize alpha_status
00530         {
00531                 alpha_status = new char[l];
00532                 for(int i=0;i<l;i++)
00533                         update_alpha_status(i);
00534         }
00535 
00536         // initialize active set (for shrinking)
00537         {
00538                 active_set = new int[l];
00539                 for(int i=0;i<l;i++)
00540                         active_set[i] = i;
00541                 active_size = l;
00542         }
00543 
00544         // initialize gradient
00545         {
00546                 G = new double[l];
00547                 G_bar = new double[l];
00548                 int i;
00549                 for(i=0;i<l;i++)
00550                 {
00551                         G[i] = p[i];
00552                         G_bar[i] = 0;
00553                 }
00554                 for(i=0;i<l;i++)
00555                         if(!is_lower_bound(i))
00556                         {
00557                                 const Qfloat *Q_i = Q.get_Q(i,l);
00558                                 double alpha_i = alpha[i];
00559                                 int j;
00560                                 for(j=0;j<l;j++)
00561                                         G[j] += alpha_i*Q_i[j];
00562                                 if(is_upper_bound(i))
00563                                         for(j=0;j<l;j++)
00564                                                 G_bar[j] += get_C(i) * Q_i[j];
00565                         }
00566         }
00567 
00568         // optimization step
00569 
00570         int iter = 0;
00571         int counter = min(l,1000)+1;
00572 
00573         while(1)
00574         {
00575                 // show progress and do shrinking
00576 
00577                 if(--counter == 0)
00578                 {
00579                         counter = min(l,1000);
00580                         if(shrinking) do_shrinking();
00581                         info("."); info_flush();
00582                 }
00583 
00584                 int i,j;
00585                 if(select_working_set(i,j)!=0)
00586                 {
00587                         // reconstruct the whole gradient
00588                         reconstruct_gradient();
00589                         // reset active set size and check
00590                         active_size = l;
00591                         info("*"); info_flush();
00592                         if(select_working_set(i,j)!=0)
00593                                 break;
00594                         else
00595                                 counter = 1;    // do shrinking next iteration
00596                 }
00597                 
00598                 ++iter;
00599 
00600                 // update alpha[i] and alpha[j], handle bounds carefully
00601                 
00602                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00603                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00604 
00605                 double C_i = get_C(i);
00606                 double C_j = get_C(j);
00607 
00608                 double old_alpha_i = alpha[i];
00609                 double old_alpha_j = alpha[j];
00610 
00611                 if(y[i]!=y[j])
00612                 {
00613                         double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
00614                         if (quad_coef <= 0)
00615                                 quad_coef = TAU;
00616                         double delta = (-G[i]-G[j])/quad_coef;
00617                         double diff = alpha[i] - alpha[j];
00618                         alpha[i] += delta;
00619                         alpha[j] += delta;
00620                         
00621                         if(diff > 0)
00622                         {
00623                                 if(alpha[j] < 0)
00624                                 {
00625                                         alpha[j] = 0;
00626                                         alpha[i] = diff;
00627                                 }
00628                         }
00629                         else
00630                         {
00631                                 if(alpha[i] < 0)
00632                                 {
00633                                         alpha[i] = 0;
00634                                         alpha[j] = -diff;
00635                                 }
00636                         }
00637                         if(diff > C_i - C_j)
00638                         {
00639                                 if(alpha[i] > C_i)
00640                                 {
00641                                         alpha[i] = C_i;
00642                                         alpha[j] = C_i - diff;
00643                                 }
00644                         }
00645                         else
00646                         {
00647                                 if(alpha[j] > C_j)
00648                                 {
00649                                         alpha[j] = C_j;
00650                                         alpha[i] = C_j + diff;
00651                                 }
00652                         }
00653                 }
00654                 else
00655                 {
00656                         double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
00657                         if (quad_coef <= 0)
00658                                 quad_coef = TAU;
00659                         double delta = (G[i]-G[j])/quad_coef;
00660                         double sum = alpha[i] + alpha[j];
00661                         alpha[i] -= delta;
00662                         alpha[j] += delta;
00663 
00664                         if(sum > C_i)
00665                         {
00666                                 if(alpha[i] > C_i)
00667                                 {
00668                                         alpha[i] = C_i;
00669                                         alpha[j] = sum - C_i;
00670                                 }
00671                         }
00672                         else
00673                         {
00674                                 if(alpha[j] < 0)
00675                                 {
00676                                         alpha[j] = 0;
00677                                         alpha[i] = sum;
00678                                 }
00679                         }
00680                         if(sum > C_j)
00681                         {
00682                                 if(alpha[j] > C_j)
00683                                 {
00684                                         alpha[j] = C_j;
00685                                         alpha[i] = sum - C_j;
00686                                 }
00687                         }
00688                         else
00689                         {
00690                                 if(alpha[i] < 0)
00691                                 {
00692                                         alpha[i] = 0;
00693                                         alpha[j] = sum;
00694                                 }
00695                         }
00696                 }
00697 
00698                 // update G
00699 
00700                 double delta_alpha_i = alpha[i] - old_alpha_i;
00701                 double delta_alpha_j = alpha[j] - old_alpha_j;
00702                 
00703                 for(int k=0;k<active_size;k++)
00704                 {
00705                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00706                 }
00707 
00708                 // update alpha_status and G_bar
00709 
00710                 {
00711                         bool ui = is_upper_bound(i);
00712                         bool uj = is_upper_bound(j);
00713                         update_alpha_status(i);
00714                         update_alpha_status(j);
00715                         int k;
00716                         if(ui != is_upper_bound(i))
00717                         {
00718                                 Q_i = Q.get_Q(i,l);
00719                                 if(ui)
00720                                         for(k=0;k<l;k++)
00721                                                 G_bar[k] -= C_i * Q_i[k];
00722                                 else
00723                                         for(k=0;k<l;k++)
00724                                                 G_bar[k] += C_i * Q_i[k];
00725                         }
00726 
00727                         if(uj != is_upper_bound(j))
00728                         {
00729                                 Q_j = Q.get_Q(j,l);
00730                                 if(uj)
00731                                         for(k=0;k<l;k++)
00732                                                 G_bar[k] -= C_j * Q_j[k];
00733                                 else
00734                                         for(k=0;k<l;k++)
00735                                                 G_bar[k] += C_j * Q_j[k];
00736                         }
00737                 }
00738         }
00739 
00740         // calculate rho
00741 
00742         si->rho = calculate_rho();
00743 
00744         // calculate objective value
00745         {
00746                 double v = 0;
00747                 int i;
00748                 for(i=0;i<l;i++)
00749                         v += alpha[i] * (G[i] + p[i]);
00750 
00751                 si->obj = v/2;
00752         }
00753 
00754         // put back the solution
00755         {
00756                 for(int i=0;i<l;i++)
00757                         alpha_[active_set[i]] = alpha[i];
00758         }
00759 
00760         // juggle everything back
00761         /*{
00762                 for(int i=0;i<l;i++)
00763                         while(active_set[i] != i)
00764                                 swap_index(i,active_set[i]);
00765                                 // or Q.swap_index(i,active_set[i]);
00766         }*/
00767 
00768         si->upper_bound_p = Cp;
00769         si->upper_bound_n = Cn;
00770 
00771         info("\noptimization finished, #iter = %d\n",iter);
00772 
00773         delete[] p;
00774         delete[] y;
00775         delete[] alpha;
00776         delete[] alpha_status;
00777         delete[] active_set;
00778         delete[] G;
00779         delete[] G_bar;
00780 }
00781 
00782 // return 1 if already optimal, return 0 otherwise
00783 int Solver::select_working_set(int &out_i, int &out_j)
00784 {
00785         // return i,j such that
00786         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00787         // j: minimizes the decrease of obj value
00788         //    (if quadratic coefficeint <= 0, replace it with tau)
00789         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00790         
00791         double Gmax = -INF;
00792         double Gmax2 = -INF;
00793         int Gmax_idx = -1;
00794         int Gmin_idx = -1;
00795         double obj_diff_min = INF;
00796 
00797         for(int t=0;t<active_size;t++)
00798                 if(y[t]==+1)    
00799                 {
00800                         if(!is_upper_bound(t))
00801                                 if(-G[t] >= Gmax)
00802                                 {
00803                                         Gmax = -G[t];
00804                                         Gmax_idx = t;
00805                                 }
00806                 }
00807                 else
00808                 {
00809                         if(!is_lower_bound(t))
00810                                 if(G[t] >= Gmax)
00811                                 {
00812                                         Gmax = G[t];
00813                                         Gmax_idx = t;
00814                                 }
00815                 }
00816 
00817         int i = Gmax_idx;
00818         const Qfloat *Q_i = NULL;
00819         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00820                 Q_i = Q->get_Q(i,active_size);
00821 
00822         for(int j=0;j<active_size;j++)
00823         {
00824                 if(y[j]==+1)
00825                 {
00826                         if (!is_lower_bound(j))
00827                         {
00828                                 double grad_diff=Gmax+G[j];
00829                                 if (G[j] >= Gmax2)
00830                                         Gmax2 = G[j];
00831                                 if (grad_diff > 0)
00832                                 {
00833                                         double obj_diff; 
00834                                         double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
00835                                         if (quad_coef > 0)
00836                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00837                                         else
00838                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00839 
00840                                         if (obj_diff <= obj_diff_min)
00841                                         {
00842                                                 Gmin_idx=j;
00843                                                 obj_diff_min = obj_diff;
00844                                         }
00845                                 }
00846                         }
00847                 }
00848                 else
00849                 {
00850                         if (!is_upper_bound(j))
00851                         {
00852                                 double grad_diff= Gmax-G[j];
00853                                 if (-G[j] >= Gmax2)
00854                                         Gmax2 = -G[j];
00855                                 if (grad_diff > 0)
00856                                 {
00857                                         double obj_diff; 
00858                                         double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
00859                                         if (quad_coef > 0)
00860                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00861                                         else
00862                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00863 
00864                                         if (obj_diff <= obj_diff_min)
00865                                         {
00866                                                 Gmin_idx=j;
00867                                                 obj_diff_min = obj_diff;
00868                                         }
00869                                 }
00870                         }
00871                 }
00872         }
00873 
00874         if(Gmax+Gmax2 < eps)
00875                 return 1;
00876 
00877         out_i = Gmax_idx;
00878         out_j = Gmin_idx;
00879         return 0;
00880 }
00881 
00882 bool Solver::be_shrunken(int i, double Gmax1, double Gmax2)
00883 {
00884         if(is_upper_bound(i))
00885         {
00886                 if(y[i]==+1)
00887                         return(-G[i] > Gmax1);
00888                 else
00889                         return(-G[i] > Gmax2);
00890         }
00891         else if(is_lower_bound(i))
00892         {
00893                 if(y[i]==+1)
00894                         return(G[i] > Gmax2);
00895                 else    
00896                         return(G[i] > Gmax1);
00897         }
00898         else
00899                 return(false);
00900 }
00901 
00902 void Solver::do_shrinking()
00903 {
00904         int i;
00905         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00906         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00907 
00908         // find maximal violating pair first
00909         for(i=0;i<active_size;i++)
00910         {
00911                 if(y[i]==+1)    
00912                 {
00913                         if(!is_upper_bound(i))  
00914                         {
00915                                 if(-G[i] >= Gmax1)
00916                                         Gmax1 = -G[i];
00917                         }
00918                         if(!is_lower_bound(i))  
00919                         {
00920                                 if(G[i] >= Gmax2)
00921                                         Gmax2 = G[i];
00922                         }
00923                 }
00924                 else    
00925                 {
00926                         if(!is_upper_bound(i))  
00927                         {
00928                                 if(-G[i] >= Gmax2)
00929                                         Gmax2 = -G[i];
00930                         }
00931                         if(!is_lower_bound(i))  
00932                         {
00933                                 if(G[i] >= Gmax1)
00934                                         Gmax1 = G[i];
00935                         }
00936                 }
00937         }
00938 
00939         // shrink
00940 
00941         for(i=0;i<active_size;i++)
00942                 if (be_shrunken(i, Gmax1, Gmax2))
00943                 {
00944                         active_size--;
00945                         while (active_size > i)
00946                         {
00947                                 if (!be_shrunken(active_size, Gmax1, Gmax2))
00948                                 {
00949                                         swap_index(i,active_size);
00950                                         break;
00951                                 }
00952                                 active_size--;
00953                         }
00954                 }
00955 
00956         // unshrink, check all variables again before final iterations
00957 
00958         if(unshrinked || Gmax1 + Gmax2 > eps*10) return;
00959         
00960         unshrinked = true;
00961         reconstruct_gradient();
00962 
00963         for(i=l-1;i>=active_size;i--)
00964                 if (!be_shrunken(i, Gmax1, Gmax2))
00965                 {
00966                         while (active_size < i)
00967                         {
00968                                 if (be_shrunken(active_size, Gmax1, Gmax2))
00969                                 {
00970                                         swap_index(i,active_size);
00971                                         break;
00972                                 }
00973                                 active_size++;
00974                         }
00975                         active_size++;
00976                 }
00977 }
00978 
00979 double Solver::calculate_rho()
00980 {
00981         double r;
00982         int nr_free = 0;
00983         double ub = INF, lb = -INF, sum_free = 0;
00984         for(int i=0;i<active_size;i++)
00985         {
00986                 double yG = y[i]*G[i];
00987 
00988                 if(is_upper_bound(i))
00989                 {
00990                         if(y[i]==-1)
00991                                 ub = min(ub,yG);
00992                         else
00993                                 lb = max(lb,yG);
00994                 }
00995                 else if(is_lower_bound(i))
00996                 {
00997                         if(y[i]==+1)
00998                                 ub = min(ub,yG);
00999                         else
01000                                 lb = max(lb,yG);
01001                 }
01002                 else
01003                 {
01004                         ++nr_free;
01005                         sum_free += yG;
01006                 }
01007         }
01008 
01009         if(nr_free>0)
01010                 r = sum_free/nr_free;
01011         else
01012                 r = (ub+lb)/2;
01013 
01014         return r;
01015 }
01016 
01017 //
01018 // Solver for nu-svm classification and regression
01019 //
01020 // additional constraint: e^T \alpha = constant
01021 //
01022 class Solver_NU : public Solver
01023 {
01024 public:
01025         Solver_NU() {}
01026         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01027                    double *alpha, double Cp, double Cn, double eps,
01028                    SolutionInfo* si, int shrinking)
01029         {
01030                 this->si = si;
01031                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01032         }
01033 private:
01034         SolutionInfo *si;
01035         int select_working_set(int &i, int &j);
01036         double calculate_rho();
01037         bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01038         void do_shrinking();
01039 };
01040 
01041 // return 1 if already optimal, return 0 otherwise
01042 int Solver_NU::select_working_set(int &out_i, int &out_j)
01043 {
01044         // return i,j such that y_i = y_j and
01045         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01046         // j: minimizes the decrease of obj value
01047         //    (if quadratic coefficeint <= 0, replace it with tau)
01048         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01049 
01050         double Gmaxp = -INF;
01051         double Gmaxp2 = -INF;
01052         int Gmaxp_idx = -1;
01053 
01054         double Gmaxn = -INF;
01055         double Gmaxn2 = -INF;
01056         int Gmaxn_idx = -1;
01057 
01058         int Gmin_idx = -1;
01059         double obj_diff_min = INF;
01060 
01061         for(int t=0;t<active_size;t++)
01062                 if(y[t]==+1)
01063                 {
01064                         if(!is_upper_bound(t))
01065                                 if(-G[t] >= Gmaxp)
01066                                 {
01067                                         Gmaxp = -G[t];
01068                                         Gmaxp_idx = t;
01069                                 }
01070                 }
01071                 else
01072                 {
01073                         if(!is_lower_bound(t))
01074                                 if(G[t] >= Gmaxn)
01075                                 {
01076                                         Gmaxn = G[t];
01077                                         Gmaxn_idx = t;
01078                                 }
01079                 }
01080 
01081         int ip = Gmaxp_idx;
01082         int in = Gmaxn_idx;
01083         const Qfloat *Q_ip = NULL;
01084         const Qfloat *Q_in = NULL;
01085         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01086                 Q_ip = Q->get_Q(ip,active_size);
01087         if(in != -1)
01088                 Q_in = Q->get_Q(in,active_size);
01089 
01090         for(int j=0;j<active_size;j++)
01091         {
01092                 if(y[j]==+1)
01093                 {
01094                         if (!is_lower_bound(j)) 
01095                         {
01096                                 double grad_diff=Gmaxp+G[j];
01097                                 if (G[j] >= Gmaxp2)
01098                                         Gmaxp2 = G[j];
01099                                 if (grad_diff > 0)
01100                                 {
01101                                         double obj_diff; 
01102                                         double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01103                                         if (quad_coef > 0)
01104                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01105                                         else
01106                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01107 
01108                                         if (obj_diff <= obj_diff_min)
01109                                         {
01110                                                 Gmin_idx=j;
01111                                                 obj_diff_min = obj_diff;
01112                                         }
01113                                 }
01114                         }
01115                 }
01116                 else
01117                 {
01118                         if (!is_upper_bound(j))
01119                         {
01120                                 double grad_diff=Gmaxn-G[j];
01121                                 if (-G[j] >= Gmaxn2)
01122                                         Gmaxn2 = -G[j];
01123                                 if (grad_diff > 0)
01124                                 {
01125                                         double obj_diff; 
01126                                         double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
01127                                         if (quad_coef > 0)
01128                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01129                                         else
01130                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01131 
01132                                         if (obj_diff <= obj_diff_min)
01133                                         {
01134                                                 Gmin_idx=j;
01135                                                 obj_diff_min = obj_diff;
01136                                         }
01137                                 }
01138                         }
01139                 }
01140         }
01141 
01142         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01143                 return 1;
01144 
01145         if (y[Gmin_idx] == +1)
01146                 out_i = Gmaxp_idx;
01147         else
01148                 out_i = Gmaxn_idx;
01149         out_j = Gmin_idx;
01150 
01151         return 0;
01152 }
01153 
01154 bool Solver_NU::be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01155 {
01156         if(is_upper_bound(i))
01157         {
01158                 if(y[i]==+1)
01159                         return(-G[i] > Gmax1);
01160                 else    
01161                         return(-G[i] > Gmax4);
01162         }
01163         else if(is_lower_bound(i))
01164         {
01165                 if(y[i]==+1)
01166                         return(G[i] > Gmax2);
01167                 else    
01168                         return(G[i] > Gmax3);
01169         }
01170         else
01171                 return(false);
01172 }
01173 
01174 void Solver_NU::do_shrinking()
01175 {
01176         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01177         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01178         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01179         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01180 
01181         // find maximal violating pair first
01182         int i;
01183         for(i=0;i<active_size;i++)
01184         {
01185                 if(!is_upper_bound(i))
01186                 {
01187                         if(y[i]==+1)
01188                         {
01189                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01190                         }
01191                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01192                 }
01193                 if(!is_lower_bound(i))
01194                 {
01195                         if(y[i]==+1)
01196                         {       
01197                                 if(G[i] > Gmax2) Gmax2 = G[i];
01198                         }
01199                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01200                 }
01201         }
01202 
01203         // shrinking
01204 
01205         for(i=0;i<active_size;i++)
01206                 if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01207                 {
01208                         active_size--;
01209                         while (active_size > i)
01210                         {
01211                                 if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01212                                 {
01213                                         swap_index(i,active_size);
01214                                         break;
01215                                 }
01216                                 active_size--;
01217                         }
01218                 }
01219 
01220         // unshrink, check all variables again before final iterations
01221 
01222         if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return;
01223         
01224         unshrinked = true;
01225         reconstruct_gradient();
01226 
01227         for(i=l-1;i>=active_size;i--)
01228                 if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01229                 {
01230                         while (active_size < i)
01231                         {
01232                                 if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01233                                 {
01234                                         swap_index(i,active_size);
01235                                         break;
01236                                 }
01237                                 active_size++;
01238                         }
01239                         active_size++;
01240                 }
01241 }
01242 
01243 double Solver_NU::calculate_rho()
01244 {
01245         int nr_free1 = 0,nr_free2 = 0;
01246         double ub1 = INF, ub2 = INF;
01247         double lb1 = -INF, lb2 = -INF;
01248         double sum_free1 = 0, sum_free2 = 0;
01249 
01250         for(int i=0;i<active_size;i++)
01251         {
01252                 if(y[i]==+1)
01253                 {
01254                         if(is_upper_bound(i))
01255                                 lb1 = max(lb1,G[i]);
01256                         else if(is_lower_bound(i))
01257                                 ub1 = min(ub1,G[i]);
01258                         else
01259                         {
01260                                 ++nr_free1;
01261                                 sum_free1 += G[i];
01262                         }
01263                 }
01264                 else
01265                 {
01266                         if(is_upper_bound(i))
01267                                 lb2 = max(lb2,G[i]);
01268                         else if(is_lower_bound(i))
01269                                 ub2 = min(ub2,G[i]);
01270                         else
01271                         {
01272                                 ++nr_free2;
01273                                 sum_free2 += G[i];
01274                         }
01275                 }
01276         }
01277 
01278         double r1,r2;
01279         if(nr_free1 > 0)
01280                 r1 = sum_free1/nr_free1;
01281         else
01282                 r1 = (ub1+lb1)/2;
01283         
01284         if(nr_free2 > 0)
01285                 r2 = sum_free2/nr_free2;
01286         else
01287                 r2 = (ub2+lb2)/2;
01288         
01289         si->r = (r1+r2)/2;
01290         return (r1-r2)/2;
01291 }
01292 
01293 //
01294 // Q matrices for various formulations
01295 //
01296 class SVC_Q: public Kernel
01297 { 
01298 public:
01299         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01300         :Kernel(prob.l, prob.x, param)
01301         {
01302                 clone(y,y_,prob.l);
01303                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01304                 QD = new Qfloat[prob.l];
01305                 for(int i=0;i<prob.l;i++)
01306                         QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01307         }
01308         
01309         Qfloat *get_Q(int i, int len) const
01310         {
01311                 Qfloat *data;
01312                 int start;
01313                 if((start = cache->get_data(i,&data,len)) < len)
01314                 {
01315                         for(int j=start;j<len;j++)
01316                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01317                 }
01318                 return data;
01319         }
01320 
01321         Qfloat *get_QD() const
01322         {
01323                 return QD;
01324         }
01325 
01326         void swap_index(int i, int j) const
01327         {
01328                 cache->swap_index(i,j);
01329                 Kernel::swap_index(i,j);
01330                 swap(y[i],y[j]);
01331                 swap(QD[i],QD[j]);
01332         }
01333 
01334         ~SVC_Q()
01335         {
01336                 delete[] y;
01337                 delete cache;
01338                 delete[] QD;
01339         }
01340 private:
01341         schar *y;
01342         Cache *cache;
01343         Qfloat *QD;
01344 };
01345 
01346 class ONE_CLASS_Q: public Kernel
01347 {
01348 public:
01349         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01350         :Kernel(prob.l, prob.x, param)
01351         {
01352                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01353                 QD = new Qfloat[prob.l];
01354                 for(int i=0;i<prob.l;i++)
01355                         QD[i]= (Qfloat)(this->*kernel_function)(i,i);
01356         }
01357         
01358         Qfloat *get_Q(int i, int len) const
01359         {
01360                 Qfloat *data;
01361                 int start;
01362                 if((start = cache->get_data(i,&data,len)) < len)
01363                 {
01364                         for(int j=start;j<len;j++)
01365                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01366                 }
01367                 return data;
01368         }
01369 
01370         Qfloat *get_QD() const
01371         {
01372                 return QD;
01373         }
01374 
01375         void swap_index(int i, int j) const
01376         {
01377                 cache->swap_index(i,j);
01378                 Kernel::swap_index(i,j);
01379                 swap(QD[i],QD[j]);
01380         }
01381 
01382         ~ONE_CLASS_Q()
01383         {
01384                 delete cache;
01385                 delete[] QD;
01386         }
01387 private:
01388         Cache *cache;
01389         Qfloat *QD;
01390 };
01391 
01392 class SVR_Q: public Kernel
01393 { 
01394 public:
01395         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01396         :Kernel(prob.l, prob.x, param)
01397         {
01398                 l = prob.l;
01399                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01400                 QD = new Qfloat[2*l];
01401                 sign = new schar[2*l];
01402                 index = new int[2*l];
01403                 for(int k=0;k<l;k++)
01404                 {
01405                         sign[k] = 1;
01406                         sign[k+l] = -1;
01407                         index[k] = k;
01408                         index[k+l] = k;
01409                         QD[k]= (Qfloat)(this->*kernel_function)(k,k);
01410                         QD[k+l]=QD[k];
01411                 }
01412                 buffer[0] = new Qfloat[2*l];
01413                 buffer[1] = new Qfloat[2*l];
01414                 next_buffer = 0;
01415         }
01416 
01417         void swap_index(int i, int j) const
01418         {
01419                 swap(sign[i],sign[j]);
01420                 swap(index[i],index[j]);
01421                 swap(QD[i],QD[j]);
01422         }
01423         
01424         Qfloat *get_Q(int i, int len) const
01425         {
01426                 Qfloat *data;
01427                 int real_i = index[i];
01428                 if(cache->get_data(real_i,&data,l) < l)
01429                 {
01430                         for(int j=0;j<l;j++)
01431                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01432                 }
01433 
01434                 // reorder and copy
01435                 Qfloat *buf = buffer[next_buffer];
01436                 next_buffer = 1 - next_buffer;
01437                 schar si = sign[i];
01438                 for(int j=0;j<len;j++)
01439                         buf[j] = si * sign[j] * data[index[j]];
01440                 return buf;
01441         }
01442 
01443         Qfloat *get_QD() const
01444         {
01445                 return QD;
01446         }
01447 
01448         ~SVR_Q()
01449         {
01450                 delete cache;
01451                 delete[] sign;
01452                 delete[] index;
01453                 delete[] buffer[0];
01454                 delete[] buffer[1];
01455                 delete[] QD;
01456         }
01457 private:
01458         int l;
01459         Cache *cache;
01460         schar *sign;
01461         int *index;
01462         mutable int next_buffer;
01463         Qfloat *buffer[2];
01464         Qfloat *QD;
01465 };
01466 
01467 //
01468 // construct and solve various formulations
01469 //
01470 static void solve_c_svc(
01471         const svm_problem *prob, const svm_parameter* param,
01472         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01473 {
01474         int l = prob->l;
01475         double *minus_ones = new double[l];
01476         schar *y = new schar[l];
01477 
01478         int i;
01479 
01480         for(i=0;i<l;i++)
01481         {
01482                 alpha[i] = 0;
01483                 minus_ones[i] = -1;
01484                 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01485         }
01486 
01487         Solver s;
01488         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01489                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01490 
01491         double sum_alpha=0;
01492         for(i=0;i<l;i++)
01493                 sum_alpha += alpha[i];
01494 
01495         if (Cp==Cn)
01496                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01497 
01498         for(i=0;i<l;i++)
01499                 alpha[i] *= y[i];
01500 
01501         delete[] minus_ones;
01502         delete[] y;
01503 }
01504 
01505 static void solve_nu_svc(
01506         const svm_problem *prob, const svm_parameter *param,
01507         double *alpha, Solver::SolutionInfo* si)
01508 {
01509         int i;
01510         int l = prob->l;
01511         double nu = param->nu;
01512 
01513         schar *y = new schar[l];
01514 
01515         for(i=0;i<l;i++)
01516                 if(prob->y[i]>0)
01517                         y[i] = +1;
01518                 else
01519                         y[i] = -1;
01520 
01521         double sum_pos = nu*l/2;
01522         double sum_neg = nu*l/2;
01523 
01524         for(i=0;i<l;i++)
01525                 if(y[i] == +1)
01526                 {
01527                         alpha[i] = min(1.0,sum_pos);
01528                         sum_pos -= alpha[i];
01529                 }
01530                 else
01531                 {
01532                         alpha[i] = min(1.0,sum_neg);
01533                         sum_neg -= alpha[i];
01534                 }
01535 
01536         double *zeros = new double[l];
01537 
01538         for(i=0;i<l;i++)
01539                 zeros[i] = 0;
01540 
01541         Solver_NU s;
01542         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01543                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01544         double r = si->r;
01545 
01546         info("C = %f\n",1/r);
01547 
01548         for(i=0;i<l;i++)
01549                 alpha[i] *= y[i]/r;
01550 
01551         si->rho /= r;
01552         si->obj /= (r*r);
01553         si->upper_bound_p = 1/r;
01554         si->upper_bound_n = 1/r;
01555 
01556         delete[] y;
01557         delete[] zeros;
01558 }
01559 
01560 static void solve_one_class(
01561         const svm_problem *prob, const svm_parameter *param,
01562         double *alpha, Solver::SolutionInfo* si)
01563 {
01564         int l = prob->l;
01565         double *zeros = new double[l];
01566         schar *ones = new schar[l];
01567         int i;
01568 
01569         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01570 
01571         for(i=0;i<n;i++)
01572                 alpha[i] = 1;
01573         if(n<prob->l)
01574                 alpha[n] = param->nu * prob->l - n;
01575         for(i=n+1;i<l;i++)
01576                 alpha[i] = 0;
01577 
01578         for(i=0;i<l;i++)
01579         {
01580                 zeros[i] = 0;
01581                 ones[i] = 1;
01582         }
01583 
01584         Solver s;
01585         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01586                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01587 
01588         delete[] zeros;
01589         delete[] ones;
01590 }
01591 
01592 static void solve_epsilon_svr(
01593         const svm_problem *prob, const svm_parameter *param,
01594         double *alpha, Solver::SolutionInfo* si)
01595 {
01596         int l = prob->l;
01597         double *alpha2 = new double[2*l];
01598         double *linear_term = new double[2*l];
01599         schar *y = new schar[2*l];
01600         int i;
01601 
01602         for(i=0;i<l;i++)
01603         {
01604                 alpha2[i] = 0;
01605                 linear_term[i] = param->p - prob->y[i];
01606                 y[i] = 1;
01607 
01608                 alpha2[i+l] = 0;
01609                 linear_term[i+l] = param->p + prob->y[i];
01610                 y[i+l] = -1;
01611         }
01612 
01613         Solver s;
01614         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01615                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01616 
01617         double sum_alpha = 0;
01618         for(i=0;i<l;i++)
01619         {
01620                 alpha[i] = alpha2[i] - alpha2[i+l];
01621                 sum_alpha += fabs(alpha[i]);
01622         }
01623         info("nu = %f\n",sum_alpha/(param->C*l));
01624 
01625         delete[] alpha2;
01626         delete[] linear_term;
01627         delete[] y;
01628 }
01629 
01630 static void solve_nu_svr(
01631         const svm_problem *prob, const svm_parameter *param,
01632         double *alpha, Solver::SolutionInfo* si)
01633 {
01634         int l = prob->l;
01635         double C = param->C;
01636         double *alpha2 = new double[2*l];
01637         double *linear_term = new double[2*l];
01638         schar *y = new schar[2*l];
01639         int i;
01640 
01641         double sum = C * param->nu * l / 2;
01642         for(i=0;i<l;i++)
01643         {
01644                 alpha2[i] = alpha2[i+l] = min(sum,C);
01645                 sum -= alpha2[i];
01646 
01647                 linear_term[i] = - prob->y[i];
01648                 y[i] = 1;
01649 
01650                 linear_term[i+l] = prob->y[i];
01651                 y[i+l] = -1;
01652         }
01653 
01654         Solver_NU s;
01655         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01656                 alpha2, C, C, param->eps, si, param->shrinking);
01657 
01658         info("epsilon = %f\n",-si->r);
01659 
01660         for(i=0;i<l;i++)
01661                 alpha[i] = alpha2[i] - alpha2[i+l];
01662 
01663         delete[] alpha2;
01664         delete[] linear_term;
01665         delete[] y;
01666 }
01667 
01668 //
01669 // decision_function
01670 //
01671 struct decision_function
01672 {
01673         double *alpha;
01674         double rho;     
01675 };
01676 
01677 decision_function svm_train_one(
01678         const svm_problem *prob, const svm_parameter *param,
01679         double Cp, double Cn)
01680 {
01681         double *alpha = Malloc(double,prob->l);
01682         Solver::SolutionInfo si;
01683         switch(param->svm_type)
01684         {
01685                 case C_SVC:
01686                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01687                         break;
01688                 case NU_SVC:
01689                         solve_nu_svc(prob,param,alpha,&si);
01690                         break;
01691                 case ONE_CLASS:
01692                         solve_one_class(prob,param,alpha,&si);
01693                         break;
01694                 case EPSILON_SVR:
01695                         solve_epsilon_svr(prob,param,alpha,&si);
01696                         break;
01697                 case NU_SVR:
01698                         solve_nu_svr(prob,param,alpha,&si);
01699                         break;
01700         }
01701 
01702         info("obj = %f, rho = %f\n",si.obj,si.rho);
01703 
01704         // output SVs
01705 
01706         int nSV = 0;
01707         int nBSV = 0;
01708         for(int i=0;i<prob->l;i++)
01709         {
01710                 if(fabs(alpha[i]) > 0)
01711                 {
01712                         ++nSV;
01713                         if(prob->y[i] > 0)
01714                         {
01715                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01716                                         ++nBSV;
01717                         }
01718                         else
01719                         {
01720                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01721                                         ++nBSV;
01722                         }
01723                 }
01724         }
01725 
01726         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01727 
01728         decision_function f;
01729         f.alpha = alpha;
01730         f.rho = si.rho;
01731         return f;
01732 }
01733 
01734 //
01735 // svm_model
01736 //
01737 struct svm_model
01738 {
01739         svm_parameter param;    // parameter
01740         int nr_class;           // number of classes, = 2 in regression/one class svm
01741         int l;                  // total #SV
01742         svm_node **SV;          // SVs (SV[l])
01743         double **sv_coef;       // coefficients for SVs in decision functions (sv_coef[k-1][l])
01744         double *rho;            // constants in decision functions (rho[k*(k-1)/2])
01745         double *probA;          // pariwise probability information
01746         double *probB;
01747 
01748         // for classification only
01749 
01750         int *label;             // label of each class (label[k])
01751         int *nSV;               // number of SVs for each class (nSV[k])
01752                                 // nSV[0] + nSV[1] + ... + nSV[k-1] = l
01753         // XXX
01754         int free_sv;            // 1 if svm_model is created by svm_load_model
01755                                 // 0 if svm_model is created by svm_train
01756 };
01757 
01758 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01759 void sigmoid_train(
01760         int l, const double *dec_values, const double *labels, 
01761         double& A, double& B)
01762 {
01763         double prior1=0, prior0 = 0;
01764         int i;
01765 
01766         for (i=0;i<l;i++)
01767                 if (labels[i] > 0) prior1+=1;
01768                 else prior0+=1;
01769         
01770         int max_iter=100;       // Maximal number of iterations
01771         double min_step=1e-10;  // Minimal step taken in line search
01772         double sigma=1e-12;     // For numerically strict PD of Hessian
01773         double eps=1e-5;
01774         double hiTarget=(prior1+1.0)/(prior1+2.0);
01775         double loTarget=1/(prior0+2.0);
01776         double *t=Malloc(double,l);
01777         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01778         double newA,newB,newf,d1,d2;
01779         int iter; 
01780         
01781         // Initial Point and Initial Fun Value
01782         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01783         double fval = 0.0;
01784 
01785         for (i=0;i<l;i++)
01786         {
01787                 if (labels[i]>0) t[i]=hiTarget;
01788                 else t[i]=loTarget;
01789                 fApB = dec_values[i]*A+B;
01790                 if (fApB>=0)
01791                         fval += t[i]*fApB + log(1+exp(-fApB));
01792                 else
01793                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01794         }
01795         for (iter=0;iter<max_iter;iter++)
01796         {
01797                 // Update Gradient and Hessian (use H' = H + sigma I)
01798                 h11=sigma; // numerically ensures strict PD
01799                 h22=sigma;
01800                 h21=0.0;g1=0.0;g2=0.0;
01801                 for (i=0;i<l;i++)
01802                 {
01803                         fApB = dec_values[i]*A+B;
01804                         if (fApB >= 0)
01805                         {
01806                                 p=exp(-fApB)/(1.0+exp(-fApB));
01807                                 q=1.0/(1.0+exp(-fApB));
01808                         }
01809                         else
01810                         {
01811                                 p=1.0/(1.0+exp(fApB));
01812                                 q=exp(fApB)/(1.0+exp(fApB));
01813                         }
01814                         d2=p*q;
01815                         h11+=dec_values[i]*dec_values[i]*d2;
01816                         h22+=d2;
01817                         h21+=dec_values[i]*d2;
01818                         d1=t[i]-p;
01819                         g1+=dec_values[i]*d1;
01820                         g2+=d1;
01821                 }
01822 
01823                 // Stopping Criteria
01824                 if (fabs(g1)<eps && fabs(g2)<eps)
01825                         break;
01826 
01827                 // Finding Newton direction: -inv(H') * g
01828                 det=h11*h22-h21*h21;
01829                 dA=-(h22*g1 - h21 * g2) / det;
01830                 dB=-(-h21*g1+ h11 * g2) / det;
01831                 gd=g1*dA+g2*dB;
01832 
01833 
01834                 stepsize = 1;           // Line Search
01835                 while (stepsize >= min_step)
01836                 {
01837                         newA = A + stepsize * dA;
01838                         newB = B + stepsize * dB;
01839 
01840                         // New function value
01841                         newf = 0.0;
01842                         for (i=0;i<l;i++)
01843                         {
01844                                 fApB = dec_values[i]*newA+newB;
01845                                 if (fApB >= 0)
01846                                         newf += t[i]*fApB + log(1+exp(-fApB));
01847                                 else
01848                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01849                         }
01850                         // Check sufficient decrease
01851                         if (newf<fval+0.0001*stepsize*gd)
01852                         {
01853                                 A=newA;B=newB;fval=newf;
01854                                 break;
01855                         }
01856                         else
01857                                 stepsize = stepsize / 2.0;
01858                 }
01859 
01860                 if (stepsize < min_step)
01861                 {
01862                         info("Line search fails in two-class probability estimates\n");
01863                         break;
01864                 }
01865         }
01866 
01867         if (iter>=max_iter)
01868                 info("Reaching maximal iterations in two-class probability estimates\n");
01869         free(t);
01870 }
01871 
01872 double sigmoid_predict(double decision_value, double A, double B)
01873 {
01874         double fApB = decision_value*A+B;
01875         if (fApB >= 0)
01876                 return exp(-fApB)/(1.0+exp(-fApB));
01877         else
01878                 return 1.0/(1+exp(fApB)) ;
01879 }
01880 
01881 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01882 void multiclass_probability(int k, double **r, double *p)
01883 {
01884         int t,j;
01885         int iter = 0, max_iter=max(100,k);
01886         double **Q=Malloc(double *,k);
01887         double *Qp=Malloc(double,k);
01888         double pQp, eps=0.005/k;
01889         
01890         for (t=0;t<k;t++)
01891         {
01892                 p[t]=1.0/k;  // Valid if k = 1
01893                 Q[t]=Malloc(double,k);
01894                 Q[t][t]=0;
01895                 for (j=0;j<t;j++)
01896                 {
01897                         Q[t][t]+=r[j][t]*r[j][t];
01898                         Q[t][j]=Q[j][t];
01899                 }
01900                 for (j=t+1;j<k;j++)
01901                 {
01902                         Q[t][t]+=r[j][t]*r[j][t];
01903                         Q[t][j]=-r[j][t]*r[t][j];
01904                 }
01905         }
01906         for (iter=0;iter<max_iter;iter++)
01907         {
01908                 // stopping condition, recalculate QP,pQP for numerical accuracy
01909                 pQp=0;
01910                 for (t=0;t<k;t++)
01911                 {
01912                         Qp[t]=0;
01913                         for (j=0;j<k;j++)
01914                                 Qp[t]+=Q[t][j]*p[j];
01915                         pQp+=p[t]*Qp[t];
01916                 }
01917                 double max_error=0;
01918                 for (t=0;t<k;t++)
01919                 {
01920                         double error=fabs(Qp[t]-pQp);
01921                         if (error>max_error)
01922                                 max_error=error;
01923                 }
01924                 if (max_error<eps) break;
01925                 
01926                 for (t=0;t<k;t++)
01927                 {
01928                         double diff=(-Qp[t]+pQp)/Q[t][t];
01929                         p[t]+=diff;
01930                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01931                         for (j=0;j<k;j++)
01932                         {
01933                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01934                                 p[j]/=(1+diff);
01935                         }
01936                 }
01937         }
01938         if (iter>=max_iter)
01939                 info("Exceeds max_iter in multiclass_prob\n");
01940         for(t=0;t<k;t++) free(Q[t]);
01941         free(Q);
01942         free(Qp);
01943 }
01944 
01945 // Cross-validation decision values for probability estimates
01946 void svm_binary_svc_probability(
01947         const svm_problem *prob, const svm_parameter *param,
01948         double Cp, double Cn, double& probA, double& probB)
01949 {
01950         int i;
01951         int nr_fold = 5;
01952         int *perm = Malloc(int,prob->l);
01953         double *dec_values = Malloc(double,prob->l);
01954 
01955         // random shuffle
01956         for(i=0;i<prob->l;i++) perm[i]=i;
01957         for(i=0;i<prob->l;i++)
01958         {
01959                 int j = i+rand()%(prob->l-i);
01960                 swap(perm[i],perm[j]);
01961         }
01962         for(i=0;i<nr_fold;i++)
01963         {
01964                 int begin = i*prob->l/nr_fold;
01965                 int end = (i+1)*prob->l/nr_fold;
01966                 int j,k;
01967                 struct svm_problem subprob;
01968 
01969                 subprob.l = prob->l-(end-begin);
01970                 subprob.x = Malloc(struct svm_node*,subprob.l);
01971                 subprob.y = Malloc(double,subprob.l);
01972                         
01973                 k=0;
01974                 for(j=0;j<begin;j++)
01975                 {
01976                         subprob.x[k] = prob->x[perm[j]];
01977                         subprob.y[k] = prob->y[perm[j]];
01978                         ++k;
01979                 }
01980                 for(j=end;j<prob->l;j++)
01981                 {
01982                         subprob.x[k] = prob->x[perm[j]];
01983                         subprob.y[k] = prob->y[perm[j]];
01984                         ++k;
01985                 }
01986                 int p_count=0,n_count=0;
01987                 for(j=0;j<k;j++)
01988                         if(subprob.y[j]>0)
01989                                 p_count++;
01990                         else
01991                                 n_count++;
01992 
01993                 if(p_count==0 && n_count==0)
01994                         for(j=begin;j<end;j++)
01995                                 dec_values[perm[j]] = 0;
01996                 else if(p_count > 0 && n_count == 0)
01997                         for(j=begin;j<end;j++)
01998                                 dec_values[perm[j]] = 1;
01999                 else if(p_count == 0 && n_count > 0)
02000                         for(j=begin;j<end;j++)
02001                                 dec_values[perm[j]] = -1;
02002                 else
02003                 {
02004                         svm_parameter subparam = *param;
02005                         subparam.probability=0;
02006                         subparam.C=1.0;
02007                         subparam.nr_weight=2;
02008                         subparam.weight_label = Malloc(int,2);
02009                         subparam.weight = Malloc(double,2);
02010                         subparam.weight_label[0]=+1;
02011                         subparam.weight_label[1]=-1;
02012                         subparam.weight[0]=Cp;
02013                         subparam.weight[1]=Cn;
02014                         struct svm_model *submodel = svm_train(&subprob,&subparam);
02015                         for(j=begin;j<end;j++)
02016                         {
02017                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
02018                                 // ensure +1 -1 order; reason not using CV subroutine
02019                                 dec_values[perm[j]] *= submodel->label[0];
02020                         }               
02021                         svm_destroy_model(submodel);
02022                         svm_destroy_param(&subparam);
02023                 }
02024                 free(subprob.x);
02025                 free(subprob.y);
02026         }               
02027         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
02028         free(dec_values);
02029         free(perm);
02030 }
02031 
02032 // Return parameter of a Laplace distribution 
02033 double svm_svr_probability(
02034         const svm_problem *prob, const svm_parameter *param)
02035 {
02036         int i;
02037         int nr_fold = 5;
02038         double *ymv = Malloc(double,prob->l);
02039         double mae = 0;
02040 
02041         svm_parameter newparam = *param;
02042         newparam.probability = 0;
02043         svm_cross_validation(prob,&newparam,nr_fold,ymv);
02044         for(i=0;i<prob->l;i++)
02045         {
02046                 ymv[i]=prob->y[i]-ymv[i];
02047                 mae += fabs(ymv[i]);
02048         }               
02049         mae /= prob->l;
02050         double std=sqrt(2*mae*mae);
02051         int count=0;
02052         mae=0;
02053         for(i=0;i<prob->l;i++)
02054                 if (fabs(ymv[i]) > 5*std) 
02055                         count=count+1;
02056                 else 
02057                         mae+=fabs(ymv[i]);
02058         mae /= (prob->l-count);
02059         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02060         free(ymv);
02061         return mae;
02062 }
02063 
02064 
02065 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02066 // perm, length l, must be allocated before calling this subroutine
02067 void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02068 {
02069         int l = prob->l;
02070         int max_nr_class = 16;
02071         int nr_class = 0;
02072         int *label = Malloc(int,max_nr_class);
02073         int *count = Malloc(int,max_nr_class);
02074         int *data_label = Malloc(int,l);        
02075         int i;
02076 
02077         for(i=0;i<l;i++)
02078         {
02079                 int this_label = (int)prob->y[i];
02080                 int j;
02081                 for(j=0;j<nr_class;j++)
02082                 {
02083                         if(this_label == label[j])
02084                         {
02085                                 ++count[j];
02086                                 break;
02087                         }
02088                 }
02089                 data_label[i] = j;
02090                 if(j == nr_class)
02091                 {
02092                         if(nr_class == max_nr_class)
02093                         {
02094                                 max_nr_class *= 2;
02095                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02096                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02097                         }
02098                         label[nr_class] = this_label;
02099                         count[nr_class] = 1;
02100                         ++nr_class;
02101                 }
02102         }
02103 
02104         int *start = Malloc(int,nr_class);
02105         start[0] = 0;
02106         for(i=1;i<nr_class;i++)
02107                 start[i] = start[i-1]+count[i-1];
02108         for(i=0;i<l;i++)
02109         {
02110                 perm[start[data_label[i]]] = i;
02111                 ++start[data_label[i]];
02112         }
02113         start[0] = 0;
02114         for(i=1;i<nr_class;i++)
02115                 start[i] = start[i-1]+count[i-1];
02116 
02117         *nr_class_ret = nr_class;
02118         *label_ret = label;
02119         *start_ret = start;
02120         *count_ret = count;
02121         free(data_label);
02122 }
02123 
02124 //
02125 // Interface functions
02126 //
02127 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02128 {
02129         svm_model *model = Malloc(svm_model,1);
02130         model->param = *param;
02131         model->free_sv = 0;     // XXX
02132 
02133         if(param->svm_type == ONE_CLASS ||
02134            param->svm_type == EPSILON_SVR ||
02135            param->svm_type == NU_SVR)
02136         {
02137                 // regression or one-class-svm
02138                 model->nr_class = 2;
02139                 model->label = NULL;
02140                 model->nSV = NULL;
02141                 model->probA = NULL; model->probB = NULL;
02142                 model->sv_coef = Malloc(double *,1);
02143 
02144                 if(param->probability && 
02145                    (param->svm_type == EPSILON_SVR ||
02146                     param->svm_type == NU_SVR))
02147                 {
02148                         model->probA = Malloc(double,1);
02149                         model->probA[0] = svm_svr_probability(prob,param);
02150                 }
02151 
02152                 decision_function f = svm_train_one(prob,param,0,0);
02153                 model->rho = Malloc(double,1);
02154                 model->rho[0] = f.rho;
02155 
02156                 int nSV = 0;
02157                 int i;
02158                 for(i=0;i<prob->l;i++)
02159                         if(fabs(f.alpha[i]) > 0) ++nSV;
02160                 model->l = nSV;
02161                 model->SV = Malloc(svm_node *,nSV);
02162                 model->sv_coef[0] = Malloc(double,nSV);
02163                 int j = 0;
02164                 for(i=0;i<prob->l;i++)
02165                         if(fabs(f.alpha[i]) > 0)
02166                         {
02167                                 model->SV[j] = prob->x[i];
02168                                 model->sv_coef[0][j] = f.alpha[i];
02169                                 ++j;
02170                         }               
02171 
02172                 free(f.alpha);
02173         }
02174         else
02175         {
02176                 // classification
02177                 int l = prob->l;
02178                 int nr_class;
02179                 int *label = NULL;
02180                 int *start = NULL;
02181                 int *count = NULL;
02182                 int *perm = Malloc(int,l);
02183 
02184                 // group training data of the same class
02185                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);            
02186                 svm_node **x = Malloc(svm_node *,l);
02187                 int i;
02188                 for(i=0;i<l;i++)
02189                         x[i] = prob->x[perm[i]];
02190 
02191                 // calculate weighted C
02192 
02193                 double *weighted_C = Malloc(double, nr_class);
02194                 for(i=0;i<nr_class;i++)
02195                         weighted_C[i] = param->C;
02196                 for(i=0;i<param->nr_weight;i++)
02197                 {       
02198                         int j;
02199                         for(j=0;j<nr_class;j++)
02200                                 if(param->weight_label[i] == label[j])
02201                                         break;
02202                         if(j == nr_class)
02203                                 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
02204                         else
02205                                 weighted_C[j] *= param->weight[i];
02206                 }
02207 
02208                 // train k*(k-1)/2 models
02209                 
02210                 bool *nonzero = Malloc(bool,l);
02211                 for(i=0;i<l;i++)
02212                         nonzero[i] = false;
02213                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02214 
02215                 double *probA=NULL,*probB=NULL;
02216                 if (param->probability)
02217                 {
02218                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02219                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02220                 }
02221 
02222                 int p = 0;
02223                 for(i=0;i<nr_class;i++)
02224                         for(int j=i+1;j<nr_class;j++)
02225                         {
02226                                 svm_problem sub_prob;
02227                                 int si = start[i], sj = start[j];
02228                                 int ci = count[i], cj = count[j];
02229                                 sub_prob.l = ci+cj;
02230                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02231                                 sub_prob.y = Malloc(double,sub_prob.l);
02232                                 int k;
02233                                 for(k=0;k<ci;k++)
02234                                 {
02235                                         sub_prob.x[k] = x[si+k];
02236                                         sub_prob.y[k] = +1;
02237                                 }
02238                                 for(k=0;k<cj;k++)
02239                                 {
02240                                         sub_prob.x[ci+k] = x[sj+k];
02241                                         sub_prob.y[ci+k] = -1;
02242                                 }
02243 
02244                                 if(param->probability)
02245                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02246 
02247                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02248                                 for(k=0;k<ci;k++)
02249                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02250                                                 nonzero[si+k] = true;
02251                                 for(k=0;k<cj;k++)
02252                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02253                                                 nonzero[sj+k] = true;
02254                                 free(sub_prob.x);
02255                                 free(sub_prob.y);
02256                                 ++p;
02257                         }
02258 
02259                 // build output
02260 
02261                 model->nr_class = nr_class;
02262                 
02263                 model->label = Malloc(int,nr_class);
02264                 for(i=0;i<nr_class;i++)
02265                         model->label[i] = label[i];
02266                 
02267                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02268                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02269                         model->rho[i] = f[i].rho;
02270 
02271                 if(param->probability)
02272                 {
02273                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02274                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02275                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02276                         {
02277                                 model->probA[i] = probA[i];
02278                                 model->probB[i] = probB[i];
02279                         }
02280                 }
02281                 else
02282                 {
02283                         model->probA=NULL;
02284                         model->probB=NULL;
02285                 }
02286 
02287                 int total_sv = 0;
02288                 int *nz_count = Malloc(int,nr_class);
02289                 model->nSV = Malloc(int,nr_class);
02290                 for(i=0;i<nr_class;i++)
02291                 {
02292                         int nSV = 0;
02293                         for(int j=0;j<count[i];j++)
02294                                 if(nonzero[start[i]+j])
02295                                 {       
02296                                         ++nSV;
02297                                         ++total_sv;
02298                                 }
02299                         model->nSV[i] = nSV;
02300                         nz_count[i] = nSV;
02301                 }
02302                 
02303                 info("Total nSV = %d\n",total_sv);
02304 
02305                 model->l = total_sv;
02306                 model->SV = Malloc(svm_node *,total_sv);
02307                 p = 0;
02308                 for(i=0;i<l;i++)
02309                         if(nonzero[i]) model->SV[p++] = x[i];
02310 
02311                 int *nz_start = Malloc(int,nr_class);
02312                 nz_start[0] = 0;
02313                 for(i=1;i<nr_class;i++)
02314                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02315 
02316                 model->sv_coef = Malloc(double *,nr_class-1);
02317                 for(i=0;i<nr_class-1;i++)
02318                         model->sv_coef[i] = Malloc(double,total_sv);
02319 
02320                 p = 0;
02321                 for(i=0;i<nr_class;i++)
02322                         for(int j=i+1;j<nr_class;j++)
02323                         {
02324                                 // classifier (i,j): coefficients with
02325                                 // i are in sv_coef[j-1][nz_start[i]...],
02326                                 // j are in sv_coef[i][nz_start[j]...]
02327 
02328                                 int si = start[i];
02329                                 int sj = start[j];
02330                                 int ci = count[i];
02331                                 int cj = count[j];
02332                                 
02333                                 int q = nz_start[i];
02334                                 int k;
02335                                 for(k=0;k<ci;k++)
02336                                         if(nonzero[si+k])
02337                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02338                                 q = nz_start[j];
02339                                 for(k=0;k<cj;k++)
02340                                         if(nonzero[sj+k])
02341                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02342                                 ++p;
02343                         }
02344                 
02345                 free(label);
02346                 free(probA);
02347                 free(probB);
02348                 free(count);
02349                 free(perm);
02350                 free(start);
02351                 free(x);
02352                 free(weighted_C);
02353                 free(nonzero);
02354                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02355                         free(f[i].alpha);
02356                 free(f);
02357                 free(nz_count);
02358                 free(nz_start);
02359         }
02360         return model;
02361 }
02362 
02363 // Stratified cross validation
02364 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02365 {
02366         int i;
02367         int *fold_start = Malloc(int,nr_fold+1);
02368         int l = prob->l;
02369         int *perm = Malloc(int,l);
02370         int nr_class;
02371 
02372         // stratified cv may not give leave-one-out rate
02373         // Each class to l folds -> some folds may have zero elements
02374         if((param->svm_type == C_SVC ||
02375             param->svm_type == NU_SVC) && nr_fold < l)
02376         {
02377                 int *start = NULL;
02378                 int *label = NULL;
02379                 int *count = NULL;
02380                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02381 
02382                 // random shuffle and then data grouped by fold using the array perm
02383                 int *fold_count = Malloc(int,nr_fold);
02384                 int c;
02385                 int *index = Malloc(int,l);
02386                 for(i=0;i<l;i++)
02387                         index[i]=perm[i];
02388                 for (c=0; c<nr_class; c++) 
02389                         for(i=0;i<count[c];i++)
02390                         {
02391                                 int j = i+rand()%(count[c]-i);
02392                                 swap(index[start[c]+j],index[start[c]+i]);
02393                         }
02394                 for(i=0;i<nr_fold;i++)
02395                 {
02396                         fold_count[i] = 0;
02397                         for (c=0; c<nr_class;c++)
02398                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02399                 }
02400                 fold_start[0]=0;
02401                 for (i=1;i<=nr_fold;i++)
02402                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02403                 for (c=0; c<nr_class;c++)
02404                         for(i=0;i<nr_fold;i++)
02405                         {
02406                                 int begin = start[c]+i*count[c]/nr_fold;
02407                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02408                                 for(int j=begin;j<end;j++)
02409                                 {
02410                                         perm[fold_start[i]] = index[j];
02411                                         fold_start[i]++;
02412                                 }
02413                         }
02414                 fold_start[0]=0;
02415                 for (i=1;i<=nr_fold;i++)
02416                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02417                 free(start);    
02418                 free(label);
02419                 free(count);    
02420                 free(index);
02421                 free(fold_count);
02422         }
02423         else
02424         {
02425                 for(i=0;i<l;i++) perm[i]=i;
02426                 for(i=0;i<l;i++)
02427                 {
02428                         int j = i+rand()%(l-i);
02429                         swap(perm[i],perm[j]);
02430                 }
02431                 for(i=0;i<=nr_fold;i++)
02432                         fold_start[i]=i*l/nr_fold;
02433         }
02434 
02435         for(i=0;i<nr_fold;i++)
02436         {
02437                 int begin = fold_start[i];
02438                 int end = fold_start[i+1];
02439                 int j,k;
02440                 struct svm_problem subprob;
02441 
02442                 subprob.l = l-(end-begin);
02443                 subprob.x = Malloc(struct svm_node*,subprob.l);
02444                 subprob.y = Malloc(double,subprob.l);
02445                         
02446                 k=0;
02447                 for(j=0;j<begin;j++)
02448                 {
02449                         subprob.x[k] = prob->x[perm[j]];
02450                         subprob.y[k] = prob->y[perm[j]];
02451                         ++k;
02452                 }
02453                 for(j=end;j<l;j++)
02454                 {
02455                         subprob.x[k] = prob->x[perm[j]];
02456                         subprob.y[k] = prob->y[perm[j]];
02457                         ++k;
02458                 }
02459                 struct svm_model *submodel = svm_train(&subprob,param);
02460                 if(param->probability && 
02461                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02462                 {
02463                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02464                         for(j=begin;j<end;j++)
02465                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02466                         free(prob_estimates);                   
02467                 }
02468                 else
02469                         for(j=begin;j<end;j++)
02470                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02471                 svm_destroy_model(submodel);
02472                 free(subprob.x);
02473                 free(subprob.y);
02474         }               
02475         free(fold_start);
02476         free(perm);     
02477 }
02478 
02479 
02480 int svm_get_svm_type(const svm_model *model)
02481 {
02482         return model->param.svm_type;
02483 }
02484 
02485 int svm_get_nr_class(const svm_model *model)
02486 {
02487         return model->nr_class;
02488 }
02489 
02490 void svm_get_labels(const svm_model *model, int* label)
02491 {
02492         if (model->label != NULL)
02493                 for(int i=0;i<model->nr_class;i++)
02494                         label[i] = model->label[i];
02495 }
02496 
02497 double svm_get_svr_probability(const svm_model *model)
02498 {
02499         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02500             model->probA!=NULL)
02501                 return model->probA[0];
02502         else
02503         {
02504                 info("Model doesn't contain information for SVR probability inference\n");
02505                 return 0;
02506         }
02507 }
02508 
02509 void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02510 {
02511         if(model->param.svm_type == ONE_CLASS ||
02512            model->param.svm_type == EPSILON_SVR ||
02513            model->param.svm_type == NU_SVR)
02514         {
02515                 double *sv_coef = model->sv_coef[0];
02516                 double sum = 0;
02517                 for(int i=0;i<model->l;i++)
02518                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02519                 sum -= model->rho[0];
02520                 *dec_values = sum;
02521         }
02522         else
02523         {
02524                 int i;
02525                 int nr_class = model->nr_class;
02526                 int l = model->l;
02527                 
02528                 double *kvalue = Malloc(double,l);
02529                 for(i=0;i<l;i++)
02530                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02531 
02532                 int *start = Malloc(int,nr_class);
02533                 start[0] = 0;
02534                 for(i=1;i<nr_class;i++)
02535                         start[i] = start[i-1]+model->nSV[i-1];
02536 
02537                 int p=0;
02538                 for(i=0;i<nr_class;i++)
02539                         for(int j=i+1;j<nr_class;j++)
02540                         {
02541                                 double sum = 0;
02542                                 int si = start[i];
02543                                 int sj = start[j];
02544                                 int ci = model->nSV[i];
02545                                 int cj = model->nSV[j];
02546                                 
02547                                 int k;
02548                                 double *coef1 = model->sv_coef[j-1];
02549                                 double *coef2 = model->sv_coef[i];
02550                                 for(k=0;k<ci;k++)
02551                                         sum += coef1[si+k] * kvalue[si+k];
02552                                 for(k=0;k<cj;k++)
02553                                         sum += coef2[sj+k] * kvalue[sj+k];
02554                                 sum -= model->rho[p];
02555                                 dec_values[p] = sum;
02556                                 p++;
02557                         }
02558 
02559                 free(kvalue);
02560                 free(start);
02561         }
02562 }
02563 
02564 double svm_predict(const svm_model *model, const svm_node *x)
02565 {
02566         if(model->param.svm_type == ONE_CLASS ||
02567            model->param.svm_type == EPSILON_SVR ||
02568            model->param.svm_type == NU_SVR)
02569         {
02570                 double res;
02571                 svm_predict_values(model, x, &res);
02572                 
02573                 if(model->param.svm_type == ONE_CLASS)
02574                         return (res>0)?1:-1;
02575                 else
02576                         return res;
02577         }
02578         else
02579         {
02580                 int i;
02581                 int nr_class = model->nr_class;
02582                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02583                 svm_predict_values(model, x, dec_values);
02584 
02585                 int *vote = Malloc(int,nr_class);
02586                 for(i=0;i<nr_class;i++)
02587                         vote[i] = 0;
02588                 int pos=0;
02589                 for(i=0;i<nr_class;i++)
02590                         for(int j=i+1;j<nr_class;j++)
02591                         {
02592                                 if(dec_values[pos++] > 0)
02593                                         ++vote[i];
02594                                 else
02595                                         ++vote[j];
02596                         }
02597 
02598                 int vote_max_idx = 0;
02599                 for(i=1;i<nr_class;i++)
02600                         if(vote[i] > vote[vote_max_idx])
02601                                 vote_max_idx = i;
02602                 free(vote);
02603                 free(dec_values);
02604                 return model->label[vote_max_idx];
02605         }
02606 }
02607 
02608 double svm_predict_probability(
02609         const svm_model *model, const svm_node *x, double *prob_estimates)
02610 {
02611         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02612             model->probA!=NULL && model->probB!=NULL)
02613         {
02614                 int i;
02615                 int nr_class = model->nr_class;
02616                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02617                 svm_predict_values(model, x, dec_values);
02618 
02619                 double min_prob=1e-7;
02620                 double **pairwise_prob=Malloc(double *,nr_class);
02621                 for(i=0;i<nr_class;i++)
02622                         pairwise_prob[i]=Malloc(double,nr_class);
02623                 int k=0;
02624                 for(i=0;i<nr_class;i++)
02625                         for(int j=i+1;j<nr_class;j++)
02626                         {
02627                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02628                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02629                                 k++;
02630                         }
02631                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02632 
02633                 int prob_max_idx = 0;
02634                 for(i=1;i<nr_class;i++)
02635                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02636                                 prob_max_idx = i;
02637                 for(i=0;i<nr_class;i++)
02638                         free(pairwise_prob[i]);
02639                 free(dec_values);
02640                 free(pairwise_prob);         
02641                 return model->label[prob_max_idx];
02642         }
02643         else 
02644                 return svm_predict(model, x);
02645 }
02646 
02647 const char *svm_type_table[] =
02648 {
02649         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02650 };
02651 
02652 const char *kernel_type_table[]=
02653 {
02654         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02655 };
02656 
02657 int svm_save_model(const char *model_file_name, const svm_model *model)
02658 {
02659         FILE *fp = fopen(model_file_name,"w");
02660         if(fp==NULL) return -1;
02661 
02662         const svm_parameter& param = model->param;
02663 
02664         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02665         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02666 
02667         if(param.kernel_type == POLY)
02668                 fprintf(fp,"degree %d\n", param.degree);
02669 
02670         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02671                 fprintf(fp,"gamma %g\n", param.gamma);
02672 
02673         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02674                 fprintf(fp,"coef0 %g\n", param.coef0);
02675 
02676         int nr_class = model->nr_class;
02677         int l = model->l;
02678         fprintf(fp, "nr_class %d\n", nr_class);
02679         fprintf(fp, "total_sv %d\n",l);
02680         
02681         {
02682                 fprintf(fp, "rho");
02683                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02684                         fprintf(fp," %g",model->rho[i]);
02685                 fprintf(fp, "\n");
02686         }
02687         
02688         if(model->label)
02689         {
02690                 fprintf(fp, "label");
02691                 for(int i=0;i<nr_class;i++)
02692                         fprintf(fp," %d",model->label[i]);
02693                 fprintf(fp, "\n");
02694         }
02695 
02696         if(model->probA) // regression has probA only
02697         {
02698                 fprintf(fp, "probA");
02699                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02700                         fprintf(fp," %g",model->probA[i]);
02701                 fprintf(fp, "\n");
02702         }
02703         if(model->probB)
02704         {
02705                 fprintf(fp, "probB");
02706                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02707                         fprintf(fp," %g",model->probB[i]);
02708                 fprintf(fp, "\n");
02709         }
02710 
02711         if(model->nSV)
02712         {
02713                 fprintf(fp, "nr_sv");
02714                 for(int i=0;i<nr_class;i++)
02715                         fprintf(fp," %d",model->nSV[i]);
02716                 fprintf(fp, "\n");
02717         }
02718 
02719         fprintf(fp, "SV\n");
02720         const double * const *sv_coef = model->sv_coef;
02721         const svm_node * const *SV = model->SV;
02722 
02723         for(int i=0;i<l;i++)
02724         {
02725                 for(int j=0;j<nr_class-1;j++)
02726                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02727 
02728                 const svm_node *p = SV[i];
02729 
02730                 if(param.kernel_type == PRECOMPUTED)
02731                         fprintf(fp,"0:%d ",(int)(p->value));
02732                 else
02733                         while(p->index != -1)
02734                         {
02735                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02736                                 p++;
02737                         }
02738                 fprintf(fp, "\n");
02739         }
02740     if (ferror(fp) != 0) {
02741         fclose(fp);
02742         return -1;
02743     } else {
02744         if (fclose(fp) != 0)
02745             return -1;
02746         else
02747             return 0;
02748     }
02749 }
02750 
02751 svm_model *svm_load_model(const char *model_file_name)
02752 {
02753         FILE *fp = fopen(model_file_name,"r");
02754     char *p,*old_locale;
02755         if(fp==NULL) return NULL;
02756         
02757     // set numeric locale to C, for correct number output
02758     p = setlocale(LC_NUMERIC,NULL);
02759     old_locale = strdup(p);
02760     setlocale(LC_NUMERIC,"C");
02761 
02762         // read parameters
02763         svm_model *model = Malloc(svm_model,1);
02764         svm_parameter& param = model->param;
02765         model->rho = NULL;
02766         model->probA = NULL;
02767         model->probB = NULL;
02768         model->label = NULL;
02769         model->nSV = NULL;
02770 
02771         char cmd[100];
02772         while(1)
02773         {
02774                 fscanf(fp,"%100s",cmd);
02775 
02776                 if(strcmp(cmd,"svm_type")==0)
02777                 {
02778                         fscanf(fp,"%80s",cmd);
02779                         int i;
02780                         for(i=0;svm_type_table[i];i++)
02781                         {
02782                                 if(strcmp(svm_type_table[i],cmd)==0)
02783                                 {
02784                                         param.svm_type=i;
02785                                         break;
02786                                 }
02787                         }
02788                         if(svm_type_table[i] == NULL)
02789                         {
02790                                 fprintf(stderr,"unknown svm type.\n");
02791                                 free(model->rho);
02792                                 free(model->label);
02793                                 free(model->nSV);
02794                                 free(model);
02795                                 return NULL;
02796                         }
02797                 }
02798                 else if(strcmp(cmd,"kernel_type")==0)
02799                 {               
02800                         fscanf(fp,"%80s",cmd);
02801                         int i;
02802                         for(i=0;kernel_type_table[i];i++)
02803                         {
02804                                 if(strcmp(kernel_type_table[i],cmd)==0)
02805                                 {
02806                                         param.kernel_type=i;
02807                                         break;
02808                                 }
02809                         }
02810                         if(kernel_type_table[i] == NULL)
02811                         {
02812                                 fprintf(stderr,"unknown kernel function.\n");
02813                                 free(model->rho);
02814                                 free(model->label);
02815                                 free(model->nSV);
02816                                 free(model);
02817                                 return NULL;
02818                         }
02819                 }
02820                 else if(strcmp(cmd,"degree")==0)
02821                         fscanf(fp,"%d",&param.degree);
02822                 else if(strcmp(cmd,"gamma")==0)
02823                         fscanf(fp,"%lf",&param.gamma);
02824                 else if(strcmp(cmd,"coef0")==0)
02825                         fscanf(fp,"%lf",&param.coef0);
02826                 else if(strcmp(cmd,"nr_class")==0)
02827                         fscanf(fp,"%d",&model->nr_class);
02828                 else if(strcmp(cmd,"total_sv")==0)
02829                         fscanf(fp,"%d",&model->l);
02830                 else if(strcmp(cmd,"rho")==0)
02831                 {
02832                         int n = model->nr_class * (model->nr_class-1)/2;
02833                         model->rho = Malloc(double,n);
02834                         for(int i=0;i<n;i++)
02835                                 fscanf(fp,"%lf",&model->rho[i]);
02836                 }
02837                 else if(strcmp(cmd,"label")==0)
02838                 {
02839                         int n = model->nr_class;
02840                         model->label = Malloc(int,n);
02841                         for(int i=0;i<n;i++)
02842                                 fscanf(fp,"%d",&model->label[i]);
02843                 }
02844                 else if(strcmp(cmd,"probA")==0)
02845                 {
02846                         int n = model->nr_class * (model->nr_class-1)/2;
02847                         model->probA = Malloc(double,n);
02848                         for(int i=0;i<n;i++)
02849                                 fscanf(fp,"%lf",&model->probA[i]);
02850                 }
02851                 else if(strcmp(cmd,"probB")==0)
02852                 {
02853                         int n = model->nr_class * (model->nr_class-1)/2;
02854                         model->probB = Malloc(double,n);
02855                         for(int i=0;i<n;i++)
02856                                 fscanf(fp,"%lf",&model->probB[i]);
02857                 }
02858                 else if(strcmp(cmd,"nr_sv")==0)
02859                 {
02860                         int n = model->nr_class;
02861                         model->nSV = Malloc(int,n);
02862                         for(int i=0;i<n;i++)
02863                                 fscanf(fp,"%d",&model->nSV[i]);
02864                 }
02865                 else if(strcmp(cmd,"license")==0)
02866                 {
02867                         fscanf(fp,"%100s",cmd);
02868                         //std::cout << "License: " << cmd << std::endl;
02869                 
02870                 }
02871                 else if(strcmp(cmd,"SV")==0)
02872                 {
02873                         while(1)
02874                         {
02875                                 int c = getc(fp);
02876                                 if(c==EOF || c=='\n') break;    
02877                         }
02878                         break;
02879                 }
02880                 else
02881                 {
02882                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02883                         free(model->rho);
02884                         free(model->label);
02885                         free(model->nSV);
02886                         free(model);
02887             setlocale(LC_NUMERIC,old_locale);
02888             free(old_locale);
02889                         return NULL;
02890                 }
02891         }
02892 
02893         // read sv_coef and SV
02894 
02895         int elements = 0;
02896         long pos = ftell(fp);
02897 
02898         while(1)
02899         {
02900                 int c = fgetc(fp);
02901                 switch(c)
02902                 {
02903                         case '\n':
02904                                 // count the '-1' element
02905                         case ':':
02906                                 ++elements;
02907                                 break;
02908                         case EOF:
02909                                 goto out;
02910                         default:
02911                                 ;
02912                 }
02913         }
02914 out:
02915         fseek(fp,pos,SEEK_SET);
02916 
02917         int m = model->nr_class - 1;
02918         int l = model->l;
02919         model->sv_coef = Malloc(double *,m);
02920         int i;
02921         for(i=0;i<m;i++)
02922                 model->sv_coef[i] = Malloc(double,l);
02923         model->SV = Malloc(svm_node*,l);
02924         svm_node *x_space=NULL;
02925         if(l>0) x_space = Malloc(svm_node,elements);
02926 
02927         int j=0;
02928         for(i=0;i<l;i++)
02929         {
02930                 model->SV[i] = &x_space[j];
02931                 for(int k=0;k<m;k++)
02932                         fscanf(fp,"%lf",&model->sv_coef[k][i]);
02933                 while(1)
02934                 {
02935                         int c;
02936                         do {
02937                                 c = getc(fp);
02938                                 if(c=='\n') goto out2;
02939                         } while(isspace(c));
02940                         ungetc(c,fp);
02941                         fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
02942                         ++j;
02943                 }       
02944 out2:
02945                 x_space[j++].index = -1;
02946         }
02947         if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;
02948 
02949         model->free_sv = 1;     // XXX
02950     setlocale(LC_NUMERIC,old_locale);
02951     free(old_locale);
02952         return model;
02953 }
02954 
02955 void svm_destroy_model(svm_model* model)
02956 {
02957         if(model->free_sv && model->l > 0)
02958                 free((void *)(model->SV[0]));
02959         for(int i=0;i<model->nr_class-1;i++)
02960                 free(model->sv_coef[i]);
02961         free(model->SV);
02962         free(model->sv_coef);
02963         free(model->rho);
02964         free(model->label);
02965         free(model->probA);
02966         free(model->probB);
02967         free(model->nSV);
02968         free(model);
02969 }
02970 
02971 void svm_destroy_param(svm_parameter* param)
02972 {
02973         free(param->weight_label);
02974         free(param->weight);
02975 }
02976 
02977 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02978 {
02979         // svm_type
02980 
02981         int svm_type = param->svm_type;
02982         if(svm_type != C_SVC &&
02983            svm_type != NU_SVC &&
02984            svm_type != ONE_CLASS &&
02985            svm_type != EPSILON_SVR &&
02986            svm_type != NU_SVR)
02987                 return "unknown svm type";
02988         
02989         // kernel_type, degree
02990         
02991         int kernel_type = param->kernel_type;
02992         if(kernel_type != LINEAR &&
02993            kernel_type != POLY &&
02994            kernel_type != RBF &&
02995            kernel_type != SIGMOID &&
02996            kernel_type != PRECOMPUTED)
02997                 return "unknown kernel type";
02998 
02999         if(param->degree < 0)
03000                 return "degree of polynomial kernel < 0";
03001 
03002         // cache_size,eps,C,nu,p,shrinking
03003 
03004         if(param->cache_size <= 0)
03005                 return "cache_size <= 0";
03006 
03007         if(param->eps <= 0)
03008                 return "eps <= 0";
03009 
03010         if(svm_type == C_SVC ||
03011            svm_type == EPSILON_SVR ||
03012            svm_type == NU_SVR)
03013                 if(param->C <= 0)
03014                         return "C <= 0";
03015 
03016         if(svm_type == NU_SVC ||
03017            svm_type == ONE_CLASS ||
03018            svm_type == NU_SVR)
03019                 if(param->nu <= 0 || param->nu > 1)
03020                         return "nu <= 0 or nu > 1";
03021 
03022         if(svm_type == EPSILON_SVR)
03023                 if(param->p < 0)
03024                         return "p < 0";
03025 
03026         if(param->shrinking != 0 &&
03027            param->shrinking != 1)
03028                 return "shrinking != 0 and shrinking != 1";
03029 
03030         if(param->probability != 0 &&
03031            param->probability != 1)
03032                 return "probability != 0 and probability != 1";
03033 
03034         if(param->probability == 1 &&
03035            svm_type == ONE_CLASS)
03036                 return "one-class SVM probability output not supported yet";
03037 
03038 
03039         // check whether nu-svc is feasible
03040         
03041         if(svm_type == NU_SVC)
03042         {
03043                 int l = prob->l;
03044                 int max_nr_class = 16;
03045                 int nr_class = 0;
03046                 int *label = Malloc(int,max_nr_class);
03047                 int *count = Malloc(int,max_nr_class);
03048 
03049                 int i;
03050                 for(i=0;i<l;i++)
03051                 {
03052                         int this_label = (int)prob->y[i];
03053                         int j;
03054                         for(j=0;j<nr_class;j++)
03055                                 if(this_label == label[j])
03056                                 {
03057                                         ++count[j];
03058                                         break;
03059                                 }
03060                         if(j == nr_class)
03061                         {
03062                                 if(nr_class == max_nr_class)
03063                                 {
03064                                         max_nr_class *= 2;
03065                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03066                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03067                                 }
03068                                 label[nr_class] = this_label;
03069                                 count[nr_class] = 1;
03070                                 ++nr_class;
03071                         }
03072                 }
03073         
03074                 for(i=0;i<nr_class;i++)
03075                 {
03076                         int n1 = count[i];
03077                         for(int j=i+1;j<nr_class;j++)
03078                         {
03079                                 int n2 = count[j];
03080                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03081                                 {
03082                                         free(label);
03083                                         free(count);
03084                                         return "specified nu is infeasible";
03085                                 }
03086                         }
03087                 }
03088                 free(label);
03089                 free(count);
03090         }
03091 
03092         return NULL;
03093 }
03094 
03095 int svm_check_probability_model(const svm_model *model)
03096 {
03097         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03098                 model->probA!=NULL && model->probB!=NULL) ||
03099                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03100                  model->probA!=NULL);
03101 }
03102 
03103 }; // namespace

Generated on Mon Sep 1 01:25:31 2014 for Hugintrunk by  doxygen 1.3.9.1